From 8ce85d24622bf3ade57f94f91915cc3d74ecfabb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?= <branimirkaradzic@gmail.com>
Date: Mon, 3 Apr 2017 22:42:27 -0700
Subject: [PATCH] Added bimg dependency.

---
 3rdparty/edtaa3/LICENSE.md                    |    34 -
 3rdparty/edtaa3/edtaa3func.cpp                |   580 -
 3rdparty/edtaa3/edtaa3func.h                  |     7 -
 3rdparty/etc1/LICENSE                         |   161 -
 3rdparty/etc1/etc1.cpp                        |   686 -
 3rdparty/etc1/etc1.h                          |   114 -
 3rdparty/etc2/LICENSE.txt                     |    24 -
 3rdparty/etc2/Math.hpp                        |    90 -
 3rdparty/etc2/ProcessCommon.hpp               |    51 -
 3rdparty/etc2/ProcessRGB.cpp                  |   719 -
 3rdparty/etc2/ProcessRGB.hpp                  |     9 -
 3rdparty/etc2/Tables.cpp                      |   109 -
 3rdparty/etc2/Tables.hpp                      |    25 -
 3rdparty/etc2/Types.hpp                       |    17 -
 3rdparty/etc2/Vector.hpp                      |   222 -
 3rdparty/iqa/LICENSE                          |    32 -
 3rdparty/iqa/README.txt                       |    36 -
 3rdparty/iqa/include/convolve.h               |   111 -
 3rdparty/iqa/include/decimate.h               |    55 -
 3rdparty/iqa/include/iqa.h                    |   134 -
 3rdparty/iqa/include/iqa_os.h                 |    68 -
 3rdparty/iqa/include/math_utils.h             |    64 -
 3rdparty/iqa/include/ssim.h                   |   117 -
 3rdparty/iqa/source/convolve.c                |   195 -
 3rdparty/iqa/source/decimate.c                |    59 -
 3rdparty/iqa/source/math_utils.c              |    82 -
 3rdparty/iqa/source/ms_ssim.c                 |   277 -
 3rdparty/iqa/source/mse.c                     |    50 -
 3rdparty/iqa/source/psnr.c                    |    42 -
 3rdparty/iqa/source/ssim.c                    |   322 -
 3rdparty/libsquish/LICENSE                    |    20 -
 3rdparty/libsquish/README                     |    35 -
 3rdparty/libsquish/alpha.cpp                  |   350 -
 3rdparty/libsquish/alpha.h                    |    41 -
 3rdparty/libsquish/clusterfit.cpp             |   392 -
 3rdparty/libsquish/clusterfit.h               |    61 -
 3rdparty/libsquish/colourblock.cpp            |   214 -
 3rdparty/libsquish/colourblock.h              |    41 -
 3rdparty/libsquish/colourfit.cpp              |    54 -
 3rdparty/libsquish/colourfit.h                |    56 -
 3rdparty/libsquish/colourset.cpp              |   121 -
 3rdparty/libsquish/colourset.h                |    58 -
 3rdparty/libsquish/config.h                   |    49 -
 3rdparty/libsquish/maths.cpp                  |   259 -
 3rdparty/libsquish/maths.h                    |   233 -
 3rdparty/libsquish/rangefit.cpp               |   201 -
 3rdparty/libsquish/rangefit.h                 |    54 -
 3rdparty/libsquish/simd.h                     |    32 -
 3rdparty/libsquish/simd_float.h               |   183 -
 3rdparty/libsquish/singlecolourfit.cpp        |   172 -
 3rdparty/libsquish/singlecolourfit.h          |    58 -
 3rdparty/libsquish/singlecolourlookup.inl     |  1064 --
 3rdparty/libsquish/squish.cpp                 |   260 -
 3rdparty/libsquish/squish.h                   |   269 -
 3rdparty/lodepng/README.md                    |    10 -
 3rdparty/lodepng/lodepng.cpp                  |  6224 --------
 3rdparty/lodepng/lodepng.h                    |  1759 ---
 .../nvtt/NVIDIA_Texture_Tools_LICENSE.txt     |    24 -
 3rdparty/nvtt/bc6h/bits.h                     |    75 -
 3rdparty/nvtt/bc6h/shapes_two.h               |   133 -
 3rdparty/nvtt/bc6h/tile.h                     |    82 -
 3rdparty/nvtt/bc6h/zoh.cpp                    |   197 -
 3rdparty/nvtt/bc6h/zoh.h                      |    65 -
 3rdparty/nvtt/bc6h/zoh_utils.cpp              |   324 -
 3rdparty/nvtt/bc6h/zoh_utils.h                |    72 -
 3rdparty/nvtt/bc6h/zohone.cpp                 |   799 -
 3rdparty/nvtt/bc6h/zohtwo.cpp                 |   883 --
 3rdparty/nvtt/bc7/avpcl.cpp                   |   264 -
 3rdparty/nvtt/bc7/avpcl.h                     |    99 -
 3rdparty/nvtt/bc7/avpcl_mode0.cpp             |  1066 --
 3rdparty/nvtt/bc7/avpcl_mode1.cpp             |  1047 --
 3rdparty/nvtt/bc7/avpcl_mode2.cpp             |  1004 --
 3rdparty/nvtt/bc7/avpcl_mode3.cpp             |  1059 --
 3rdparty/nvtt/bc7/avpcl_mode4.cpp             |  1214 --
 3rdparty/nvtt/bc7/avpcl_mode5.cpp             |  1216 --
 3rdparty/nvtt/bc7/avpcl_mode6.cpp             |  1055 --
 3rdparty/nvtt/bc7/avpcl_mode7.cpp             |  1094 --
 3rdparty/nvtt/bc7/avpcl_utils.cpp             |   389 -
 3rdparty/nvtt/bc7/avpcl_utils.h               |    61 -
 3rdparty/nvtt/bc7/bits.h                      |    76 -
 3rdparty/nvtt/bc7/endpts.h                    |    81 -
 3rdparty/nvtt/bc7/shapes_three.h              |   132 -
 3rdparty/nvtt/bc7/shapes_two.h                |   133 -
 3rdparty/nvtt/bc7/tile.h                      |    41 -
 3rdparty/nvtt/nvcore/array.h                  |   181 -
 3rdparty/nvtt/nvcore/array.inl                |   437 -
 3rdparty/nvtt/nvcore/debug.h                  |   216 -
 3rdparty/nvtt/nvcore/defsgnucdarwin.h         |    57 -
 3rdparty/nvtt/nvcore/defsgnuclinux.h          |    63 -
 3rdparty/nvtt/nvcore/defsgnucwin32.h          |    65 -
 3rdparty/nvtt/nvcore/defsvcwin32.h            |    94 -
 3rdparty/nvtt/nvcore/foreach.h                |    68 -
 3rdparty/nvtt/nvcore/hash.h                   |    83 -
 3rdparty/nvtt/nvcore/memory.h                 |    30 -
 3rdparty/nvtt/nvcore/nvcore.h                 |   363 -
 3rdparty/nvtt/nvcore/posh.h                   |  1030 --
 3rdparty/nvtt/nvcore/stdstream.h              |   459 -
 3rdparty/nvtt/nvcore/stream.h                 |   163 -
 3rdparty/nvtt/nvcore/strlib.h                 |   429 -
 3rdparty/nvtt/nvcore/utils.h                  |   281 -
 3rdparty/nvtt/nvmath/fitting.cpp              |  1200 --
 3rdparty/nvtt/nvmath/fitting.h                |    49 -
 3rdparty/nvtt/nvmath/matrix.h                 |   112 -
 3rdparty/nvtt/nvmath/matrix.inl               |  1274 --
 3rdparty/nvtt/nvmath/nvmath.h                 |    61 -
 3rdparty/nvtt/nvmath/plane.h                  |    40 -
 3rdparty/nvtt/nvmath/plane.inl                |    49 -
 3rdparty/nvtt/nvmath/vector.h                 |   148 -
 3rdparty/nvtt/nvmath/vector.inl               |   921 --
 3rdparty/nvtt/nvtt.cpp                        |    95 -
 3rdparty/nvtt/nvtt.h                          |    13 -
 3rdparty/pvrtc/AlphaBitmap.h                  |    20 -
 3rdparty/pvrtc/BitScale.cpp                   |   183 -
 3rdparty/pvrtc/BitScale.h                     |    28 -
 3rdparty/pvrtc/BitUtility.h                   |    19 -
 3rdparty/pvrtc/Bitmap.h                       |    36 -
 3rdparty/pvrtc/ColorRgba.h                    |   152 -
 3rdparty/pvrtc/Interval.h                     |    21 -
 3rdparty/pvrtc/LICENSE.TXT                    |    25 -
 3rdparty/pvrtc/MortonTable.cpp                |    43 -
 3rdparty/pvrtc/MortonTable.h                  |    18 -
 3rdparty/pvrtc/Point2.h                       |    17 -
 3rdparty/pvrtc/PvrTcDecoder.cpp               |   144 -
 3rdparty/pvrtc/PvrTcDecoder.h                 |    25 -
 3rdparty/pvrtc/PvrTcEncoder.cpp               |   464 -
 3rdparty/pvrtc/PvrTcEncoder.h                 |    43 -
 3rdparty/pvrtc/PvrTcPacket.cpp                |   209 -
 3rdparty/pvrtc/PvrTcPacket.h                  |    65 -
 3rdparty/pvrtc/README.md                      |    17 -
 3rdparty/pvrtc/RgbBitmap.h                    |    25 -
 3rdparty/pvrtc/RgbaBitmap.h                   |    24 -
 3rdparty/stb/stb_image.c                      |  6769 ---------
 3rdparty/tinyexr/README.md                    |   274 -
 3rdparty/tinyexr/tinyexr.h                    | 12354 ----------------
 examples/32-particles/particles.cpp           |     4 +-
 examples/common/bgfx_utils.cpp                |    18 +-
 examples/common/bgfx_utils.h                  |     4 +-
 examples/common/image_decode.cpp              |   440 -
 examples/common/image_decode.h                |    64 -
 examples/common/nanovg/nanovg.cpp             |    14 +-
 include/bgfx/bgfx.h                           |    38 -
 include/bgfx/c99/bgfx.h                       |     6 -
 include/bgfx/c99/platform.h                   |     2 -
 scripts/bgfx.lua                              |     2 +
 scripts/example-common.lua                    |     3 +-
 scripts/genie.lua                             |     7 +-
 scripts/shaderc.lua                           |     3 +-
 scripts/texturec.lua                          |    32 +-
 scripts/texturev.lua                          |     3 +
 src/amalgamated.cpp                           |     1 -
 src/bgfx.cpp                                  |   110 +-
 src/bgfx_p.h                                  |    17 +-
 src/image.cpp                                 |  3261 ----
 src/image.h                                   |   251 -
 src/renderer_d3d11.cpp                        |    36 +-
 src/renderer_d3d12.cpp                        |    36 +-
 src/renderer_d3d9.cpp                         |    54 +-
 src/renderer_gl.cpp                           |    48 +-
 src/renderer_vk.cpp                           |     2 +-
 tools/texturec/texturec.cpp                   |   308 +-
 tools/texturev/texturev.cpp                   |     4 +-
 161 files changed, 257 insertions(+), 62914 deletions(-)
 delete mode 100644 3rdparty/edtaa3/LICENSE.md
 delete mode 100644 3rdparty/edtaa3/edtaa3func.cpp
 delete mode 100644 3rdparty/edtaa3/edtaa3func.h
 delete mode 100644 3rdparty/etc1/LICENSE
 delete mode 100644 3rdparty/etc1/etc1.cpp
 delete mode 100644 3rdparty/etc1/etc1.h
 delete mode 100644 3rdparty/etc2/LICENSE.txt
 delete mode 100644 3rdparty/etc2/Math.hpp
 delete mode 100644 3rdparty/etc2/ProcessCommon.hpp
 delete mode 100644 3rdparty/etc2/ProcessRGB.cpp
 delete mode 100644 3rdparty/etc2/ProcessRGB.hpp
 delete mode 100644 3rdparty/etc2/Tables.cpp
 delete mode 100644 3rdparty/etc2/Tables.hpp
 delete mode 100644 3rdparty/etc2/Types.hpp
 delete mode 100644 3rdparty/etc2/Vector.hpp
 delete mode 100644 3rdparty/iqa/LICENSE
 delete mode 100644 3rdparty/iqa/README.txt
 delete mode 100644 3rdparty/iqa/include/convolve.h
 delete mode 100644 3rdparty/iqa/include/decimate.h
 delete mode 100644 3rdparty/iqa/include/iqa.h
 delete mode 100644 3rdparty/iqa/include/iqa_os.h
 delete mode 100644 3rdparty/iqa/include/math_utils.h
 delete mode 100644 3rdparty/iqa/include/ssim.h
 delete mode 100644 3rdparty/iqa/source/convolve.c
 delete mode 100644 3rdparty/iqa/source/decimate.c
 delete mode 100644 3rdparty/iqa/source/math_utils.c
 delete mode 100644 3rdparty/iqa/source/ms_ssim.c
 delete mode 100644 3rdparty/iqa/source/mse.c
 delete mode 100644 3rdparty/iqa/source/psnr.c
 delete mode 100644 3rdparty/iqa/source/ssim.c
 delete mode 100644 3rdparty/libsquish/LICENSE
 delete mode 100644 3rdparty/libsquish/README
 delete mode 100644 3rdparty/libsquish/alpha.cpp
 delete mode 100644 3rdparty/libsquish/alpha.h
 delete mode 100644 3rdparty/libsquish/clusterfit.cpp
 delete mode 100644 3rdparty/libsquish/clusterfit.h
 delete mode 100644 3rdparty/libsquish/colourblock.cpp
 delete mode 100644 3rdparty/libsquish/colourblock.h
 delete mode 100644 3rdparty/libsquish/colourfit.cpp
 delete mode 100644 3rdparty/libsquish/colourfit.h
 delete mode 100644 3rdparty/libsquish/colourset.cpp
 delete mode 100644 3rdparty/libsquish/colourset.h
 delete mode 100644 3rdparty/libsquish/config.h
 delete mode 100644 3rdparty/libsquish/maths.cpp
 delete mode 100644 3rdparty/libsquish/maths.h
 delete mode 100644 3rdparty/libsquish/rangefit.cpp
 delete mode 100644 3rdparty/libsquish/rangefit.h
 delete mode 100644 3rdparty/libsquish/simd.h
 delete mode 100644 3rdparty/libsquish/simd_float.h
 delete mode 100644 3rdparty/libsquish/singlecolourfit.cpp
 delete mode 100644 3rdparty/libsquish/singlecolourfit.h
 delete mode 100644 3rdparty/libsquish/singlecolourlookup.inl
 delete mode 100644 3rdparty/libsquish/squish.cpp
 delete mode 100644 3rdparty/libsquish/squish.h
 delete mode 100644 3rdparty/lodepng/README.md
 delete mode 100644 3rdparty/lodepng/lodepng.cpp
 delete mode 100644 3rdparty/lodepng/lodepng.h
 delete mode 100644 3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt
 delete mode 100644 3rdparty/nvtt/bc6h/bits.h
 delete mode 100644 3rdparty/nvtt/bc6h/shapes_two.h
 delete mode 100644 3rdparty/nvtt/bc6h/tile.h
 delete mode 100644 3rdparty/nvtt/bc6h/zoh.cpp
 delete mode 100644 3rdparty/nvtt/bc6h/zoh.h
 delete mode 100644 3rdparty/nvtt/bc6h/zoh_utils.cpp
 delete mode 100644 3rdparty/nvtt/bc6h/zoh_utils.h
 delete mode 100644 3rdparty/nvtt/bc6h/zohone.cpp
 delete mode 100644 3rdparty/nvtt/bc6h/zohtwo.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl.h
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode0.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode1.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode2.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode3.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode4.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode5.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode6.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_mode7.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_utils.cpp
 delete mode 100644 3rdparty/nvtt/bc7/avpcl_utils.h
 delete mode 100644 3rdparty/nvtt/bc7/bits.h
 delete mode 100644 3rdparty/nvtt/bc7/endpts.h
 delete mode 100644 3rdparty/nvtt/bc7/shapes_three.h
 delete mode 100644 3rdparty/nvtt/bc7/shapes_two.h
 delete mode 100644 3rdparty/nvtt/bc7/tile.h
 delete mode 100644 3rdparty/nvtt/nvcore/array.h
 delete mode 100644 3rdparty/nvtt/nvcore/array.inl
 delete mode 100644 3rdparty/nvtt/nvcore/debug.h
 delete mode 100644 3rdparty/nvtt/nvcore/defsgnucdarwin.h
 delete mode 100644 3rdparty/nvtt/nvcore/defsgnuclinux.h
 delete mode 100644 3rdparty/nvtt/nvcore/defsgnucwin32.h
 delete mode 100644 3rdparty/nvtt/nvcore/defsvcwin32.h
 delete mode 100644 3rdparty/nvtt/nvcore/foreach.h
 delete mode 100644 3rdparty/nvtt/nvcore/hash.h
 delete mode 100644 3rdparty/nvtt/nvcore/memory.h
 delete mode 100644 3rdparty/nvtt/nvcore/nvcore.h
 delete mode 100644 3rdparty/nvtt/nvcore/posh.h
 delete mode 100644 3rdparty/nvtt/nvcore/stdstream.h
 delete mode 100644 3rdparty/nvtt/nvcore/stream.h
 delete mode 100644 3rdparty/nvtt/nvcore/strlib.h
 delete mode 100644 3rdparty/nvtt/nvcore/utils.h
 delete mode 100644 3rdparty/nvtt/nvmath/fitting.cpp
 delete mode 100644 3rdparty/nvtt/nvmath/fitting.h
 delete mode 100644 3rdparty/nvtt/nvmath/matrix.h
 delete mode 100644 3rdparty/nvtt/nvmath/matrix.inl
 delete mode 100644 3rdparty/nvtt/nvmath/nvmath.h
 delete mode 100644 3rdparty/nvtt/nvmath/plane.h
 delete mode 100644 3rdparty/nvtt/nvmath/plane.inl
 delete mode 100644 3rdparty/nvtt/nvmath/vector.h
 delete mode 100644 3rdparty/nvtt/nvmath/vector.inl
 delete mode 100644 3rdparty/nvtt/nvtt.cpp
 delete mode 100644 3rdparty/nvtt/nvtt.h
 delete mode 100644 3rdparty/pvrtc/AlphaBitmap.h
 delete mode 100644 3rdparty/pvrtc/BitScale.cpp
 delete mode 100644 3rdparty/pvrtc/BitScale.h
 delete mode 100644 3rdparty/pvrtc/BitUtility.h
 delete mode 100644 3rdparty/pvrtc/Bitmap.h
 delete mode 100644 3rdparty/pvrtc/ColorRgba.h
 delete mode 100644 3rdparty/pvrtc/Interval.h
 delete mode 100644 3rdparty/pvrtc/LICENSE.TXT
 delete mode 100644 3rdparty/pvrtc/MortonTable.cpp
 delete mode 100644 3rdparty/pvrtc/MortonTable.h
 delete mode 100644 3rdparty/pvrtc/Point2.h
 delete mode 100644 3rdparty/pvrtc/PvrTcDecoder.cpp
 delete mode 100644 3rdparty/pvrtc/PvrTcDecoder.h
 delete mode 100644 3rdparty/pvrtc/PvrTcEncoder.cpp
 delete mode 100644 3rdparty/pvrtc/PvrTcEncoder.h
 delete mode 100644 3rdparty/pvrtc/PvrTcPacket.cpp
 delete mode 100644 3rdparty/pvrtc/PvrTcPacket.h
 delete mode 100644 3rdparty/pvrtc/README.md
 delete mode 100644 3rdparty/pvrtc/RgbBitmap.h
 delete mode 100644 3rdparty/pvrtc/RgbaBitmap.h
 delete mode 100644 3rdparty/stb/stb_image.c
 delete mode 100644 3rdparty/tinyexr/README.md
 delete mode 100644 3rdparty/tinyexr/tinyexr.h
 delete mode 100644 examples/common/image_decode.cpp
 delete mode 100644 examples/common/image_decode.h
 delete mode 100644 src/image.cpp
 delete mode 100644 src/image.h

diff --git a/3rdparty/edtaa3/LICENSE.md b/3rdparty/edtaa3/LICENSE.md
deleted file mode 100644
index 93e6a9452..000000000
--- a/3rdparty/edtaa3/LICENSE.md
+++ /dev/null
@@ -1,34 +0,0 @@
-https://github.com/OpenGLInsights/OpenGLInsightsCode/blob/master/Chapter%2012%202D%20Shape%20Rendering%20by%20Distance%20Fields/LICENSE.txt
-
-The C code and the GLSL code for the OpenGL demo is public
-domain code. The distance transform code in the console
-application to create distance field textures, located in
-the file "edtaa3func.c", is MIT licensed, and free to use
-under the following conditions.
-
-https://github.com/OpenGLInsights/OpenGLInsightsCode/issues/6#issuecomment-67829157
-
-----
-
-Copyright (C) 2011 by Stefan Gustavson
-(stefan.gustavson@liu.se)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-----
diff --git a/3rdparty/edtaa3/edtaa3func.cpp b/3rdparty/edtaa3/edtaa3func.cpp
deleted file mode 100644
index e20952954..000000000
--- a/3rdparty/edtaa3/edtaa3func.cpp
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * edtaa3()
- *
- * Sweep-and-update Euclidean distance transform of an
- * image. Positive pixels are treated as object pixels,
- * zero or negative pixels are treated as background.
- * An attempt is made to treat antialiased edges correctly.
- * The input image must have pixels in the range [0,1],
- * and the antialiased image should be a box-filter
- * sampling of the ideal, crisp edge.
- * If the antialias region is more than 1 pixel wide,
- * the result from this transform will be inaccurate.
- *
- * By Stefan Gustavson (stefan.gustavson@gmail.com).
- *
- * Originally written in 1994, based on a verbal
- * description of Per-Erik Danielsson's SSED8 algorithm
- * as presented in the PhD dissertation of Ingemar
- * Ragnemalm. This is Per-Erik Danielsson's scanline
- * scheme from 1979 - I only implemented it in C.
- *
- * Updated in 2004 to treat border pixels correctly,
- * and cleaned up the code to improve readability.
- *
- * Updated in 2009 to handle anti-aliased edges,
- * as published in the article "Anti-aliased Euclidean
- * distance transform" by Stefan Gustavson and Robin Strand,
- * Pattern Recognition Letters 32 (2011) 252�257.
- *
- * Updated in 2011 to avoid a corner case causing an
- * infinite loop for some input data.
- *
-*/
-
-/*
-
-Copyright (C) 2011 by Stefan Gustavson
-
-(stefan.gustavson@liu.se)
-
-This code is distributed under the permissive "MIT license":
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-*/
-
-#include <math.h>
-
-/*
- * Compute the local gradient at edge pixels using convolution filters.
- * The gradient is computed only at edge pixels. At other places in the
- * image, it is never used, and it's mostly zero anyway.
- */
-void computegradient(double *img, int w, int h, double *gx, double *gy)
-{
-    int i,j,k;
-    double glength;
-#define SQRT2 1.4142136
-    for(i = 1; i < h-1; i++) { // Avoid edges where the kernels would spill over
-        for(j = 1; j < w-1; j++) {
-            k = i*w + j;
-            if((img[k]>0.0) && (img[k]<1.0)) { // Compute gradient for edge pixels only
-                gx[k] = -img[k-w-1] - SQRT2*img[k-1] - img[k+w-1] + img[k-w+1] + SQRT2*img[k+1] + img[k+w+1];
-                gy[k] = -img[k-w-1] - SQRT2*img[k-w] - img[k-w+1] + img[k+w-1] + SQRT2*img[k+w] + img[k+w+1];
-                glength = gx[k]*gx[k] + gy[k]*gy[k];
-                if(glength > 0.0) { // Avoid division by zero
-                    glength = sqrt(glength);
-                    gx[k]=gx[k]/glength;
-                    gy[k]=gy[k]/glength;
-                }
-            }
-        }
-    }
-    // TODO: Compute reasonable values for gx, gy also around the image edges.
-    // (These are zero now, which reduces the accuracy for a 1-pixel wide region
-	// around the image edge.) 2x2 kernels would be suitable for this.
-}
-
-/*
- * A somewhat tricky function to approximate the distance to an edge in a
- * certain pixel, with consideration to either the local gradient (gx,gy)
- * or the direction to the pixel (dx,dy) and the pixel greyscale value a.
- * The latter alternative, using (dx,dy), is the metric used by edtaa2().
- * Using a local estimate of the edge gradient (gx,gy) yields much better
- * accuracy at and near edges, and reduces the error even at distant pixels
- * provided that the gradient direction is accurately estimated.
- */
-double edgedf(double gx, double gy, double a)
-{
-    double df, glength, temp, a1;
-
-    if ((gx == 0) || (gy == 0)) { // Either A) gu or gv are zero, or B) both
-        df = 0.5-a;  // Linear approximation is A) correct or B) a fair guess
-    } else {
-        glength = sqrt(gx*gx + gy*gy);
-        if(glength>0) {
-            gx = gx/glength;
-            gy = gy/glength;
-        }
-        /* Everything is symmetric wrt sign and transposition,
-         * so move to first octant (gx>=0, gy>=0, gx>=gy) to
-         * avoid handling all possible edge directions.
-         */
-        gx = fabs(gx);
-        gy = fabs(gy);
-        if(gx<gy) {
-            temp = gx;
-            gx = gy;
-            gy = temp;
-        }
-        a1 = 0.5*gy/gx;
-        if (a < a1) { // 0 <= a < a1
-            df = 0.5*(gx + gy) - sqrt(2.0*gx*gy*a);
-        } else if (a < (1.0-a1)) { // a1 <= a <= 1-a1
-            df = (0.5-a)*gx;
-        } else { // 1-a1 < a <= 1
-            df = -0.5*(gx + gy) + sqrt(2.0*gx*gy*(1.0-a));
-        }
-    }    
-    return df;
-}
-
-double distaa3(double *img, double *gximg, double *gyimg, int w, int c, int xc, int yc, int xi, int yi)
-{
-  double di, df, dx, dy, gx, gy, a;
-  int closest;
-  
-  closest = c-xc-yc*w; // Index to the edge pixel pointed to from c
-  a = img[closest];    // Grayscale value at the edge pixel
-  gx = gximg[closest]; // X gradient component at the edge pixel
-  gy = gyimg[closest]; // Y gradient component at the edge pixel
-  
-  if(a > 1.0) a = 1.0;
-  if(a < 0.0) a = 0.0; // Clip grayscale values outside the range [0,1]
-  if(a == 0.0) return 1000000.0; // Not an object pixel, return "very far" ("don't know yet")
-
-  dx = (double)xi;
-  dy = (double)yi;
-  di = sqrt(dx*dx + dy*dy); // Length of integer vector, like a traditional EDT
-  if(di==0) { // Use local gradient only at edges
-      // Estimate based on local gradient only
-      df = edgedf(gx, gy, a);
-  } else {
-      // Estimate gradient based on direction to edge (accurate for large di)
-      df = edgedf(dx, dy, a);
-  }
-  return di + df; // Same metric as edtaa2, except at edges (where di=0)
-}
-
-// Shorthand macro: add ubiquitous parameters img, gx, gy and w and call distaa3()
-#define DISTAA(c,xc,yc,xi,yi) (distaa3(img, gx, gy, w, c, xc, yc, xi, yi))
-
-void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist)
-{
-  int x, y, i, c;
-  int offset_u, offset_ur, offset_r, offset_rd,
-  offset_d, offset_dl, offset_l, offset_lu;
-  double olddist, newdist;
-  int cdistx, cdisty, newdistx, newdisty;
-  int changed;
-  double epsilon = 1e-3; // Safeguard against errors due to limited precision
-
-  /* Initialize index offsets for the current image width */
-  offset_u = -w;
-  offset_ur = -w+1;
-  offset_r = 1;
-  offset_rd = w+1;
-  offset_d = w;
-  offset_dl = w-1;
-  offset_l = -1;
-  offset_lu = -w-1;
-
-  /* Initialize the distance images */
-  for(i=0; i<w*h; i++) {
-    distx[i] = 0; // At first, all pixels point to
-    disty[i] = 0; // themselves as the closest known.
-    if(img[i] <= 0.0)
-      {
-	dist[i]= 1000000.0; // Big value, means "not set yet"
-      }
-    else if (img[i]<1.0) {
-      dist[i] = edgedf(gx[i], gy[i], img[i]); // Gradient-assisted estimate
-    }
-    else {
-      dist[i]= 0.0; // Inside the object
-    }
-  }
-
-  /* Perform the transformation */
-  do
-    {
-      changed = 0;
-
-      /* Scan rows, except first row */
-      for(y=1; y<h; y++)
-        {
-
-          /* move index to leftmost pixel of current row */
-          i = y*w;
-
-          /* scan right, propagate distances from above & left */
-
-          /* Leftmost pixel is special, has no left neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If non-zero distance or not set yet
-            {
-	      c = i + offset_u; // Index of candidate for testing
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_ur;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-          i++;
-
-          /* Middle pixels have all neighbors */
-          for(x=1; x<w-1; x++, i++)
-            {
-              olddist = dist[i];
-              if(olddist <= 0) continue; // No need to update further
-
-	      c = i+offset_l;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_lu;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_u;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_ur;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-
-          /* Rightmost pixel of row is special, has no right neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If not already zero distance
-            {
-	      c = i+offset_l;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_lu;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_u;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-
-          /* Move index to second rightmost pixel of current row. */
-          /* Rightmost pixel is skipped, it has no right neighbor. */
-          i = y*w + w-2;
-
-          /* scan left, propagate distance from right */
-          for(x=w-2; x>=0; x--, i--)
-            {
-              olddist = dist[i];
-              if(olddist <= 0) continue; // Already zero distance
-
-	      c = i+offset_r;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-        }
-      
-      /* Scan rows in reverse order, except last row */
-      for(y=h-2; y>=0; y--)
-        {
-          /* move index to rightmost pixel of current row */
-          i = y*w + w-1;
-
-          /* Scan left, propagate distances from below & right */
-
-          /* Rightmost pixel is special, has no right neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If not already zero distance
-            {
-	      c = i+offset_d;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_dl;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-          i--;
-
-          /* Middle pixels have all neighbors */
-          for(x=w-2; x>0; x--, i--)
-            {
-              olddist = dist[i];
-              if(olddist <= 0) continue; // Already zero distance
-
-	      c = i+offset_r;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_rd;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_d;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_dl;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-          /* Leftmost pixel is special, has no left neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If not already zero distance
-            {
-	      c = i+offset_r;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_rd;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_d;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-
-          /* Move index to second leftmost pixel of current row. */
-          /* Leftmost pixel is skipped, it has no left neighbor. */
-          i = y*w + 1;
-          for(x=1; x<w; x++, i++)
-            {
-              /* scan right, propagate distance from left */
-              olddist = dist[i];
-              if(olddist <= 0) continue; // Already zero distance
-
-	      c = i+offset_l;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-        }
-    }
-  while(changed); // Sweep until no more updates are made
-
-  /* The transformation is completed. */
-
-}
diff --git a/3rdparty/edtaa3/edtaa3func.h b/3rdparty/edtaa3/edtaa3func.h
deleted file mode 100644
index 6052aa477..000000000
--- a/3rdparty/edtaa3/edtaa3func.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef EDTAA3_H_HEADER_GUARD
-#define EDTAA3_H_HEADER_GUARD
-
-extern void computegradient(double *img, int w, int h, double *gx, double *gy);
-extern void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist);
-
-#endif // EDTAA3_H_HEADER_GUARD
diff --git a/3rdparty/etc1/LICENSE b/3rdparty/etc1/LICENSE
deleted file mode 100644
index 64635a408..000000000
--- a/3rdparty/etc1/LICENSE
+++ /dev/null
@@ -1,161 +0,0 @@
-Apache License
-
-Version 2.0, January 2004
-
-http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
-"License" shall mean the terms and conditions for use, reproduction, and
-distribution as defined by Sections 1 through 9 of this document.
-
-"Licensor" shall mean the copyright owner or entity authorized by the
-copyright owner that is granting the License.
-
-"Legal Entity" shall mean the union of the acting entity and all other
-entities that control, are controlled by, or are under common control with
-that entity. For the purposes of this definition, "control" means (i) the
-power, direct or indirect, to cause the direction or management of such 
-entity, whether by contract or otherwise, or (ii) ownership of fifty 
-percent (50%) or more of the outstanding shares, or (iii) beneficial 
-ownership of such entity.
-
-"You" (or "Your") shall mean an individual or Legal Entity exercising 
-permissions granted by this License.
-
-"Source" form shall mean the preferred form for making modifications, 
-including but not limited to software source code, documentation 
-source, and configuration files.
-
-"Object" form shall mean any form resulting from mechanical transformation 
-or translation of a Source form, including but not limited to compiled 
-object code, generated documentation, and conversions to other media types.
-
-"Work" shall mean the work of authorship, whether in Source or Object 
-form, made available under the License, as indicated by a copyright 
-notice that is included in or attached to the work (an example is 
-provided in the Appendix below).
-
-"Derivative Works" shall mean any work, whether in Source or Object 
-form, that is based on (or derived from) the Work and for which the 
-editorial revisions, annotations, elaborations, or other modifications 
-represent, as a whole, an original work of authorship. For the purposes 
-of this License, Derivative Works shall not include works that remain 
-separable from, or merely link (or bind by name) to the interfaces of, 
-the Work and Derivative Works thereof.
-
-"Contribution" shall mean any work of authorship, including the original 
-version of the Work and any modifications or additions to that Work or 
-Derivative Works thereof, that is intentionally submitted to Licensor 
-for inclusion in the Work by the copyright owner or by an individual or 
-Legal Entity authorized to submit on behalf of the copyright owner. For 
-the purposes of this definition, "submitted" means any form of electronic, 
-verbal, or written communication sent to the Licensor or its 
-representatives, including but not limited to communication on electronic 
-mailing lists, source code control systems, and issue tracking systems that 
-are managed by, or on behalf of, the Licensor for the purpose of discussing 
-and improving the Work, but excluding communication that is conspicuously 
-marked or otherwise designated in writing by the copyright owner as "Not 
-a Contribution."
-
-"Contributor" shall mean Licensor and any individual or Legal Entity on 
-behalf of whom a Contribution has been received by Licensor and subsequently 
-incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of this 
-License, each Contributor hereby grants to You a perpetual, worldwide, 
-non-exclusive, no-charge, royalty-free, irrevocable copyright license to 
-reproduce, prepare Derivative Works of, publicly display, publicly perform, 
-sublicense, and distribute the Work and such Derivative Works in Source or 
-Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of this 
-License, each Contributor hereby grants to You a perpetual, worldwide, 
-non-exclusive, no-charge, royalty-free, irrevocable (except as stated in 
-this section) patent license to make, have made, use, offer to sell, sell, 
-import, and otherwise transfer the Work, where such license applies only to 
-those patent claims licensable by such Contributor that are necessarily 
-infringed by their Contribution(s) alone or by combination of their 
-Contribution(s) with the Work to which such Contribution(s) was submitted. 
-If You institute patent litigation against any entity (including a cross-claim
-or counterclaim in a lawsuit) alleging that the Work or a Contribution 
-incorporated within the Work constitutes direct or contributory patent 
-infringement, then any patent licenses granted to You under this License 
-for that Work shall terminate as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the Work or 
-Derivative Works thereof in any medium, with or without modifications, and 
-in Source or Object form, provided that You meet the following conditions:
-
-You must give any other recipients of the Work or Derivative Works a copy of 
-this License; and
-You must cause any modified files to carry prominent notices stating that 
-You changed the files; and
-You must retain, in the Source form of any Derivative Works that You 
-distribute, all copyright, patent, trademark, and attribution notices 
-from the Source form of the Work, excluding those notices that do not 
-pertain to any part of the Derivative Works; and
-If the Work includes a "NOTICE" text file as part of its distribution, 
-then any Derivative Works that You distribute must include a readable 
-copy of the attribution notices contained within such NOTICE file, excluding
-those notices that do not pertain to any part of the Derivative Works, in
-at least one of the following places: within a NOTICE text file distributed 
-as part of the Derivative Works; within the Source form or documentation, if 
-provided along with the Derivative Works; or, within a display generated by 
-the Derivative Works, if and wherever such third-party notices normally 
-appear. The contents of the NOTICE file are for informational purposes 
-only and do not modify the License. You may add Your own attribution 
-notices within Derivative Works that You distribute, alongside or as 
-an addendum to the NOTICE text from the Work, provided that such additional 
-attribution notices cannot be construed as modifying the License. 
-
-You may add Your own copyright statement to Your modifications and may provide
-additional or different license terms and conditions for use, reproduction, or
-distribution of Your modifications, or for any such Derivative Works as a 
-whole, provided Your use, reproduction, and distribution of the Work otherwise 
-complies with the conditions stated in this License.
-5. Submission of Contributions. Unless You explicitly state otherwise, any 
-Contribution intentionally submitted for inclusion in the Work by You to the 
-Licensor shall be under the terms and conditions of this License, without any 
-additional terms or conditions. Notwithstanding the above, nothing herein 
-shall supersede or modify the terms of any separate license agreement you 
-may have executed with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade names, 
-trademarks, service marks, or product names of the Licensor, except as 
-required for reasonable and customary use in describing the origin of the 
-Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or agreed to 
-in writing, Licensor provides the Work (and each Contributor provides its 
-Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 
-ANY KIND, either express or implied, including, without limitation, any 
-warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or 
-FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining 
-the appropriateness of using or redistributing the Work and assume any risks 
-associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory, whether in
-tort (including negligence), contract, or otherwise, unless required by 
-applicable law (such as deliberate and grossly negligent acts) or agreed to 
-in writing, shall any Contributor be liable to You for damages, including 
-any direct, indirect, special, incidental, or consequential damages of any 
-character arising as a result of this License or out of the use or inability 
-to use the Work (including but not limited to damages for loss of goodwill, 
-work stoppage, computer failure or malfunction, or any and all other 
-commercial damages or losses), even if such Contributor has been advised 
-of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing the 
-Work or Derivative Works thereof, You may choose to offer, and charge a 
-fee for, acceptance of support, warranty, indemnity, or other liability 
-obligations and/or rights consistent with this License. However, in accepting
-such obligations, You may act only on Your own behalf and on Your sole 
-responsibility, not on behalf of any other Contributor, and only if You
-agree to indemnify, defend, and hold each Contributor harmless for any 
-liability incurred by, or claims asserted against, such Contributor by 
-reason of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/3rdparty/etc1/etc1.cpp b/3rdparty/etc1/etc1.cpp
deleted file mode 100644
index 0953f9897..000000000
--- a/3rdparty/etc1/etc1.cpp
+++ /dev/null
@@ -1,686 +0,0 @@
-// Copyright 2009 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-// This is a fork of the AOSP project ETC1 codec. The original code can be found
-// at the following web site:
-// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/include/ETC1/
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include "etc1.h"
-
-#include <cstring>
-
-/* From http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
-
- The number of bits that represent a 4x4 texel block is 64 bits if
- <internalformat> is given by ETC1_RGB8_OES.
-
- The data for a block is a number of bytes,
-
- {q0, q1, q2, q3, q4, q5, q6, q7}
-
- where byte q0 is located at the lowest memory address and q7 at
- the highest. The 64 bits specifying the block is then represented
- by the following 64 bit integer:
-
- int64bit = 256*(256*(256*(256*(256*(256*(256*q0+q1)+q2)+q3)+q4)+q5)+q6)+q7;
-
- ETC1_RGB8_OES:
-
- a) bit layout in bits 63 through 32 if diffbit = 0
-
- 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
- -----------------------------------------------
- | base col1 | base col2 | base col1 | base col2 |
- | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)|
- -----------------------------------------------
-
- 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
- ---------------------------------------------------
- | base col1 | base col2 | table  | table  |diff|flip|
- | B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
- ---------------------------------------------------
-
-
- b) bit layout in bits 63 through 32 if diffbit = 1
-
- 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
- -----------------------------------------------
- | base col1    | dcol 2 | base col1    | dcol 2 |
- | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    |
- -----------------------------------------------
-
- 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
- ---------------------------------------------------
- | base col 1   | dcol 2 | table  | table  |diff|flip|
- | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
- ---------------------------------------------------
-
-
- c) bit layout in bits 31 through 0 (in both cases)
-
- 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
- -----------------------------------------------
- |       most significant pixel index bits       |
- | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a|
- -----------------------------------------------
-
- 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
- --------------------------------------------------
- |         least significant pixel index bits       |
- | p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
- --------------------------------------------------
-
-
- Add table 3.17.2: Intensity modifier sets for ETC1 compressed textures:
-
- table codeword                modifier table
- ------------------        ----------------------
- 0                     -8  -2  2   8
- 1                    -17  -5  5  17
- 2                    -29  -9  9  29
- 3                    -42 -13 13  42
- 4                    -60 -18 18  60
- 5                    -80 -24 24  80
- 6                   -106 -33 33 106
- 7                   -183 -47 47 183
-
-
- Add table 3.17.3 Mapping from pixel index values to modifier values for
- ETC1 compressed textures:
-
- pixel index value
- ---------------
- msb     lsb           resulting modifier value
- -----   -----          -------------------------
- 1       1            -b (large negative value)
- 1       0            -a (small negative value)
- 0       0             a (small positive value)
- 0       1             b (large positive value)
-
-
- */
-
-static const int kModifierTable[] = {
-/* 0 */2, 8, -2, -8,
-/* 1 */5, 17, -5, -17,
-/* 2 */9, 29, -9, -29,
-/* 3 */13, 42, -13, -42,
-/* 4 */18, 60, -18, -60,
-/* 5 */24, 80, -24, -80,
-/* 6 */33, 106, -33, -106,
-/* 7 */47, 183, -47, -183 };
-
-static const int kLookup[8] = { 0, 1, 2, 3, -4, -3, -2, -1 };
-
-static inline etc1_byte clamp(int x) {
-    return (etc1_byte) (x >= 0 ? (x < 255 ? x : 255) : 0);
-}
-
-static
-inline int convert4To8(int b) {
-    int c = b & 0xf;
-    return (c << 4) | c;
-}
-
-static
-inline int convert5To8(int b) {
-    int c = b & 0x1f;
-    return (c << 3) | (c >> 2);
-}
-
-static
-inline int convert6To8(int b) {
-    int c = b & 0x3f;
-    return (c << 2) | (c >> 4);
-}
-
-static
-inline int divideBy255(int d) {
-    return (d + 128 + (d >> 8)) >> 8;
-}
-
-static
-inline int convert8To4(int b) {
-    int c = b & 0xff;
-    return divideBy255(c * 15);
-}
-
-static
-inline int convert8To5(int b) {
-    int c = b & 0xff;
-    return divideBy255(c * 31);
-}
-
-static
-inline int convertDiff(int base, int diff) {
-    return convert5To8((0x1f & base) + kLookup[0x7 & diff]);
-}
-
-static
-void decode_subblock(etc1_byte* pOut, int r, int g, int b, const int* table,
-        etc1_uint32 low, bool second, bool flipped) {
-    int baseX = 0;
-    int baseY = 0;
-    if (second) {
-        if (flipped) {
-            baseY = 2;
-        } else {
-            baseX = 2;
-        }
-    }
-    for (int i = 0; i < 8; i++) {
-        int x, y;
-        if (flipped) {
-            x = baseX + (i >> 1);
-            y = baseY + (i & 1);
-        } else {
-            x = baseX + (i >> 2);
-            y = baseY + (i & 3);
-        }
-        int k = y + (x * 4);
-        int offset = ((low >> k) & 1) | ((low >> (k + 15)) & 2);
-        int delta = table[offset];
-        etc1_byte* q = pOut + 3 * (x + 4 * y);
-        *q++ = clamp(r + delta);
-        *q++ = clamp(g + delta);
-        *q++ = clamp(b + delta);
-    }
-}
-
-// Input is an ETC1 compressed version of the data.
-// Output is a 4 x 4 square of 3-byte pixels in form R, G, B
-
-void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut) {
-    etc1_uint32 high = (pIn[0] << 24) | (pIn[1] << 16) | (pIn[2] << 8) | pIn[3];
-    etc1_uint32 low = (pIn[4] << 24) | (pIn[5] << 16) | (pIn[6] << 8) | pIn[7];
-    int r1, r2, g1, g2, b1, b2;
-    if (high & 2) {
-        // differential
-        int rBase = high >> 27;
-        int gBase = high >> 19;
-        int bBase = high >> 11;
-        r1 = convert5To8(rBase);
-        r2 = convertDiff(rBase, high >> 24);
-        g1 = convert5To8(gBase);
-        g2 = convertDiff(gBase, high >> 16);
-        b1 = convert5To8(bBase);
-        b2 = convertDiff(bBase, high >> 8);
-    } else {
-        // not differential
-        r1 = convert4To8(high >> 28);
-        r2 = convert4To8(high >> 24);
-        g1 = convert4To8(high >> 20);
-        g2 = convert4To8(high >> 16);
-        b1 = convert4To8(high >> 12);
-        b2 = convert4To8(high >> 8);
-    }
-    int tableIndexA = 7 & (high >> 5);
-    int tableIndexB = 7 & (high >> 2);
-    const int* tableA = kModifierTable + tableIndexA * 4;
-    const int* tableB = kModifierTable + tableIndexB * 4;
-    bool flipped = (high & 1) != 0;
-    decode_subblock(pOut, r1, g1, b1, tableA, low, false, flipped);
-    decode_subblock(pOut, r2, g2, b2, tableB, low, true, flipped);
-}
-
-typedef struct {
-    etc1_uint32 high;
-    etc1_uint32 low;
-    etc1_uint32 score; // Lower is more accurate
-} etc_compressed;
-
-static
-inline void take_best(etc_compressed* a, const etc_compressed* b) {
-    if (a->score > b->score) {
-        *a = *b;
-    }
-}
-
-static
-void etc_average_colors_subblock(const etc1_byte* pIn, etc1_uint32 inMask,
-        etc1_byte* pColors, bool flipped, bool second) {
-    int r = 0;
-    int g = 0;
-    int b = 0;
-
-    if (flipped) {
-        int by = 0;
-        if (second) {
-            by = 2;
-        }
-        for (int y = 0; y < 2; y++) {
-            int yy = by + y;
-            for (int x = 0; x < 4; x++) {
-                int i = x + 4 * yy;
-                if (inMask & (1 << i)) {
-                    const etc1_byte* p = pIn + i * 3;
-                    r += *(p++);
-                    g += *(p++);
-                    b += *(p++);
-                }
-            }
-        }
-    } else {
-        int bx = 0;
-        if (second) {
-            bx = 2;
-        }
-        for (int y = 0; y < 4; y++) {
-            for (int x = 0; x < 2; x++) {
-                int xx = bx + x;
-                int i = xx + 4 * y;
-                if (inMask & (1 << i)) {
-                    const etc1_byte* p = pIn + i * 3;
-                    r += *(p++);
-                    g += *(p++);
-                    b += *(p++);
-                }
-            }
-        }
-    }
-    pColors[0] = (etc1_byte)((r + 4) >> 3);
-    pColors[1] = (etc1_byte)((g + 4) >> 3);
-    pColors[2] = (etc1_byte)((b + 4) >> 3);
-}
-
-static
-inline int square(int x) {
-    return x * x;
-}
-
-static etc1_uint32 chooseModifier(const etc1_byte* pBaseColors,
-        const etc1_byte* pIn, etc1_uint32 *pLow, int bitIndex,
-        const int* pModifierTable) {
-    etc1_uint32 bestScore = ~0;
-    int bestIndex = 0;
-    int pixelR = pIn[0];
-    int pixelG = pIn[1];
-    int pixelB = pIn[2];
-    int r = pBaseColors[0];
-    int g = pBaseColors[1];
-    int b = pBaseColors[2];
-    for (int i = 0; i < 4; i++) {
-        int modifier = pModifierTable[i];
-        int decodedG = clamp(g + modifier);
-        etc1_uint32 score = (etc1_uint32) (6 * square(decodedG - pixelG));
-        if (score >= bestScore) {
-            continue;
-        }
-        int decodedR = clamp(r + modifier);
-        score += (etc1_uint32) (3 * square(decodedR - pixelR));
-        if (score >= bestScore) {
-            continue;
-        }
-        int decodedB = clamp(b + modifier);
-        score += (etc1_uint32) square(decodedB - pixelB);
-        if (score < bestScore) {
-            bestScore = score;
-            bestIndex = i;
-        }
-    }
-    etc1_uint32 lowMask = (((bestIndex >> 1) << 16) | (bestIndex & 1))
-            << bitIndex;
-    *pLow |= lowMask;
-    return bestScore;
-}
-
-static
-void etc_encode_subblock_helper(const etc1_byte* pIn, etc1_uint32 inMask,
-        etc_compressed* pCompressed, bool flipped, bool second,
-        const etc1_byte* pBaseColors, const int* pModifierTable) {
-    int score = pCompressed->score;
-    if (flipped) {
-        int by = 0;
-        if (second) {
-            by = 2;
-        }
-        for (int y = 0; y < 2; y++) {
-            int yy = by + y;
-            for (int x = 0; x < 4; x++) {
-                int i = x + 4 * yy;
-                if (inMask & (1 << i)) {
-                    score += chooseModifier(pBaseColors, pIn + i * 3,
-                            &pCompressed->low, yy + x * 4, pModifierTable);
-                }
-            }
-        }
-    } else {
-        int bx = 0;
-        if (second) {
-            bx = 2;
-        }
-        for (int y = 0; y < 4; y++) {
-            for (int x = 0; x < 2; x++) {
-                int xx = bx + x;
-                int i = xx + 4 * y;
-                if (inMask & (1 << i)) {
-                    score += chooseModifier(pBaseColors, pIn + i * 3,
-                            &pCompressed->low, y + xx * 4, pModifierTable);
-                }
-            }
-        }
-    }
-    pCompressed->score = score;
-}
-
-static bool inRange4bitSigned(int color) {
-    return color >= -4 && color <= 3;
-}
-
-static void etc_encodeBaseColors(etc1_byte* pBaseColors,
-        const etc1_byte* pColors, etc_compressed* pCompressed) {
-    int r1, g1, b1, r2, g2, b2; // 8 bit base colors for sub-blocks
-    bool differential;
-    {
-        int r51 = convert8To5(pColors[0]);
-        int g51 = convert8To5(pColors[1]);
-        int b51 = convert8To5(pColors[2]);
-        int r52 = convert8To5(pColors[3]);
-        int g52 = convert8To5(pColors[4]);
-        int b52 = convert8To5(pColors[5]);
-
-        r1 = convert5To8(r51);
-        g1 = convert5To8(g51);
-        b1 = convert5To8(b51);
-
-        int dr = r52 - r51;
-        int dg = g52 - g51;
-        int db = b52 - b51;
-
-        differential = inRange4bitSigned(dr) && inRange4bitSigned(dg)
-                && inRange4bitSigned(db);
-        if (differential) {
-            r2 = convert5To8(r51 + dr);
-            g2 = convert5To8(g51 + dg);
-            b2 = convert5To8(b51 + db);
-            pCompressed->high |= (r51 << 27) | ((7 & dr) << 24) | (g51 << 19)
-                    | ((7 & dg) << 16) | (b51 << 11) | ((7 & db) << 8) | 2;
-        }
-    }
-
-    if (!differential) {
-        int r41 = convert8To4(pColors[0]);
-        int g41 = convert8To4(pColors[1]);
-        int b41 = convert8To4(pColors[2]);
-        int r42 = convert8To4(pColors[3]);
-        int g42 = convert8To4(pColors[4]);
-        int b42 = convert8To4(pColors[5]);
-        r1 = convert4To8(r41);
-        g1 = convert4To8(g41);
-        b1 = convert4To8(b41);
-        r2 = convert4To8(r42);
-        g2 = convert4To8(g42);
-        b2 = convert4To8(b42);
-        pCompressed->high |= (r41 << 28) | (r42 << 24) | (g41 << 20) | (g42
-                << 16) | (b41 << 12) | (b42 << 8);
-    }
-    pBaseColors[0] = r1;
-    pBaseColors[1] = g1;
-    pBaseColors[2] = b1;
-    pBaseColors[3] = r2;
-    pBaseColors[4] = g2;
-    pBaseColors[5] = b2;
-}
-
-static
-void etc_encode_block_helper(const etc1_byte* pIn, etc1_uint32 inMask,
-        const etc1_byte* pColors, etc_compressed* pCompressed, bool flipped) {
-    pCompressed->score = ~0;
-    pCompressed->high = (flipped ? 1 : 0);
-    pCompressed->low = 0;
-
-    etc1_byte pBaseColors[6];
-
-    etc_encodeBaseColors(pBaseColors, pColors, pCompressed);
-
-    int originalHigh = pCompressed->high;
-
-    const int* pModifierTable = kModifierTable;
-    for (int i = 0; i < 8; i++, pModifierTable += 4) {
-        etc_compressed temp;
-        temp.score = 0;
-        temp.high = originalHigh | (i << 5);
-        temp.low = 0;
-        etc_encode_subblock_helper(pIn, inMask, &temp, flipped, false,
-                pBaseColors, pModifierTable);
-        take_best(pCompressed, &temp);
-    }
-    pModifierTable = kModifierTable;
-    etc_compressed firstHalf = *pCompressed;
-    for (int i = 0; i < 8; i++, pModifierTable += 4) {
-        etc_compressed temp;
-        temp.score = firstHalf.score;
-        temp.high = firstHalf.high | (i << 2);
-        temp.low = firstHalf.low;
-        etc_encode_subblock_helper(pIn, inMask, &temp, flipped, true,
-                pBaseColors + 3, pModifierTable);
-        if (i == 0) {
-            *pCompressed = temp;
-        } else {
-            take_best(pCompressed, &temp);
-        }
-    }
-}
-
-static void writeBigEndian(etc1_byte* pOut, etc1_uint32 d) {
-    pOut[0] = (etc1_byte)(d >> 24);
-    pOut[1] = (etc1_byte)(d >> 16);
-    pOut[2] = (etc1_byte)(d >> 8);
-    pOut[3] = (etc1_byte) d;
-}
-
-// Input is a 4 x 4 square of 3-byte pixels in form R, G, B
-// inmask is a 16-bit mask where bit (1 << (x + y * 4)) tells whether the corresponding (x,y)
-// pixel is valid or not. Invalid pixel color values are ignored when compressing.
-// Output is an ETC1 compressed version of the data.
-
-void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 inMask,
-        etc1_byte* pOut) {
-    etc1_byte colors[6];
-    etc1_byte flippedColors[6];
-    etc_average_colors_subblock(pIn, inMask, colors, false, false);
-    etc_average_colors_subblock(pIn, inMask, colors + 3, false, true);
-    etc_average_colors_subblock(pIn, inMask, flippedColors, true, false);
-    etc_average_colors_subblock(pIn, inMask, flippedColors + 3, true, true);
-
-    etc_compressed a, b;
-    etc_encode_block_helper(pIn, inMask, colors, &a, false);
-    etc_encode_block_helper(pIn, inMask, flippedColors, &b, true);
-    take_best(&a, &b);
-    writeBigEndian(pOut, a.high);
-    writeBigEndian(pOut + 4, a.low);
-}
-
-// Return the size of the encoded image data (does not include size of PKM header).
-
-etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height) {
-    return (((width + 3) & ~3) * ((height + 3) & ~3)) >> 1;
-}
-
-// Encode an entire image.
-// pIn - pointer to the image data. Formatted such that the Red component of
-//       pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset;
-// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
-
-int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut) {
-    if (pixelSize < 2 || pixelSize > 4) {
-        return -1;
-    }
-    static const unsigned short kYMask[] = { 0x0, 0xf, 0xff, 0xfff, 0xffff };
-    static const unsigned short kXMask[] = { 0x0, 0x1111, 0x3333, 0x7777,
-            0xffff };
-    etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
-    etc1_byte encoded[ETC1_ENCODED_BLOCK_SIZE];
-
-    etc1_uint32 encodedWidth = (width + 3) & ~3;
-    etc1_uint32 encodedHeight = (height + 3) & ~3;
-
-    for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
-        etc1_uint32 yEnd = height - y;
-        if (yEnd > 4) {
-            yEnd = 4;
-        }
-        int ymask = kYMask[yEnd];
-        for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
-            etc1_uint32 xEnd = width - x;
-            if (xEnd > 4) {
-                xEnd = 4;
-            }
-            int mask = ymask & kXMask[xEnd];
-            for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
-                etc1_byte* q = block + (cy * 4) * 3;
-                const etc1_byte* p = pIn + pixelSize * x + stride * (y + cy);
-                if (pixelSize >= 3) {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        memcpy(q, p, 3);
-                        q += 3;
-                        p += pixelSize;
-                    }
-                } else {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        int pixel = (p[1] << 8) | p[0];
-                        *q++ = convert5To8(pixel >> 11);
-                        *q++ = convert6To8(pixel >> 5);
-                        *q++ = convert5To8(pixel);
-                        p += pixelSize;
-                    }
-                }
-            }
-            etc1_encode_block(block, mask, encoded);
-            memcpy(pOut, encoded, sizeof(encoded));
-            pOut += sizeof(encoded);
-        }
-    }
-    return 0;
-}
-
-// Decode an entire image.
-// pIn - pointer to encoded data.
-// pOut - pointer to the image data. Will be written such that the Red component of
-//       pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset. Must be
-//        large enough to store entire image.
-
-
-int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
-        etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride) {
-    if (pixelSize < 2 || pixelSize > 4) {
-        return -1;
-    }
-    etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
-
-    etc1_uint32 encodedWidth = (width + 3) & ~3;
-    etc1_uint32 encodedHeight = (height + 3) & ~3;
-
-    for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
-        etc1_uint32 yEnd = height - y;
-        if (yEnd > 4) {
-            yEnd = 4;
-        }
-        for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
-            etc1_uint32 xEnd = width - x;
-            if (xEnd > 4) {
-                xEnd = 4;
-            }
-            etc1_decode_block(pIn, block);
-            pIn += ETC1_ENCODED_BLOCK_SIZE;
-            for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
-                const etc1_byte* q = block + (cy * 4) * 3;
-                etc1_byte* p = pOut + pixelSize * x + stride * (y + cy);
-                if (pixelSize >= 3) {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        memcpy(p, q, 3);
-                        q += 3;
-                        p += pixelSize;
-                    }
-                } else {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        etc1_byte r = *q++;
-                        etc1_byte g = *q++;
-                        etc1_byte b = *q++;
-                        etc1_uint32 pixel = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
-                        *p++ = (etc1_byte) pixel;
-                        *p++ = (etc1_byte) (pixel >> 8);
-                    }
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-static const char kMagic[] = { 'P', 'K', 'M', ' ', '1', '0' };
-
-static const etc1_uint32 ETC1_PKM_FORMAT_OFFSET = 6;
-static const etc1_uint32 ETC1_PKM_ENCODED_WIDTH_OFFSET = 8;
-static const etc1_uint32 ETC1_PKM_ENCODED_HEIGHT_OFFSET = 10;
-static const etc1_uint32 ETC1_PKM_WIDTH_OFFSET = 12;
-static const etc1_uint32 ETC1_PKM_HEIGHT_OFFSET = 14;
-
-static const etc1_uint32 ETC1_RGB_NO_MIPMAPS = 0;
-
-static void writeBEUint16(etc1_byte* pOut, etc1_uint32 data) {
-    pOut[0] = (etc1_byte) (data >> 8);
-    pOut[1] = (etc1_byte) data;
-}
-
-static etc1_uint32 readBEUint16(const etc1_byte* pIn) {
-    return (pIn[0] << 8) | pIn[1];
-}
-
-// Format a PKM header
-
-void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height) {
-    memcpy(pHeader, kMagic, sizeof(kMagic));
-    etc1_uint32 encodedWidth = (width + 3) & ~3;
-    etc1_uint32 encodedHeight = (height + 3) & ~3;
-    writeBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET, ETC1_RGB_NO_MIPMAPS);
-    writeBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET, encodedWidth);
-    writeBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET, encodedHeight);
-    writeBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET, width);
-    writeBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET, height);
-}
-
-// Check if a PKM header is correctly formatted.
-
-etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader) {
-    if (memcmp(pHeader, kMagic, sizeof(kMagic))) {
-        return false;
-    }
-    etc1_uint32 format = readBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET);
-    etc1_uint32 encodedWidth = readBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET);
-    etc1_uint32 encodedHeight = readBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET);
-    etc1_uint32 width = readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
-    etc1_uint32 height = readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
-    return format == ETC1_RGB_NO_MIPMAPS &&
-            encodedWidth >= width && encodedWidth - width < 4 &&
-            encodedHeight >= height && encodedHeight - height < 4;
-}
-
-// Read the image width from a PKM header
-
-etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader) {
-    return readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
-}
-
-// Read the image height from a PKM header
-
-etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader){
-    return readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
-}
diff --git a/3rdparty/etc1/etc1.h b/3rdparty/etc1/etc1.h
deleted file mode 100644
index d66ca9d3e..000000000
--- a/3rdparty/etc1/etc1.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2009 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-// This is a fork of the AOSP project ETC1 codec. The original code can be found
-// at the following web site:
-// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/libs/ETC1/
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#ifndef __etc1_h__
-#define __etc1_h__
-
-#define ETC1_ENCODED_BLOCK_SIZE 8
-#define ETC1_DECODED_BLOCK_SIZE 48
-
-#ifndef ETC1_RGB8_OES
-#define ETC1_RGB8_OES 0x8D64
-#endif
-
-typedef unsigned char etc1_byte;
-typedef int etc1_bool;
-typedef unsigned int etc1_uint32;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Encode a block of pixels.
-//
-// pIn is a pointer to a ETC_DECODED_BLOCK_SIZE array of bytes that represent a
-// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y) is the R
-// value of pixel (x, y).
-//
-// validPixelMask is a 16-bit mask where bit (1 << (x + y * 4)) indicates whether
-// the corresponding (x,y) pixel is valid. Invalid pixel color values are ignored when compressing.
-//
-// pOut is an ETC1 compressed version of the data.
-
-void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 validPixelMask, etc1_byte* pOut);
-
-// Decode a block of pixels.
-//
-// pIn is an ETC1 compressed version of the data.
-//
-// pOut is a pointer to a ETC_DECODED_BLOCK_SIZE array of bytes that represent a
-// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y) is the R
-// value of pixel (x, y).
-
-void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut);
-
-// Return the size of the encoded image data (does not include size of PKM header).
-
-etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height);
-
-// Encode an entire image.
-// pIn - pointer to the image data. Formatted such that
-//       pixel (x,y) is at pIn + pixelSize * x + stride * y;
-// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
-// pixelSize can be 2 or 3. 2 is an GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image.
-// returns non-zero if there is an error.
-
-int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut);
-
-// Decode an entire image.
-// pIn - pointer to encoded data.
-// pOut - pointer to the image data. Will be written such that
-//        pixel (x,y) is at pIn + pixelSize * x + stride * y. Must be
-//        large enough to store entire image.
-// pixelSize can be 2 or 3. 2 is an GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image.
-// returns non-zero if there is an error.
-
-int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
-        etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride);
-
-// Size of a PKM header, in bytes.
-
-#define ETC_PKM_HEADER_SIZE 16
-
-// Format a PKM header
-
-void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height);
-
-// Check if a PKM header is correctly formatted.
-
-etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader);
-
-// Read the image width from a PKM header
-
-etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader);
-
-// Read the image height from a PKM header
-
-etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/3rdparty/etc2/LICENSE.txt b/3rdparty/etc2/LICENSE.txt
deleted file mode 100644
index 2254f9ece..000000000
--- a/3rdparty/etc2/LICENSE.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Copyright (c) 2013, Bartosz Taudul <wolf.pld@gmail.com>
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the <organization> nor the
-      names of its contributors may be used to endorse or promote products
-      derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/3rdparty/etc2/Math.hpp b/3rdparty/etc2/Math.hpp
deleted file mode 100644
index 3a92a2e73..000000000
--- a/3rdparty/etc2/Math.hpp
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef __DARKRL__MATH_HPP__
-#define __DARKRL__MATH_HPP__
-
-#include <algorithm>
-#include <math.h>
-
-#include "Types.hpp"
-
-template<typename T>
-inline T AlignPOT( T val )
-{
-    if( val == 0 ) return 1;
-    val--;
-    for( unsigned int i=1; i<sizeof( T ) * 8; i <<= 1 )
-    {
-        val |= val >> i;
-    }
-    return val + 1;
-}
-
-inline int CountSetBits( uint32 val )
-{
-    val -= ( val >> 1 ) & 0x55555555;
-    val = ( ( val >> 2 ) & 0x33333333 ) + ( val & 0x33333333 );
-    val = ( ( val >> 4 ) + val ) & 0x0f0f0f0f;
-    val += val >> 8;
-    val += val >> 16;
-    return val & 0x0000003f;
-}
-
-inline int CountLeadingZeros( uint32 val )
-{
-    val |= val >> 1;
-    val |= val >> 2;
-    val |= val >> 4;
-    val |= val >> 8;
-    val |= val >> 16;
-    return 32 - CountSetBits( val );
-}
-
-inline float sRGB2linear( float v )
-{
-    const float a = 0.055f;
-    if( v <= 0.04045f )
-    {
-        return v / 12.92f;
-    }
-    else
-    {
-        return powf( ( v + a ) / ( 1 + a ), 2.4f );
-    }
-}
-
-inline float linear2sRGB( float v )
-{
-    const float a = 0.055f;
-    if( v <= 0.0031308f )
-    {
-        return 12.92f * v;
-    }
-    else
-    {
-        return ( 1 + a ) * pow( v, 1/2.4f ) - a;
-    }
-}
-
-template<class T>
-inline T SmoothStep( T x )
-{
-    return x*x*(3-2*x);
-}
-
-inline uint8 clampu8( int32 val )
-{
-    return std::min( std::max( 0, val ), 255 );
-}
-
-template<class T>
-inline T sq( T val )
-{
-    return val * val;
-}
-
-static inline int mul8bit( int a, int b )
-{
-    int t = a*b + 128;
-    return ( t + ( t >> 8 ) ) >> 8;
-}
-
-#endif
diff --git a/3rdparty/etc2/ProcessCommon.hpp b/3rdparty/etc2/ProcessCommon.hpp
deleted file mode 100644
index 7e6addbcd..000000000
--- a/3rdparty/etc2/ProcessCommon.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef __PROCESSCOMMON_HPP__
-#define __PROCESSCOMMON_HPP__
-
-#include <assert.h>
-#include <stddef.h>
-
-#include "Types.hpp"
-
-template<class T>
-static size_t GetLeastError( const T* err, size_t num )
-{
-    size_t idx = 0;
-    for( size_t i=1; i<num; i++ )
-    {
-        if( err[i] < err[idx] )
-        {
-            idx = i;
-        }
-    }
-    return idx;
-}
-
-static uint64 FixByteOrder( uint64 d )
-{
-    return ( ( d & 0x00000000FFFFFFFF ) ) |
-           ( ( d & 0xFF00000000000000 ) >> 24 ) |
-           ( ( d & 0x000000FF00000000 ) << 24 ) |
-           ( ( d & 0x00FF000000000000 ) >> 8 ) |
-           ( ( d & 0x0000FF0000000000 ) << 8 );
-}
-
-template<class T, class S>
-static uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id )
-{
-    size_t tidx[2];
-    tidx[0] = GetLeastError( terr[0], 8 );
-    tidx[1] = GetLeastError( terr[1], 8 );
-
-    d |= tidx[0] << 26;
-    d |= tidx[1] << 29;
-    for( int i=0; i<16; i++ )
-    {
-        uint64 t = tsel[i][tidx[id[i]%2]];
-        d |= ( t & 0x1 ) << ( i + 32 );
-        d |= ( t & 0x2 ) << ( i + 47 );
-    }
-
-    return d;
-}
-
-#endif
diff --git a/3rdparty/etc2/ProcessRGB.cpp b/3rdparty/etc2/ProcessRGB.cpp
deleted file mode 100644
index 29f0f7ea4..000000000
--- a/3rdparty/etc2/ProcessRGB.cpp
+++ /dev/null
@@ -1,719 +0,0 @@
-#include <string.h>
-
-#include "Math.hpp"
-#include "ProcessCommon.hpp"
-#include "ProcessRGB.hpp"
-#include "Tables.hpp"
-#include "Types.hpp"
-#include "Vector.hpp"
-
-#include <bx/endian.h>
-
-#ifdef __SSE4_1__
-#  ifdef _MSC_VER
-#    include <intrin.h>
-#    include <Windows.h>
-#  else
-#    include <x86intrin.h>
-#  endif
-#endif
-
-namespace
-{
-
-typedef uint16 v4i[4];
-
-void Average( const uint8* data, v4i* a )
-{
-#ifdef __SSE4_1__
-    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
-    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
-    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
-    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
-
-    __m128i d0l = _mm_unpacklo_epi8(d0, _mm_setzero_si128());
-    __m128i d0h = _mm_unpackhi_epi8(d0, _mm_setzero_si128());
-    __m128i d1l = _mm_unpacklo_epi8(d1, _mm_setzero_si128());
-    __m128i d1h = _mm_unpackhi_epi8(d1, _mm_setzero_si128());
-    __m128i d2l = _mm_unpacklo_epi8(d2, _mm_setzero_si128());
-    __m128i d2h = _mm_unpackhi_epi8(d2, _mm_setzero_si128());
-    __m128i d3l = _mm_unpacklo_epi8(d3, _mm_setzero_si128());
-    __m128i d3h = _mm_unpackhi_epi8(d3, _mm_setzero_si128());
-
-    __m128i sum0 = _mm_add_epi16(d0l, d1l);
-    __m128i sum1 = _mm_add_epi16(d0h, d1h);
-    __m128i sum2 = _mm_add_epi16(d2l, d3l);
-    __m128i sum3 = _mm_add_epi16(d2h, d3h);
-
-    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
-    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
-    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
-    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
-    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
-    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
-    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
-    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
-
-    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
-    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
-    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
-    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
-
-    __m128i a0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b2, b3), _mm_set1_epi32(4)), 3);
-    __m128i a1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b1), _mm_set1_epi32(4)), 3);
-    __m128i a2 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b1, b3), _mm_set1_epi32(4)), 3);
-    __m128i a3 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b2), _mm_set1_epi32(4)), 3);
-
-    _mm_storeu_si128((__m128i*)&a[0], _mm_packus_epi32(_mm_shuffle_epi32(a0, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a1, _MM_SHUFFLE(3, 0, 1, 2))));
-    _mm_storeu_si128((__m128i*)&a[2], _mm_packus_epi32(_mm_shuffle_epi32(a2, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a3, _MM_SHUFFLE(3, 0, 1, 2))));
-#else
-    uint32 r[4];
-    uint32 g[4];
-    uint32 b[4];
-
-    memset(r, 0, sizeof(r));
-    memset(g, 0, sizeof(g));
-    memset(b, 0, sizeof(b));
-
-    for( int j=0; j<4; j++ )
-    {
-        for( int i=0; i<4; i++ )
-        {
-            int index = (j & 2) + (i >> 1);
-            b[index] += *data++;
-            g[index] += *data++;
-            r[index] += *data++;
-            data++;
-        }
-    }
-
-    a[0][0] = uint16( (r[2] + r[3] + 4) / 8 );
-    a[0][1] = uint16( (g[2] + g[3] + 4) / 8 );
-    a[0][2] = uint16( (b[2] + b[3] + 4) / 8 );
-    a[0][3] = 0;
-    a[1][0] = uint16( (r[0] + r[1] + 4) / 8 );
-    a[1][1] = uint16( (g[0] + g[1] + 4) / 8 );
-    a[1][2] = uint16( (b[0] + b[1] + 4) / 8 );
-    a[1][3] = 0;
-    a[2][0] = uint16( (r[1] + r[3] + 4) / 8 );
-    a[2][1] = uint16( (g[1] + g[3] + 4) / 8 );
-    a[2][2] = uint16( (b[1] + b[3] + 4) / 8 );
-    a[2][3] = 0;
-    a[3][0] = uint16( (r[0] + r[2] + 4) / 8 );
-    a[3][1] = uint16( (g[0] + g[2] + 4) / 8 );
-    a[3][2] = uint16( (b[0] + b[2] + 4) / 8 );
-    a[3][3] = 0;
-#endif
-}
-
-void CalcErrorBlock( const uint8* data, uint err[4][4] )
-{
-#ifdef __SSE4_1__
-    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
-    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
-    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
-    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
-
-    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
-    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
-    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
-    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
-
-    __m128i d0l = _mm_unpacklo_epi8(dm0, _mm_setzero_si128());
-    __m128i d0h = _mm_unpackhi_epi8(dm0, _mm_setzero_si128());
-    __m128i d1l = _mm_unpacklo_epi8(dm1, _mm_setzero_si128());
-    __m128i d1h = _mm_unpackhi_epi8(dm1, _mm_setzero_si128());
-    __m128i d2l = _mm_unpacklo_epi8(dm2, _mm_setzero_si128());
-    __m128i d2h = _mm_unpackhi_epi8(dm2, _mm_setzero_si128());
-    __m128i d3l = _mm_unpacklo_epi8(dm3, _mm_setzero_si128());
-    __m128i d3h = _mm_unpackhi_epi8(dm3, _mm_setzero_si128());
-
-    __m128i sum0 = _mm_add_epi16(d0l, d1l);
-    __m128i sum1 = _mm_add_epi16(d0h, d1h);
-    __m128i sum2 = _mm_add_epi16(d2l, d3l);
-    __m128i sum3 = _mm_add_epi16(d2h, d3h);
-
-    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
-    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
-    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
-    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
-    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
-    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
-    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
-    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
-
-    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
-    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
-    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
-    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
-
-    __m128i a0 = _mm_add_epi32(b2, b3);
-    __m128i a1 = _mm_add_epi32(b0, b1);
-    __m128i a2 = _mm_add_epi32(b1, b3);
-    __m128i a3 = _mm_add_epi32(b0, b2);
-
-    _mm_storeu_si128((__m128i*)&err[0], a0);
-    _mm_storeu_si128((__m128i*)&err[1], a1);
-    _mm_storeu_si128((__m128i*)&err[2], a2);
-    _mm_storeu_si128((__m128i*)&err[3], a3);
-#else
-    uint terr[4][4];
-
-    memset(terr, 0, 16 * sizeof(uint));
-
-    for( int j=0; j<4; j++ )
-    {
-        for( int i=0; i<4; i++ )
-        {
-            int index = (j & 2) + (i >> 1);
-            uint d = *data++;
-            terr[index][0] += d;
-            d = *data++;
-            terr[index][1] += d;
-            d = *data++;
-            terr[index][2] += d;
-            data++;
-        }
-    }
-
-    for( int i=0; i<3; i++ )
-    {
-        err[0][i] = terr[2][i] + terr[3][i];
-        err[1][i] = terr[0][i] + terr[1][i];
-        err[2][i] = terr[1][i] + terr[3][i];
-        err[3][i] = terr[0][i] + terr[2][i];
-    }
-    for( int i=0; i<4; i++ )
-    {
-        err[i][3] = 0;
-    }
-#endif
-}
-
-uint CalcError( const uint block[4], const v4i& average )
-{
-    uint err = 0x3FFFFFFF; // Big value to prevent negative values, but small enough to prevent overflow
-    err -= block[0] * 2 * average[2];
-    err -= block[1] * 2 * average[1];
-    err -= block[2] * 2 * average[0];
-    err += 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
-    return err;
-}
-
-void ProcessAverages( v4i* a )
-{
-#ifdef __SSE4_1__
-    for( int i=0; i<2; i++ )
-    {
-        __m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
-
-        __m128i t = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(31)), _mm_set1_epi16(128));
-
-        __m128i c = _mm_srli_epi16(_mm_add_epi16(t, _mm_srli_epi16(t, 8)), 8);
-
-        __m128i c1 = _mm_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
-        __m128i diff = _mm_sub_epi16(c, c1);
-        diff = _mm_max_epi16(diff, _mm_set1_epi16(-4));
-        diff = _mm_min_epi16(diff, _mm_set1_epi16(3));
-
-        __m128i co = _mm_add_epi16(c1, diff);
-
-        c = _mm_blend_epi16(co, c, 0xF0);
-
-        __m128i a0 = _mm_or_si128(_mm_slli_epi16(c, 3), _mm_srli_epi16(c, 2));
-
-        _mm_storeu_si128((__m128i*)a[4+i*2], a0);
-    }
-
-    for( int i=0; i<2; i++ )
-    {
-        __m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
-
-        __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(15)), _mm_set1_epi16(128));
-        __m128i t1 = _mm_srli_epi16(_mm_add_epi16(t0, _mm_srli_epi16(t0, 8)), 8);
-
-        __m128i t2 = _mm_or_si128(t1, _mm_slli_epi16(t1, 4));
-
-        _mm_storeu_si128((__m128i*)a[i*2], t2);
-    }
-#else
-    for( int i=0; i<2; i++ )
-    {
-        for( int j=0; j<3; j++ )
-        {
-            int32 c1 = mul8bit( a[i*2+1][j], 31 );
-            int32 c2 = mul8bit( a[i*2][j], 31 );
-
-            int32 diff = c2 - c1;
-            if( diff > 3 ) diff = 3;
-            else if( diff < -4 ) diff = -4;
-
-            int32 co = c1 + diff;
-
-            a[5+i*2][j] = ( c1 << 3 ) | ( c1 >> 2 );
-            a[4+i*2][j] = ( co << 3 ) | ( co >> 2 );
-        }
-    }
-
-    for( int i=0; i<4; i++ )
-    {
-        a[i][0] = g_avg2[mul8bit( a[i][0], 15 )];
-        a[i][1] = g_avg2[mul8bit( a[i][1], 15 )];
-        a[i][2] = g_avg2[mul8bit( a[i][2], 15 )];
-    }
-#endif
-}
-
-void EncodeAverages( uint64& _d, const v4i* a, size_t idx )
-{
-    uint64 d = _d;
-    d |= ( idx << 24 );
-    size_t base = idx << 1;
-
-    if( ( idx & 0x2 ) == 0 )
-    {
-        for( int i=0; i<3; i++ )
-        {
-            d |= uint64( a[base+0][i] >> 4 ) << ( i*8 );
-            d |= uint64( a[base+1][i] >> 4 ) << ( i*8 + 4 );
-        }
-    }
-    else
-    {
-        for( int i=0; i<3; i++ )
-        {
-            d |= uint64( a[base+1][i] & 0xF8 ) << ( i*8 );
-            int32 c = ( ( a[base+0][i] & 0xF8 ) - ( a[base+1][i] & 0xF8 ) ) >> 3;
-            c &= ~0xFFFFFFF8;
-            d |= ((uint64)c) << ( i*8 );
-        }
-    }
-    _d = d;
-}
-
-uint64 CheckSolid( const uint8* src )
-{
-#ifdef __SSE4_1__
-    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
-    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
-    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
-    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
-
-    __m128i c = _mm_shuffle_epi32(d0, _MM_SHUFFLE(0, 0, 0, 0));
-
-    __m128i c0 = _mm_cmpeq_epi8(d0, c);
-    __m128i c1 = _mm_cmpeq_epi8(d1, c);
-    __m128i c2 = _mm_cmpeq_epi8(d2, c);
-    __m128i c3 = _mm_cmpeq_epi8(d3, c);
-
-    __m128i m0 = _mm_and_si128(c0, c1);
-    __m128i m1 = _mm_and_si128(c2, c3);
-    __m128i m = _mm_and_si128(m0, m1);
-
-    if (!_mm_testc_si128(m, _mm_set1_epi32(-1)))
-    {
-        return 0;
-    }
-#else
-    const uint8* ptr = src + 4;
-    for( int i=1; i<16; i++ )
-    {
-        if( memcmp( src, ptr, 4 ) != 0 )
-        {
-            return 0;
-        }
-        ptr += 4;
-    }
-#endif
-    return 0x02000000 |
-        ( uint( src[0] & 0xF8 ) << 16 ) |
-        ( uint( src[1] & 0xF8 ) << 8 ) |
-        ( uint( src[2] & 0xF8 ) );
-}
-
-void PrepareAverages( v4i a[8], const uint8* src, uint err[4] )
-{
-    Average( src, a );
-    ProcessAverages( a );
-
-    uint errblock[4][4];
-    CalcErrorBlock( src, errblock );
-
-    for( int i=0; i<4; i++ )
-    {
-        err[i/2] += CalcError( errblock[i], a[i] );
-        err[2+i/2] += CalcError( errblock[i], a[i+4] );
-    }
-}
-
-void FindBestFit( uint64 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
-{
-    for( size_t i=0; i<16; i++ )
-    {
-        uint16* sel = tsel[i];
-        uint bid = id[i];
-        uint64* ter = terr[bid%2];
-
-        uint8 b = *data++;
-        uint8 g = *data++;
-        uint8 r = *data++;
-        data++;
-
-        int dr = a[bid][0] - r;
-        int dg = a[bid][1] - g;
-        int db = a[bid][2] - b;
-
-#ifdef __SSE4_1__
-        // Reference implementation
-
-        __m128i pix = _mm_set1_epi32(dr * 77 + dg * 151 + db * 28);
-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
-        __m128i error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[0]));
-        __m128i error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[1]));
-        __m128i error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[0]));
-        __m128i error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[1]));
-
-        __m128i index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
-        __m128i minError0 = _mm_min_epi32(error0, error1);
-
-        __m128i index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
-        __m128i minError1 = _mm_min_epi32(error2, error3);
-
-        __m128i minIndex0 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
-        __m128i minError = _mm_min_epi32(minError0, minError1);
-
-        // Squaring the minimum error to produce correct values when adding
-        __m128i minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
-        __m128i squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
-        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
-        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
-        __m128i minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
-        __m128i squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
-        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
-        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
-
-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
-        error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[2]));
-        error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[3]));
-        error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[2]));
-        error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[3]));
-
-        index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
-        minError0 = _mm_min_epi32(error0, error1);
-
-        index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
-        minError1 = _mm_min_epi32(error2, error3);
-
-        __m128i minIndex1 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
-        minError = _mm_min_epi32(minError0, minError1);
-
-        // Squaring the minimum error to produce correct values when adding
-        minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
-        squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
-        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 2));
-        _mm_storeu_si128(((__m128i*)ter) + 2, squareErrorLow);
-        minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
-        squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
-        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 3));
-        _mm_storeu_si128(((__m128i*)ter) + 3, squareErrorHigh);
-        __m128i minIndex = _mm_packs_epi32(minIndex0, minIndex1);
-        _mm_storeu_si128((__m128i*)sel, minIndex);
-#else
-        int pix = dr * 77 + dg * 151 + db * 28;
-
-        for( int t=0; t<8; t++ )
-        {
-            const int64* tab = g_table256[t];
-            uint idx = 0;
-            uint64 err = sq( tab[0] + pix );
-            for( int j=1; j<4; j++ )
-            {
-                uint64 local = sq( tab[j] + pix );
-                if( local < err )
-                {
-                    err = local;
-                    idx = j;
-                }
-            }
-            *sel++ = idx;
-            *ter++ += err;
-        }
-#endif
-    }
-}
-
-#ifdef __SSE4_1__
-// Non-reference implementation, but faster. Produces same results as the AVX2 version
-void FindBestFit( uint32 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
-{
-    for( size_t i=0; i<16; i++ )
-    {
-        uint16* sel = tsel[i];
-        uint bid = id[i];
-        uint32* ter = terr[bid%2];
-
-        uint8 b = *data++;
-        uint8 g = *data++;
-        uint8 r = *data++;
-        data++;
-
-        int dr = a[bid][0] - r;
-        int dg = a[bid][1] - g;
-        int db = a[bid][2] - b;
-
-        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
-        // This produces slightly different results, but is significant faster
-        __m128i pixel = _mm_set1_epi16(dr * 38 + dg * 76 + db * 14);
-        __m128i pix = _mm_abs_epi16(pixel);
-
-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
-        // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
-        __m128i error0 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[0]));
-        __m128i error1 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[1]));
-
-        __m128i index = _mm_and_si128(_mm_cmplt_epi16(error1, error0), _mm_set1_epi16(1));
-        __m128i minError = _mm_min_epi16(error0, error1);
-
-        // Exploiting symmetry of the selector table and use the sign bit
-        // This produces slightly different results, but is needed to produce same results as AVX2 implementation
-        __m128i indexBit = _mm_andnot_si128(_mm_srli_epi16(pixel, 15), _mm_set1_epi8(-1));
-        __m128i minIndex = _mm_or_si128(index, _mm_add_epi16(indexBit, indexBit));
-
-        // Squaring the minimum error to produce correct values when adding
-        __m128i squareErrorLo = _mm_mullo_epi16(minError, minError);
-        __m128i squareErrorHi = _mm_mulhi_epi16(minError, minError);
-
-        __m128i squareErrorLow = _mm_unpacklo_epi16(squareErrorLo, squareErrorHi);
-        __m128i squareErrorHigh = _mm_unpackhi_epi16(squareErrorLo, squareErrorHi);
-
-        squareErrorLow = _mm_add_epi32(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
-        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
-        squareErrorHigh = _mm_add_epi32(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
-        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
-
-        _mm_storeu_si128((__m128i*)sel, minIndex);
-    }
-}
-#endif
-
-uint8_t convert6(float f)
-{
-    int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
-    return (i + 11 - ((i + 11) >> 7) - ((i + 4) >> 7)) >> 3;
-}
-
-uint8_t convert7(float f)
-{
-    int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
-    return (i + 9 - ((i + 9) >> 8) - ((i + 6) >> 8)) >> 2;
-}
-
-std::pair<uint64, uint64> Planar(const uint8* src)
-{
-    int32 r = 0;
-    int32 g = 0;
-    int32 b = 0;
-
-    for (int i = 0; i < 16; ++i)
-    {
-        b += src[i * 4 + 0];
-        g += src[i * 4 + 1];
-        r += src[i * 4 + 2];
-    }
-
-    int32 difRyz = 0;
-    int32 difGyz = 0;
-    int32 difByz = 0;
-    int32 difRxz = 0;
-    int32 difGxz = 0;
-    int32 difBxz = 0;
-
-    const int32 scaling[] = { -255, -85, 85, 255 };
-
-    for (int i = 0; i < 16; ++i)
-    {
-        int32 difB = (static_cast<int>(src[i * 4 + 0]) << 4) - b;
-        int32 difG = (static_cast<int>(src[i * 4 + 1]) << 4) - g;
-        int32 difR = (static_cast<int>(src[i * 4 + 2]) << 4) - r;
-
-        difRyz += difR * scaling[i % 4];
-        difGyz += difG * scaling[i % 4];
-        difByz += difB * scaling[i % 4];
-
-        difRxz += difR * scaling[i / 4];
-        difGxz += difG * scaling[i / 4];
-        difBxz += difB * scaling[i / 4];
-    }
-
-    const float scale = -4.0f / ((255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f);
-
-    float aR = difRxz * scale;
-    float aG = difGxz * scale;
-    float aB = difBxz * scale;
-
-    float bR = difRyz * scale;
-    float bG = difGyz * scale;
-    float bB = difByz * scale;
-
-    float dR = r * (4.0f / 16.0f);
-    float dG = g * (4.0f / 16.0f);
-    float dB = b * (4.0f / 16.0f);
-
-    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
-    float cofR = (aR *  255.0f + (bR *  255.0f + dR));
-    float cofG = (aG *  255.0f + (bG *  255.0f + dG));
-    float cofB = (aB *  255.0f + (bB *  255.0f + dB));
-    float chfR = (aR * -425.0f + (bR *  255.0f + dR));
-    float chfG = (aG * -425.0f + (bG *  255.0f + dG));
-    float chfB = (aB * -425.0f + (bB *  255.0f + dB));
-    float cvfR = (aR *  255.0f + (bR * -425.0f + dR));
-    float cvfG = (aG *  255.0f + (bG * -425.0f + dG));
-    float cvfB = (aB *  255.0f + (bB * -425.0f + dB));
-
-    // convert to r6g7b6
-    int32 coR = convert6(cofR);
-    int32 coG = convert7(cofG);
-    int32 coB = convert6(cofB);
-    int32 chR = convert6(chfR);
-    int32 chG = convert7(chfG);
-    int32 chB = convert6(chfB);
-    int32 cvR = convert6(cvfR);
-    int32 cvG = convert7(cvfG);
-    int32 cvB = convert6(cvfB);
-
-    // Error calculation
-    int32 ro0 = coR;
-    int32 go0 = coG;
-    int32 bo0 = coB;
-    int32 ro1 = (ro0 >> 4) | (ro0 << 2);
-    int32 go1 = (go0 >> 6) | (go0 << 1);
-    int32 bo1 = (bo0 >> 4) | (bo0 << 2);
-    int32 ro2 = (ro1 << 2) + 2;
-    int32 go2 = (go1 << 2) + 2;
-    int32 bo2 = (bo1 << 2) + 2;
-
-    int32 rh0 = chR;
-    int32 gh0 = chG;
-    int32 bh0 = chB;
-    int32 rh1 = (rh0 >> 4) | (rh0 << 2);
-    int32 gh1 = (gh0 >> 6) | (gh0 << 1);
-    int32 bh1 = (bh0 >> 4) | (bh0 << 2);
-
-    int32 rh2 = rh1 - ro1;
-    int32 gh2 = gh1 - go1;
-    int32 bh2 = bh1 - bo1;
-
-    int32 rv0 = cvR;
-    int32 gv0 = cvG;
-    int32 bv0 = cvB;
-    int32 rv1 = (rv0 >> 4) | (rv0 << 2);
-    int32 gv1 = (gv0 >> 6) | (gv0 << 1);
-    int32 bv1 = (bv0 >> 4) | (bv0 << 2);
-
-    int32 rv2 = rv1 - ro1;
-    int32 gv2 = gv1 - go1;
-    int32 bv2 = bv1 - bo1;
-
-    uint64 error = 0;
-
-    for (int i = 0; i < 16; ++i)
-    {
-        int32 cR = clampu8((rh2 * (i / 4) + rv2 * (i % 4) + ro2) >> 2);
-        int32 cG = clampu8((gh2 * (i / 4) + gv2 * (i % 4) + go2) >> 2);
-        int32 cB = clampu8((bh2 * (i / 4) + bv2 * (i % 4) + bo2) >> 2);
-
-        int32 difB = static_cast<int>(src[i * 4 + 0]) - cB;
-        int32 difG = static_cast<int>(src[i * 4 + 1]) - cG;
-        int32 difR = static_cast<int>(src[i * 4 + 2]) - cR;
-
-        int32 dif = difR * 38 + difG * 76 + difB * 14;
-
-        error += dif * dif;
-    }
-
-    /**/
-    uint32 rgbv = cvB | (cvG << 6) | (cvR << 13);
-    uint32 rgbh = chB | (chG << 6) | (chR << 13);
-    uint32 hi = rgbv | ((rgbh & 0x1FFF) << 19);
-    uint32 lo = (chR & 0x1) | 0x2 | ((chR << 1) & 0x7C);
-    lo |= ((coB & 0x07) <<  7) | ((coB & 0x18) <<  8) | ((coB & 0x20) << 11);
-    lo |= ((coG & 0x3F) << 17) | ((coG & 0x40) << 18);
-    lo |= coR << 25;
-
-    const int32 idx = (coR & 0x20) | ((coG & 0x20) >> 1) | ((coB & 0x1E) >> 1);
-
-    lo |= g_flags[idx];
-
-    uint64 result = static_cast<uint32>(bx::endianSwap(lo));
-    result |= static_cast<uint64>(static_cast<uint32>(bx::endianSwap(hi))) << 32;
-
-    return std::make_pair(result, error);
-}
-
-template<class T, class S>
-uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id, const uint64 value, const uint64 error)
-{
-    size_t tidx[2];
-    tidx[0] = GetLeastError( terr[0], 8 );
-    tidx[1] = GetLeastError( terr[1], 8 );
-
-    if ((terr[0][tidx[0]] + terr[1][tidx[1]]) >= error)
-    {
-        return value;
-    }
-
-    d |= tidx[0] << 26;
-    d |= tidx[1] << 29;
-    for( int i=0; i<16; i++ )
-    {
-        uint64 t = tsel[i][tidx[id[i]%2]];
-        d |= ( t & 0x1 ) << ( i + 32 );
-        d |= ( t & 0x2 ) << ( i + 47 );
-    }
-
-    return FixByteOrder(d);
-}
-}
-
-uint64 ProcessRGB( const uint8* src )
-{
-    uint64 d = CheckSolid( src );
-    if( d != 0 ) return d;
-
-    v4i a[8];
-    uint err[4] = {};
-    PrepareAverages( a, src, err );
-    size_t idx = GetLeastError( err, 4 );
-    EncodeAverages( d, a, idx );
-
-#if defined __SSE4_1__ && !defined REFERENCE_IMPLEMENTATION
-    uint32 terr[2][8] = {};
-#else
-    uint64 terr[2][8] = {};
-#endif
-    uint16 tsel[16][8];
-    const uint32* id = g_id[idx];
-    FindBestFit( terr, tsel, a, id, src );
-
-    return FixByteOrder( EncodeSelectors( d, terr, tsel, id ) );
-}
-
-uint64 ProcessRGB_ETC2( const uint8* src )
-{
-    std::pair<uint64, uint64> result = Planar( src );
-
-    uint64 d = 0;
-
-    v4i a[8];
-    uint err[4] = {};
-    PrepareAverages( a, src, err );
-    size_t idx = GetLeastError( err, 4 );
-    EncodeAverages( d, a, idx );
-
-    uint64 terr[2][8] = {};
-    uint16 tsel[16][8];
-    const uint32* id = g_id[idx];
-    FindBestFit( terr, tsel, a, id, src );
-
-    return EncodeSelectors( d, terr, tsel, id, result.first, result.second );
-}
diff --git a/3rdparty/etc2/ProcessRGB.hpp b/3rdparty/etc2/ProcessRGB.hpp
deleted file mode 100644
index 21434a3b2..000000000
--- a/3rdparty/etc2/ProcessRGB.hpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __PROCESSRGB_HPP__
-#define __PROCESSRGB_HPP__
-
-#include "Types.hpp"
-
-uint64 ProcessRGB( const uint8* src );
-uint64 ProcessRGB_ETC2( const uint8* src );
-
-#endif
diff --git a/3rdparty/etc2/Tables.cpp b/3rdparty/etc2/Tables.cpp
deleted file mode 100644
index 968fbf583..000000000
--- a/3rdparty/etc2/Tables.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-#include "Tables.hpp"
-
-const int32 g_table[8][4] = {
-    {  2,  8,   -2,   -8 },
-    {  5, 17,   -5,  -17 },
-    {  9, 29,   -9,  -29 },
-    { 13, 42,  -13,  -42 },
-    { 18, 60,  -18,  -60 },
-    { 24, 80,  -24,  -80 },
-    { 33, 106, -33, -106 },
-    { 47, 183, -47, -183 }
-};
-
-const int64 g_table256[8][4] = {
-    {  2*256,  8*256,   -2*256,   -8*256 },
-    {  5*256, 17*256,   -5*256,  -17*256 },
-    {  9*256, 29*256,   -9*256,  -29*256 },
-    { 13*256, 42*256,  -13*256,  -42*256 },
-    { 18*256, 60*256,  -18*256,  -60*256 },
-    { 24*256, 80*256,  -24*256,  -80*256 },
-    { 33*256, 106*256, -33*256, -106*256 },
-    { 47*256, 183*256, -47*256, -183*256 }
-};
-
-const uint32 g_id[4][16] = {
-    { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-    { 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
-    { 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
-    { 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
-};
-
-const uint32 g_avg2[16] = {
-    0x00,
-    0x11,
-    0x22,
-    0x33,
-    0x44,
-    0x55,
-    0x66,
-    0x77,
-    0x88,
-    0x99,
-    0xAA,
-    0xBB,
-    0xCC,
-    0xDD,
-    0xEE,
-    0xFF
-};
-
-const uint32 g_flags[64] = {
-    0x80800402, 0x80800402, 0x80800402, 0x80800402,
-    0x80800402, 0x80800402, 0x80800402, 0x8080E002,
-    0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
-    0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
-    0x80000402, 0x80000402, 0x80000402, 0x80000402,
-    0x80000402, 0x80000402, 0x80000402, 0x8000E002,
-    0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
-    0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
-    0x00800402, 0x00800402, 0x00800402, 0x00800402,
-    0x00800402, 0x00800402, 0x00800402, 0x0080E002,
-    0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
-    0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
-    0x00000402, 0x00000402, 0x00000402, 0x00000402,
-    0x00000402, 0x00000402, 0x00000402, 0x0000E002,
-    0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
-    0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
-};
-
-#ifdef __SSE4_1__
-const uint8 g_flags_AVX2[64] =
-{
-    0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x7D,
-    0x63, 0x63, 0x7D, 0x7D,
-    0x63, 0x7D, 0x7D, 0x7D,
-    0x43, 0x43, 0x43, 0x43,
-    0x43, 0x43, 0x43, 0x5D,
-    0x43, 0x43, 0x5D, 0x5D,
-    0x43, 0x5D, 0x5D, 0x5D,
-    0x23, 0x23, 0x23, 0x23,
-    0x23, 0x23, 0x23, 0x3D,
-    0x23, 0x23, 0x3D, 0x3D,
-    0x23, 0x3D, 0x3D, 0x3D,
-    0x03, 0x03, 0x03, 0x03,
-    0x03, 0x03, 0x03, 0x1D,
-    0x03, 0x03, 0x1D, 0x1D,
-    0x03, 0x1D, 0x1D, 0x1D,
-};
-
-const __m128i g_table_SIMD[2] =
-{
-    _mm_setr_epi16(   2,   5,   9,  13,  18,  24,  33,  47),
-    _mm_setr_epi16(   8,  17,  29,  42,  60,  80, 106, 183)
-};
-const __m128i g_table128_SIMD[2] =
-{
-    _mm_setr_epi16(   2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128),
-    _mm_setr_epi16(   8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128)
-};
-const __m128i g_table256_SIMD[4] =
-{
-    _mm_setr_epi32(  2*256,   5*256,   9*256,  13*256),
-    _mm_setr_epi32(  8*256,  17*256,  29*256,  42*256),
-    _mm_setr_epi32( 18*256,  24*256,  33*256,  47*256),
-    _mm_setr_epi32( 60*256,  80*256, 106*256, 183*256)
-};
-#endif
-
diff --git a/3rdparty/etc2/Tables.hpp b/3rdparty/etc2/Tables.hpp
deleted file mode 100644
index b570526dc..000000000
--- a/3rdparty/etc2/Tables.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TABLES_HPP__
-#define __TABLES_HPP__
-
-#include "Types.hpp"
-#ifdef __SSE4_1__
-#include <smmintrin.h>
-#endif
-
-extern const int32 g_table[8][4];
-extern const int64 g_table256[8][4];
-
-extern const uint32 g_id[4][16];
-
-extern const uint32 g_avg2[16];
-
-extern const uint32 g_flags[64];
-
-#ifdef __SSE4_1__
-extern const uint8 g_flags_AVX2[64];
-extern const __m128i g_table_SIMD[2];
-extern const __m128i g_table128_SIMD[2];
-extern const __m128i g_table256_SIMD[4];
-#endif
-
-#endif
diff --git a/3rdparty/etc2/Types.hpp b/3rdparty/etc2/Types.hpp
deleted file mode 100644
index b31da22e4..000000000
--- a/3rdparty/etc2/Types.hpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __DARKRL__TYPES_HPP__
-#define __DARKRL__TYPES_HPP__
-
-#include <stdint.h>
-
-typedef int8_t      int8;
-typedef uint8_t     uint8;
-typedef int16_t     int16;
-typedef uint16_t    uint16;
-typedef int32_t     int32;
-typedef uint32_t    uint32;
-typedef int64_t     int64;
-typedef uint64_t    uint64;
-
-typedef unsigned int uint;
-
-#endif
diff --git a/3rdparty/etc2/Vector.hpp b/3rdparty/etc2/Vector.hpp
deleted file mode 100644
index 3005fdc53..000000000
--- a/3rdparty/etc2/Vector.hpp
+++ /dev/null
@@ -1,222 +0,0 @@
-#ifndef __DARKRL__VECTOR_HPP__
-#define __DARKRL__VECTOR_HPP__
-
-#include <assert.h>
-#include <algorithm>
-#include <math.h>
-
-#include "Math.hpp"
-#include "Types.hpp"
-
-template<class T>
-struct Vector2
-{
-    Vector2() : x( 0 ), y( 0 ) {}
-    Vector2( T v ) : x( v ), y( v ) {}
-    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}
-
-    bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }
-    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
-
-    Vector2<T>& operator+=( const Vector2<T>& rhs )
-    {
-        x += rhs.x;
-        y += rhs.y;
-        return *this;
-    }
-    Vector2<T>& operator-=( const Vector2<T>& rhs )
-    {
-        x -= rhs.x;
-        y -= rhs.y;
-        return *this;
-    }
-    Vector2<T>& operator*=( const Vector2<T>& rhs )
-    {
-        x *= rhs.x;
-        y *= rhs.y;
-        return *this;
-    }
-
-    T x, y;
-};
-
-template<class T>
-Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
-{
-    return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
-}
-
-template<class T>
-Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
-{
-    return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
-}
-
-template<class T>
-Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
-{
-    return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
-}
-
-template<class T>
-Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
-{
-    return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
-}
-
-
-typedef Vector2<int32> v2i;
-typedef Vector2<float> v2f;
-
-
-template<class T>
-struct Vector3
-{
-    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
-    Vector3( T v ) : x( v ), y( v ), z( v ) {}
-    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
-    template<class Y>
-    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}
-
-    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
-    void Clamp()
-    {
-        x = std::min( T(1), std::max( T(0), x ) );
-        y = std::min( T(1), std::max( T(0), y ) );
-        z = std::min( T(1), std::max( T(0), z ) );
-    }
-
-    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
-    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
-
-    T& operator[]( uint idx ) { assert( idx < 3 ); return ((T*)this)[idx]; }
-    const T& operator[]( uint idx ) const { assert( idx < 3 ); return ((T*)this)[idx]; }
-
-    Vector3<T> operator+=( const Vector3<T>& rhs )
-    {
-        x += rhs.x;
-        y += rhs.y;
-        z += rhs.z;
-        return *this;
-    }
-
-    Vector3<T> operator*=( const Vector3<T>& rhs )
-    {
-        x *= rhs.x;
-        y *= rhs.y;
-        z *= rhs.z;
-        return *this;
-    }
-
-    Vector3<T> operator*=( const float& rhs )
-    {
-        x *= rhs;
-        y *= rhs;
-        z *= rhs;
-        return *this;
-    }
-
-    T x, y, z;
-    T padding;
-};
-
-template<class T>
-Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
-{
-    return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
-}
-
-template<class T>
-Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
-{
-    return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
-}
-
-template<class T>
-Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
-{
-    return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
-}
-
-template<class T>
-Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
-{
-    return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
-}
-
-template<class T>
-Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
-{
-    return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
-}
-
-template<class T>
-bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
-{
-    return lhs.Luminance() < rhs.Luminance();
-}
-
-typedef Vector3<int32> v3i;
-typedef Vector3<float> v3f;
-typedef Vector3<uint8> v3b;
-
-
-static inline v3b v3f_to_v3b( const v3f& v )
-{
-    return v3b( uint8( std::min( 1.f, v.x ) * 255 ), uint8( std::min( 1.f, v.y ) * 255 ), uint8( std::min( 1.f, v.z ) * 255 ) );
-}
-
-template<class T>
-Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
-{
-    return v1 + ( v2 - v1 ) * amount;
-}
-
-template<>
-inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
-{
-    return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );
-}
-
-template<class T>
-Vector3<T> Desaturate( const Vector3<T>& v )
-{
-    T l = v.Luminance();
-    return Vector3<T>( l, l, l );
-}
-
-template<class T>
-Vector3<T> Desaturate( const Vector3<T>& v, float mul )
-{
-    T l = T( v.Luminance() * mul );
-    return Vector3<T>( l, l, l );
-}
-
-template<class T>
-Vector3<T> pow( const Vector3<T>& base, float exponent )
-{
-    return Vector3<T>(
-        pow( base.x, exponent ),
-        pow( base.y, exponent ),
-        pow( base.z, exponent ) );
-}
-
-template<class T>
-Vector3<T> sRGB2linear( const Vector3<T>& v )
-{
-    return Vector3<T>(
-        sRGB2linear( v.x ),
-        sRGB2linear( v.y ),
-        sRGB2linear( v.z ) );
-}
-
-template<class T>
-Vector3<T> linear2sRGB( const Vector3<T>& v )
-{
-    return Vector3<T>(
-        linear2sRGB( v.x ),
-        linear2sRGB( v.y ),
-        linear2sRGB( v.z ) );
-}
-
-#endif
diff --git a/3rdparty/iqa/LICENSE b/3rdparty/iqa/LICENSE
deleted file mode 100644
index ff67944bf..000000000
--- a/3rdparty/iqa/LICENSE
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
diff --git a/3rdparty/iqa/README.txt b/3rdparty/iqa/README.txt
deleted file mode 100644
index 2028d4692..000000000
--- a/3rdparty/iqa/README.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Doxygen documentation can be found at: http://tdistler.com/iqa
-
-BUILD:
-
-  All build artifacts end up in build/<configuration>, where <configuration> is
-  'debug' or 'release'.
-
-  Windows:
-    - Open iqa.sln, select 'Debug' or 'Release', and build. The output is a 
-      static library 'iqa.lib'.
-    - To run the tests under the debugger, first right-click the 'test' project,
-      select Properties -> Configuration Properties -> Debugging and set
-      'Working Directory' to '$(OutDir)'. Then start the application.
-
-  Linux:
-    - Change directories into the root of the IQA branch you want to build.
-    - Type `make` for a debug build, or `make RELEASE=1` for a release build.
-      The output is a static library 'libiqa.a'.
-    - Type `make test` (or `make test RELEASE=1`) to build the unit tests.
-    - Type `make clean` (or `make clean RELEASE=1`) to delete all build
-      artifacts.
-    - To run the tests, `cd` to the build/<configuration> directory and type
-      `./test`.
-
-
-USE:
-
-  - Include 'iqa.h' in your source file.
-  - Call iqa_* methods.
-  - Link against the IQA library.
-
-
-HELP & SUPPORT:
-
-  Further help can be found at: https://sourceforge.net/projects/iqa/support
-
diff --git a/3rdparty/iqa/include/convolve.h b/3rdparty/iqa/include/convolve.h
deleted file mode 100644
index a5e2e71a2..000000000
--- a/3rdparty/iqa/include/convolve.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _CONVOLVE_H_
-#define _CONVOLVE_H_
-
-typedef float (*_iqa_get_pixel)(const float *img, int w, int h, int x, int y, float bnd_const);
-
-/** Out-of-bounds array values are a mirrored reflection of the border values*/
-float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const);
-/** Out-of-bounds array values are set to the nearest border value */
-float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const);
-/** Out-of-bounds array values are set to 'bnd_const' */
-float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const);
-
-
-/** Defines a convolution kernel */
-struct _kernel {
-    float *kernel;          /**< Pointer to the kernel values */
-    int w;                  /**< The kernel width */
-    int h;                  /**< The kernel height */
-    int normalized;         /**< 1 if the kernel values add up to 1. 0 otherwise */
-    _iqa_get_pixel bnd_opt; /**< Defines how out-of-bounds image values are handled */
-    float bnd_const;        /**< If 'bnd_opt' is KBND_CONSTANT, this specifies the out-of-bounds value */
-};
-
-/**
- * @brief Applies the specified kernel to the image.
- * The kernel will be applied to all areas where it fits completely within
- * the image. The resulting image will be smaller by half the kernel width 
- * and height (w - kw/2 and h - kh/2).
- *
- * @param img Image to modify
- * @param w Image width
- * @param h Image height
- * @param k The kernel to apply
- * @param result Buffer to hold the resulting image ((w-kw)*(h-kh), where kw
- *               and kh are the kernel width and height). If 0, the result
- *               will be written to the original image buffer.
- * @param rw Optional. The width of the resulting image will be stored here.
- * @param rh Optional. The height of the resulting image will be stored here.
- */
-void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh);
-
-/**
- * The same as _iqa_convolve() except the kernel is applied to the entire image.
- * In other words, the kernel is applied to all areas where the top-left corner
- * of the kernel is in the image. Out-of-bound pixel value (off the right and
- * bottom edges) are chosen based on the 'bnd_opt' and 'bnd_const' members of
- * the kernel structure. The resulting array is the same size as the input
- * image.
- *
- * @param img Image to modify
- * @param w Image width
- * @param h Image height
- * @param k The kernel to apply
- * @param result Buffer to hold the resulting image ((w-kw)*(h-kh), where kw
- *               and kh are the kernel width and height). If 0, the result
- *               will be written to the original image buffer.
- * @return 0 if successful. Non-zero otherwise.
- */
-int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result);
-
-/**
- * Returns the filtered version of the specified pixel. If no kernel is given,
- * the raw pixel value is returned.
- * 
- * @param img Source image
- * @param w Image width
- * @param h Image height
- * @param x The x location of the pixel to filter
- * @param y The y location of the pixel to filter
- * @param k Optional. The convolution kernel to apply to the pixel.
- * @param kscale The scale of the kernel (for normalization). 1 for normalized
- *               kernels. Required if 'k' is not null.
- * @return The filtered pixel value.
- */
-float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale);
-
-
-#endif /*_CONVOLVE_H_*/
diff --git a/3rdparty/iqa/include/decimate.h b/3rdparty/iqa/include/decimate.h
deleted file mode 100644
index 40f1a8c04..000000000
--- a/3rdparty/iqa/include/decimate.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _DECIMATE_H_
-#define _DECIMATE_H_
-
-#include "convolve.h"
-
-/**
- * @brief Downsamples (decimates) an image.
- *
- * @param img Image to modify
- * @param w Image width
- * @param h Image height
- * @param factor Decimation factor
- * @param k The kernel to apply (e.g. low-pass filter). Can be 0.
- * @param result Buffer to hold the resulting image (w/factor*h/factor). If 0,
- *               the result will be written to the original image buffer.
- * @param rw Optional. The width of the resulting image will be stored here.
- * @param rh Optional. The height of the resulting image will be stored here.
- * @return 0 on success.
- */
-int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh);
-
-#endif /*_DECIMATE_H_*/
diff --git a/3rdparty/iqa/include/iqa.h b/3rdparty/iqa/include/iqa.h
deleted file mode 100644
index 408675e5a..000000000
--- a/3rdparty/iqa/include/iqa.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _IQA_H_
-#define _IQA_H_
-
-#include "iqa_os.h"
-
-/**
- * Allows fine-grain control of the SSIM algorithm.
- */
-struct iqa_ssim_args {
-    float alpha;    /**< luminance exponent */
-    float beta;     /**< contrast exponent */
-    float gamma;    /**< structure exponent */
-    int L;          /**< dynamic range (2^8 - 1)*/
-    float K1;       /**< stabilization constant 1 */
-    float K2;       /**< stabilization constant 2 */
-    int f;          /**< scale factor. 0=default scaling, 1=no scaling */
-};
-
-/**
- * Allows fine-grain control of the MS-SSIM algorithm.
- */
-struct iqa_ms_ssim_args {
-    int wang;             /**< 1=original algorithm by Wang, et al. 0=MS-SSIM* by Rouse/Hemami (default). */
-    int gaussian;         /**< 1=11x11 Gaussian window (default). 0=8x8 linear window. */
-    int scales;           /**< Number of scaled images to use. Default is 5. */
-    const float *alphas;  /**< Pointer to array of alpha values for each scale. Required if 'scales' isn't 5. */
-    const float *betas;   /**< Pointer to array of beta values for each scale. Required if 'scales' isn't 5. */
-    const float *gammas;  /**< Pointer to array of gamma values for each scale. Required if 'scales' isn't 5. */
-};
-
-/**
- * Calculates the Mean Squared Error between 2 equal-sized 8-bit images.
- * @note The images must have the same width, height, and stride.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @return The MSE.
- */
-float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
-
-/**
- * Calculates the Peak Signal-to-Noise-Ratio between 2 equal-sized 8-bit
- * images.
- * @note The images must have the same width, height, and stride.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @return The PSNR.
- */
-float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
-
-/**
- * Calculates the Structural SIMilarity between 2 equal-sized 8-bit images.
- *
- * See https://ece.uwaterloo.ca/~z70wang/publications/ssim.html
- * @note The images must have the same width, height, and stride.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @param gaussian 0 = 8x8 square window, 1 = 11x11 circular-symmetric Gaussian
- * weighting.
- * @param args Optional SSIM arguments for fine control of the algorithm. 0 for
- * defaults. Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
- * @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
- */
-float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride, 
-    int gaussian, const struct iqa_ssim_args *args);
-
-/**
- * Calculates the Multi-Scale Structural SIMilarity between 2 equal-sized 8-bit
- * images. The default algorithm is MS-SSIM* proposed by Rouse/Hemami 2008.
- *
- * See https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf and
- * http://foulard.ece.cornell.edu/publications/dmr_hvei2008_paper.pdf
- *
- * @note 1. The images must have the same width, height, and stride.
- * @note 2. The minimum image width or height is 2^(scales-1) * filter, where 'filter' is 11
- * if a Gaussian window is being used, or 9 otherwise.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images.
- * @param h Height of the images.
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @param args Optional MS-SSIM arguments for fine control of the algorithm. 0
- * for defaults. Defaults are wang=0, scales=5, gaussian=1.
- * @return The mean MS-SSIM over the entire image, or INFINITY if error.
- */
-float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride, 
-    const struct iqa_ms_ssim_args *args);
-
-#endif /*_IQA_H_*/
diff --git a/3rdparty/iqa/include/iqa_os.h b/3rdparty/iqa/include/iqa_os.h
deleted file mode 100644
index 52e0be016..000000000
--- a/3rdparty/iqa/include/iqa_os.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _OS_H_
-#define _OS_H_
-
-/* Microsoft tends to implement features early, but they have a high legacy
- * cost because they won't break existing implementations. As such, certain
- * features we take for granted on other platforms (like C99) aren't fully
- * implemented. This file is meant to rectify that.
- */
-
-#ifdef WIN32
-
-#include <windows.h>
-#include <math.h>
-
-#define IQA_INLINE __inline
-
-#ifndef INFINITY
-    #define INFINITY (float)HUGE_VAL /**< Defined in C99 (Windows is C89) */
-#endif /*INFINITY*/
-
-#ifndef NAN
-    static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
-    #define NAN (*(const float *) __nan) /**< Defined in C99 (Windows is C99) */
-#endif
-
-#define IQA_EXPORT __declspec(dllexport)
-
-#else /* !Windows */
-
-#define IQA_INLINE inline
-#define IQA_EXPORT
-
-#endif
-
-#endif /* _OS_H_ */
diff --git a/3rdparty/iqa/include/math_utils.h b/3rdparty/iqa/include/math_utils.h
deleted file mode 100644
index 674b354d0..000000000
--- a/3rdparty/iqa/include/math_utils.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _MATH_UTILS_H_
-#define _MATH_UTILS_H_
-
-#include "iqa_os.h"
-#include <math.h>
-
-/**
- * Rounds a float to the nearest integer.
- */
-IQA_EXPORT int _round(float a);
-
-IQA_EXPORT int _max(int x, int y);
-
-IQA_EXPORT int _min(int x, int y);
-
-
-/** 
- * Compares 2 floats to the specified digit of precision.
- * @return 0 if equal, 1 otherwise.
- */
-IQA_EXPORT int _cmp_float(float a, float b, int digits);
-
-
-/** 
- * Compares 2 matrices with the specified precision. 'b' is assumed to be the
- * same size as 'a' or smaller.
- * @return 0 if equal, 1 otherwise
- */
-IQA_EXPORT int _matrix_cmp(const float *a, const float *b, int w, int h, int digits);
-
-#endif /*_MATH_UTILS_H_*/
diff --git a/3rdparty/iqa/include/ssim.h b/3rdparty/iqa/include/ssim.h
deleted file mode 100644
index 5653afe80..000000000
--- a/3rdparty/iqa/include/ssim.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SSIM_H_
-#define _SSIM_H_
-
-#include "convolve.h"
-
-/*
- * Circular-symmetric Gaussian weighting.
- * h(x,y) = hg(x,y)/SUM(SUM(hg)) , for normalization to 1.0
- * hg(x,y) = e^( -0.5*( (x^2+y^2)/sigma^2 ) ) , where sigma was 1.5
- */
-#define GAUSSIAN_LEN 11
-static const float g_gaussian_window[GAUSSIAN_LEN][GAUSSIAN_LEN] = {
-    {0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
-    {0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
-    {0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
-    {0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
-    {0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
-    {0.000274f, 0.002021f, 0.009577f, 0.029091f, 0.056662f, 0.070762f, 0.056662f, 0.029091f, 0.009577f, 0.002021f, 0.000274f},
-    {0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
-    {0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
-    {0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
-    {0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
-    {0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
-};
-
-/*
- * Equal weight square window.
- * Each pixel is equally weighted (1/64) so that SUM(x) = 1.0
- */
-#define SQUARE_LEN 8
-static const float g_square_window[SQUARE_LEN][SQUARE_LEN] = {
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-};
-
-/* Holds intermediate SSIM values for map-reduce operation. */
-struct _ssim_int {
-    double l;
-    double c;
-    double s;
-};
-
-/* Defines the pointers to the map-reduce functions. */
-typedef int (*_map)(const struct _ssim_int *, void *);
-typedef float (*_reduce)(int, int, void *);
-
-/* Arguments for map-reduce. The 'context' is user-defined. */
-struct _map_reduce {
-    _map map;
-    _reduce reduce;
-    void *context;
-};
-
-/**
- * Private method that calculates the SSIM value on a pre-processed image.
- *
- * The input images must have stride==width. This method does not scale.
- *
- * @note Image buffers are modified.
- *
- * Map-reduce is used for doing the final SSIM calculation. The map function is
- * called for every pixel, and the reduce is called at the end. The context is
- * caller-defined and *not* modified by this method.
- *
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param k The kernel used as the window function
- * @param mr Optional map-reduce functions to use to calculate SSIM. Required
- *           if 'args' is not null. Ignored if 'args' is null.
- * @param args Optional SSIM arguments for fine control of the algorithm. 0 for defaults.
- *             Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
- * @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
- */
-float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args);
-
-#endif /* _SSIM_H_ */
diff --git a/3rdparty/iqa/source/convolve.c b/3rdparty/iqa/source/convolve.c
deleted file mode 100644
index c9159070f..000000000
--- a/3rdparty/iqa/source/convolve.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "convolve.h"
-#include <stdlib.h>
-
-float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const)
-{
-    (void)bnd_const;
-    if (x<0) x=-1-x;
-    else if (x>=w) x=(w-(x-w))-1;
-    if (y<0) y=-1-y;
-    else if (y>=h) y=(h-(y-h))-1;
-    return img[y*w + x];
-}
-
-float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const)
-{
-    (void)bnd_const;
-    if (x<0) x=0;
-    if (x>=w) x=w-1;
-    if (y<0) y=0;
-    if (y>=h) y=h-1;
-    return img[y*w + x];
-}
-
-float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const)
-{
-    if (x<0) x=0;
-    if (y<0) y=0;
-    if (x>=w || y>=h)
-        return bnd_const;
-    return img[y*w + x];
-}
-
-static float _calc_scale(const struct _kernel *k)
-{
-    int ii,k_len;
-    double sum=0.0;
-
-    if (k->normalized)
-        return 1.0f;
-    else {
-        k_len = k->w * k->h;
-        for (ii=0; ii<k_len; ++ii)
-            sum += k->kernel[ii];
-        if (sum != 0.0)
-            return (float)(1.0 / sum);
-        return 1.0f;
-    }
-}
-
-void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh)
-{
-    int x,y,kx,ky,u,v;
-    int uc = k->w/2;
-    int vc = k->h/2;
-    int kw_even = (k->w&1)?0:1;
-    int kh_even = (k->h&1)?0:1;
-    int dst_w = w - k->w + 1;
-    int dst_h = h - k->h + 1;
-    int img_offset,k_offset;
-    double sum;
-    float scale, *dst=result;
-
-    if (!dst)
-        dst = img; /* Convolve in-place */
-
-    /* Kernel is applied to all positions where the kernel is fully contained
-     * in the image */
-    scale = _calc_scale(k);
-    for (y=0; y < dst_h; ++y) {
-        for (x=0; x < dst_w; ++x) {
-            sum = 0.0;
-            k_offset = 0;
-            ky = y+vc;
-            kx = x+uc;
-            for (v=-vc; v <= vc-kh_even; ++v) {
-                img_offset = (ky+v)*w + kx;
-                for (u=-uc; u <= uc-kw_even; ++u, ++k_offset) {
-                    sum += img[img_offset+u] * k->kernel[k_offset];
-                }
-            }
-            dst[y*dst_w + x] = (float)(sum * scale);
-        }
-    }
-
-    if (rw) *rw = dst_w;
-    if (rh) *rh = dst_h;
-}
-
-int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result)
-{
-    int x,y;
-    int img_offset;
-    float scale, *dst=result;
-
-    if (!k || !k->bnd_opt)
-        return 1;
-
-    if (!dst) {
-        dst = (float*)malloc(w*h*sizeof(float));
-        if (!dst)
-            return 2;
-    }
-
-    scale = _calc_scale(k);
-
-    /* Kernel is applied to all positions where top-left corner is in the image */
-    for (y=0; y < h; ++y) {
-        for (x=0; x < w; ++x) {
-            dst[y*w + x] = _iqa_filter_pixel(img, w, h, x, y, k, scale);
-        }
-    }
-
-    /* If no result buffer given, copy results to image buffer */
-    if (!result) {
-        for (y=0; y<h; ++y) {
-            img_offset = y*w;
-            for (x=0; x<w; ++x, ++img_offset) {
-                img[img_offset] = dst[img_offset];
-            }
-        }
-        free(dst);
-    }
-    return 0;
-}
-
-float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale)
-{
-    int u,v,uc,vc;
-    int kw_even,kh_even;
-    int x_edge_left,x_edge_right,y_edge_top,y_edge_bottom;
-    int edge,img_offset,k_offset;
-    double sum;
-
-    if (!k)
-        return img[y*w + x];
-
-    uc = k->w/2;
-    vc = k->h/2;
-    kw_even = (k->w&1)?0:1;
-    kh_even = (k->h&1)?0:1;
-    x_edge_left  = uc;
-    x_edge_right = w-uc;
-    y_edge_top = vc;
-    y_edge_bottom = h-vc;
-
-    edge = 0;
-    if (x < x_edge_left || y < y_edge_top || x >= x_edge_right || y >= y_edge_bottom)
-        edge = 1;
-
-    sum = 0.0;
-    k_offset = 0;
-    for (v=-vc; v <= vc-kh_even; ++v) {
-        img_offset = (y+v)*w + x;
-        for (u=-uc; u <= uc-kw_even; ++u, ++k_offset) {
-            if (!edge)
-                sum += img[img_offset+u] * k->kernel[k_offset];
-            else
-                sum += k->bnd_opt(img, w, h, x+u, y+v, k->bnd_const) * k->kernel[k_offset];
-        }
-    }
-    return (float)(sum * kscale);
-}
diff --git a/3rdparty/iqa/source/decimate.c b/3rdparty/iqa/source/decimate.c
deleted file mode 100644
index 91c6a9be3..000000000
--- a/3rdparty/iqa/source/decimate.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "decimate.h"
-#include <stdlib.h>
-
-int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh)
-{
-    int x,y;
-    int sw = w/factor + (w&1);
-    int sh = h/factor + (h&1);
-    int dst_offset;
-    float *dst=img;
-
-    if (result)
-        dst = result;
-
-    /* Downsample */
-    for (y=0; y<sh; ++y) {
-        dst_offset = y*sw;
-        for (x=0; x<sw; ++x,++dst_offset) {
-            dst[dst_offset] = _iqa_filter_pixel(img, w, h, x*factor, y*factor, k, 1.0f);
-        }
-    }
-    
-    if (rw) *rw = sw;
-    if (rh) *rh = sh;
-    return 0;
-}
diff --git a/3rdparty/iqa/source/math_utils.c b/3rdparty/iqa/source/math_utils.c
deleted file mode 100644
index 83f923d76..000000000
--- a/3rdparty/iqa/source/math_utils.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "math_utils.h"
-#include <math.h>
-
-int _round(float a)
-{
-    int sign_a = a > 0.0f ? 1 : -1;
-    return a-(int)a >= 0.5 ? (int)a + sign_a : (int)a;
-}
-
-int _max(int x, int y)
-{
-    return x >= y ? x : y;
-}
-
-int _min(int x, int y)
-{
-    return x <= y ? x : y;
-}
-
-int _cmp_float(float a, float b, int digits)
-{
-    /* Round */
-    int sign_a = a > 0.0f ? 1 : -1;
-    int sign_b = b > 0.0f ? 1 : -1;
-    double scale = pow(10.0, (double)digits);
-    double ax = a * scale;
-    double bx = b * scale;
-    int ai = ax-(int)ax >= 0.5 ? (int)ax + sign_a : (int)ax;
-    int bi = bx-(int)bx >= 0.5 ? (int)bx + sign_b : (int)bx;
-
-    /* Compare */
-    return ai == bi ? 0 : 1;
-}
-
-int _matrix_cmp(const float *a, const float *b, int w, int h, int digits)
-{
-    int offset;
-    int result=0;
-    int len=w*h;
-    for (offset=0; offset<len; ++offset) {
-        if (_cmp_float(a[offset], b[offset], digits)) {
-            result = 1;
-            break;
-        }
-    }
-
-    return result;
-}
-
diff --git a/3rdparty/iqa/source/ms_ssim.c b/3rdparty/iqa/source/ms_ssim.c
deleted file mode 100644
index 91812a077..000000000
--- a/3rdparty/iqa/source/ms_ssim.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-#include "ssim.h"
-#include "decimate.h"
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* Default number of scales */
-#define SCALES  5
-
-/* Low-pass filter for down-sampling (9/7 biorthogonal wavelet filter) */
-#define LPF_LEN 9
-static const float g_lpf[LPF_LEN][LPF_LEN] = {
-   { 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
-   {-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
-   {-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
-   { 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
-   { 0.016114f,-0.010146f,-0.047149f, 0.160885f, 0.363505f, 0.160885f,-0.047149f,-0.010146f, 0.016114f},
-   { 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
-   {-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
-   {-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
-   { 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
-};
-
-/* Alpha, beta, and gamma values for each scale */
-static float g_alphas[] = { 0.0000f, 0.0000f, 0.0000f, 0.0000f, 0.1333f };
-static float g_betas[]  = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
-static float g_gammas[] = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
-
-
-struct _context {
-    double l;  /* Luminance */
-    double c;  /* Contrast */
-    double s;  /* Structure */
-    float alpha;
-    float beta;
-    float gamma;
-};
-
-/* Called for each pixel */
-int _ms_ssim_map(const struct _ssim_int *si, void *ctx)
-{
-    struct _context *ms_ctx = (struct _context*)ctx;
-    ms_ctx->l += si->l;
-    ms_ctx->c += si->c;
-    ms_ctx->s += si->s;
-    return 0;
-}
-
-/* Called to calculate the final result */
-float _ms_ssim_reduce(int w, int h, void *ctx)
-{
-    double size = (double)(w*h);
-    struct _context *ms_ctx = (struct _context*)ctx;
-    ms_ctx->l = pow(ms_ctx->l / size, (double)ms_ctx->alpha);
-    ms_ctx->c = pow(ms_ctx->c / size, (double)ms_ctx->beta);
-    ms_ctx->s = pow(fabs(ms_ctx->s / size), (double)ms_ctx->gamma);
-    return (float)(ms_ctx->l * ms_ctx->c * ms_ctx->s);
-}
-
-/* Releases the scaled buffers */
-void _free_buffers(float **buf, int scales)
-{
-    int idx;
-    for (idx=0; idx<scales; ++idx)
-        free(buf[idx]);
-}
-
-/* Allocates the scaled buffers. If error, all buffers are free'd */
-int _alloc_buffers(float **buf, int w, int h, int scales)
-{
-    int idx;
-    int cur_w = w;
-    int cur_h = h;
-    for (idx=0; idx<scales; ++idx) {
-        buf[idx] = (float*)malloc(cur_w*cur_h*sizeof(float));
-        if (!buf[idx]) {
-            _free_buffers(buf, idx);
-            return 1;
-        }
-        cur_w = cur_w/2 + (cur_w&1);
-        cur_h = cur_h/2 + (cur_h&1);
-    }
-    return 0;
-}
-
-/*
- * MS_SSIM(X,Y) = Lm(x,y)^aM * MULT[j=1->M]( Cj(x,y)^bj  *  Sj(x,y)^gj )
- * where,
- *  L = mean
- *  C = variance
- *  S = cross-correlation
- *
- *  b1=g1=0.0448, b2=g2=0.2856, b3=g3=0.3001, b4=g4=0.2363, a5=b5=g5=0.1333
- */
-float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, 
-    int stride, const struct iqa_ms_ssim_args *args)
-{
-    int wang=0;
-    int scales=SCALES;
-    int gauss=1;
-    const float *alphas=g_alphas, *betas=g_betas, *gammas=g_gammas;
-    int idx,x,y,cur_w,cur_h;
-    int offset,src_offset;
-    float **ref_imgs, **cmp_imgs; /* Array of pointers to scaled images */
-    float msssim;
-    struct _kernel lpf, window;
-    struct iqa_ssim_args s_args;
-    struct _map_reduce mr;
-    struct _context ms_ctx;
-
-    if (args) {
-        wang   = args->wang;
-        gauss  = args->gaussian;
-        scales = args->scales;
-        if (args->alphas)
-            alphas = args->alphas;
-        if (args->betas)
-            betas  = args->betas;
-        if (args->gammas)
-            gammas = args->gammas;
-    }
-
-    /* Make sure we won't scale below 1x1 */
-    cur_w = w;
-    cur_h = h;
-    for (idx=0; idx<scales; ++idx) {
-        if ( gauss ? cur_w<GAUSSIAN_LEN || cur_h<GAUSSIAN_LEN : cur_w<LPF_LEN || cur_h<LPF_LEN )
-            return INFINITY;
-        cur_w /= 2;
-        cur_h /= 2;
-    }
-
-    window.kernel = (float*)g_square_window;
-    window.w = window.h = SQUARE_LEN;
-    window.normalized = 1;
-    window.bnd_opt = KBND_SYMMETRIC;
-    if (gauss) {
-        window.kernel = (float*)g_gaussian_window;
-        window.w = window.h = GAUSSIAN_LEN;
-    }
-
-    mr.map     = _ms_ssim_map;
-    mr.reduce  = _ms_ssim_reduce;
-
-    /* Allocate the scaled image buffers */
-    ref_imgs = (float**)malloc(scales*sizeof(float*));
-    cmp_imgs = (float**)malloc(scales*sizeof(float*));
-    if (!ref_imgs || !cmp_imgs) {
-        if (ref_imgs) free(ref_imgs);
-        if (cmp_imgs) free(cmp_imgs);
-        return INFINITY;
-    }
-    if (_alloc_buffers(ref_imgs, w, h, scales)) {
-        free(ref_imgs);
-        free(cmp_imgs);
-        return INFINITY;
-    }
-    if (_alloc_buffers(cmp_imgs, w, h, scales)) {
-        _free_buffers(ref_imgs, scales);
-        free(ref_imgs);
-        free(cmp_imgs);
-        return INFINITY;
-    }
-
-    /* Copy original images into first scale buffer, forcing stride = width. */
-    for (y=0; y<h; ++y) {
-        src_offset = y*stride;
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset, ++src_offset) {
-            ref_imgs[0][offset] = (float)ref[src_offset];
-            cmp_imgs[0][offset] = (float)cmp[src_offset];
-        }
-    }
-
-    /* Create scaled versions of the images */
-    cur_w=w;
-    cur_h=h;
-    lpf.kernel = (float*)g_lpf;
-    lpf.w = lpf.h = LPF_LEN;
-    lpf.normalized = 1;
-    lpf.bnd_opt = KBND_SYMMETRIC;
-    for (idx=1; idx<scales; ++idx) {
-        if (_iqa_decimate(ref_imgs[idx-1], cur_w, cur_h, 2, &lpf, ref_imgs[idx], 0, 0) ||
-            _iqa_decimate(cmp_imgs[idx-1], cur_w, cur_h, 2, &lpf, cmp_imgs[idx], &cur_w, &cur_h))
-        {
-            _free_buffers(ref_imgs, scales);
-            _free_buffers(cmp_imgs, scales);
-            free(ref_imgs);
-            free(cmp_imgs);
-            return INFINITY;
-        }
-    }
-
-    cur_w=w;
-    cur_h=h;
-    msssim = 1.0;
-    for (idx=0; idx<scales; ++idx) {
-
-        ms_ctx.l = 0;
-        ms_ctx.c = 0;
-        ms_ctx.s = 0;
-        ms_ctx.alpha = alphas[idx];
-        ms_ctx.beta  = betas[idx];
-        ms_ctx.gamma = gammas[idx];
-
-        if (!wang) {
-            /* MS-SSIM* (Rouse/Hemami) */
-            s_args.alpha = 1.0f;
-            s_args.beta  = 1.0f;
-            s_args.gamma = 1.0f;
-            s_args.K1 = 0.0f; /* Force stabilization constants to 0 */
-            s_args.K2 = 0.0f;
-            s_args.L  = 255;
-            s_args.f  = 1; /* Don't resize */
-            mr.context = &ms_ctx;
-            msssim *= _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
-        }
-        else {
-            /* MS-SSIM (Wang) */
-            s_args.alpha = 1.0f;
-            s_args.beta  = 1.0f;
-            s_args.gamma = 1.0f;
-            s_args.K1 = 0.01f;
-            s_args.K2 = 0.03f;
-            s_args.L  = 255;
-            s_args.f  = 1; /* Don't resize */
-            mr.context = &ms_ctx;
-            msssim *= _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
-        }
-
-        if (msssim == INFINITY)
-            break;
-        cur_w = cur_w/2 + (cur_w&1);
-        cur_h = cur_h/2 + (cur_h&1);
-    }
-
-    _free_buffers(ref_imgs, scales);
-    _free_buffers(cmp_imgs, scales);
-    free(ref_imgs);
-    free(cmp_imgs);
-
-    return msssim;
-}
diff --git a/3rdparty/iqa/source/mse.c b/3rdparty/iqa/source/mse.c
deleted file mode 100644
index da0ce772e..000000000
--- a/3rdparty/iqa/source/mse.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-
-/* MSE(a,b) = 1/N * SUM((a-b)^2) */
-float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
-{
-    int error, offset;
-    unsigned long long sum=0;
-    int ww,hh;
-    for (hh=0; hh<h; ++hh) {
-        offset = hh*stride;
-        for (ww=0; ww<w; ++ww, ++offset) {
-            error = ref[offset] - cmp[offset];
-            sum += error * error;
-        }
-    }
-    return (float)( (double)sum / (double)(w*h) );
-}
diff --git a/3rdparty/iqa/source/psnr.c b/3rdparty/iqa/source/psnr.c
deleted file mode 100644
index 456707165..000000000
--- a/3rdparty/iqa/source/psnr.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-#include <math.h>
-
-/* PSNR(a,b) = 10*log10(L^2 / MSE(a,b)), where L=2^b - 1 (8bit = 255) */
-float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
-{
-    const int L_sqd = 255 * 255;
-    return (float)( 10.0 * log10( L_sqd / iqa_mse(ref,cmp,w,h,stride) ) );
-}
diff --git a/3rdparty/iqa/source/ssim.c b/3rdparty/iqa/source/ssim.c
deleted file mode 100644
index d1acccb40..000000000
--- a/3rdparty/iqa/source/ssim.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-#include "convolve.h"
-#include "decimate.h"
-#include "math_utils.h"
-#include "ssim.h"
-#include <stdlib.h>
-#include <math.h>
-
-
-/* Forward declarations. */
-IQA_INLINE static double _calc_luminance(float, float, float, float);
-IQA_INLINE static double _calc_contrast(double, float, float, float, float);
-IQA_INLINE static double _calc_structure(float, double, float, float, float, float);
-static int _ssim_map(const struct _ssim_int *, void *);
-static float _ssim_reduce(int, int, void *);
-
-/* 
- * SSIM(x,y)=(2*ux*uy + C1)*(2sxy + C2) / (ux^2 + uy^2 + C1)*(sx^2 + sy^2 + C2)
- * where,
- *  ux = SUM(w*x)
- *  sx = (SUM(w*(x-ux)^2)^0.5
- *  sxy = SUM(w*(x-ux)*(y-uy))
- *
- * Returns mean SSIM. MSSIM(X,Y) = 1/M * SUM(SSIM(x,y))
- */
-float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride,
-    int gaussian, const struct iqa_ssim_args *args)
-{
-    int scale;
-    int x,y,src_offset,offset;
-    float *ref_f,*cmp_f;
-    struct _kernel low_pass;
-    struct _kernel window;
-    float result;
-    double ssim_sum=0.0;
-    struct _map_reduce mr;
-
-    /* Initialize algorithm parameters */
-    scale = _max( 1, _round( (float)_min(w,h) / 256.0f ) );
-    if (args) {
-        if(args->f)
-            scale = args->f;
-        mr.map     = _ssim_map;
-        mr.reduce  = _ssim_reduce;
-        mr.context = (void*)&ssim_sum;
-    }
-    window.kernel = (float*)g_square_window;
-    window.w = window.h = SQUARE_LEN;
-    window.normalized = 1;
-    window.bnd_opt = KBND_SYMMETRIC;
-    if (gaussian) {
-        window.kernel = (float*)g_gaussian_window;
-        window.w = window.h = GAUSSIAN_LEN;
-    }
-
-    /* Convert image values to floats. Forcing stride = width. */
-    ref_f = (float*)malloc(w*h*sizeof(float));
-    cmp_f = (float*)malloc(w*h*sizeof(float));
-    if (!ref_f || !cmp_f) {
-        if (ref_f) free(ref_f);
-        if (cmp_f) free(cmp_f);
-        return INFINITY;
-    }
-    for (y=0; y<h; ++y) {
-        src_offset = y*stride;
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset, ++src_offset) {
-            ref_f[offset] = (float)ref[src_offset];
-            cmp_f[offset] = (float)cmp[src_offset];
-        }
-    }
-
-    /* Scale the images down if required */
-    if (scale > 1) {
-        /* Generate simple low-pass filter */
-        low_pass.kernel = (float*)malloc(scale*scale*sizeof(float));
-        if (!low_pass.kernel) {
-            free(ref_f);
-            free(cmp_f);
-            return INFINITY;
-        }
-        low_pass.w = low_pass.h = scale;
-        low_pass.normalized = 0;
-        low_pass.bnd_opt = KBND_SYMMETRIC;
-        for (offset=0; offset<scale*scale; ++offset)
-            low_pass.kernel[offset] = 1.0f/(scale*scale);
-
-        /* Resample */
-        if (_iqa_decimate(ref_f, w, h, scale, &low_pass, 0, 0, 0) ||
-            _iqa_decimate(cmp_f, w, h, scale, &low_pass, 0, &w, &h)) { /* Update w/h */
-            free(ref_f);
-            free(cmp_f);
-            free(low_pass.kernel);
-            return INFINITY;
-        }
-        free(low_pass.kernel);
-    }
-
-    result = _iqa_ssim(ref_f, cmp_f, w, h, &window, &mr, args);
-    
-    free(ref_f);
-    free(cmp_f);
-
-    return result;
-}
-
-
-/* _iqa_ssim */
-float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args)
-{
-    float alpha=1.0f, beta=1.0f, gamma=1.0f;
-    int L=255;
-    float K1=0.01f, K2=0.03f;
-    float C1,C2,C3;
-    int x,y,offset;
-    float *ref_mu,*cmp_mu,*ref_sigma_sqd,*cmp_sigma_sqd,*sigma_both;
-    double ssim_sum, numerator, denominator;
-    double luminance_comp, contrast_comp, structure_comp, sigma_root;
-    struct _ssim_int sint;
-
-    /* Initialize algorithm parameters */
-    if (args) {
-        if (!mr)
-            return INFINITY;
-        alpha = args->alpha;
-        beta  = args->beta;
-        gamma = args->gamma;
-        L     = args->L;
-        K1    = args->K1;
-        K2    = args->K2;
-    }
-    C1 = (K1*L)*(K1*L);
-    C2 = (K2*L)*(K2*L);
-    C3 = C2 / 2.0f;
-
-    ref_mu = (float*)malloc(w*h*sizeof(float));
-    cmp_mu = (float*)malloc(w*h*sizeof(float));
-    ref_sigma_sqd = (float*)malloc(w*h*sizeof(float));
-    cmp_sigma_sqd = (float*)malloc(w*h*sizeof(float));
-    sigma_both = (float*)malloc(w*h*sizeof(float));
-    if (!ref_mu || !cmp_mu || !ref_sigma_sqd || !cmp_sigma_sqd || !sigma_both) {
-        if (ref_mu) free(ref_mu);
-        if (cmp_mu) free(cmp_mu);
-        if (ref_sigma_sqd) free(ref_sigma_sqd);
-        if (cmp_sigma_sqd) free(cmp_sigma_sqd);
-        if (sigma_both) free(sigma_both);
-        return INFINITY;
-    }
-
-    /* Calculate mean */
-    _iqa_convolve(ref, w, h, k, ref_mu, 0, 0);
-    _iqa_convolve(cmp, w, h, k, cmp_mu, 0, 0);
-
-    for (y=0; y<h; ++y) {
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset) {
-            ref_sigma_sqd[offset] = ref[offset] * ref[offset];
-            cmp_sigma_sqd[offset] = cmp[offset] * cmp[offset];
-            sigma_both[offset] = ref[offset] * cmp[offset];
-        }
-    }
-
-    /* Calculate sigma */
-    _iqa_convolve(ref_sigma_sqd, w, h, k, 0, 0, 0);
-    _iqa_convolve(cmp_sigma_sqd, w, h, k, 0, 0, 0);
-    _iqa_convolve(sigma_both, w, h, k, 0, &w, &h); /* Update the width and height */
-
-    /* The convolution results are smaller by the kernel width and height */
-    for (y=0; y<h; ++y) {
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset) {
-            ref_sigma_sqd[offset] -= ref_mu[offset] * ref_mu[offset];
-            cmp_sigma_sqd[offset] -= cmp_mu[offset] * cmp_mu[offset];
-            sigma_both[offset] -= ref_mu[offset] * cmp_mu[offset];
-        }
-    }
-
-    ssim_sum = 0.0;
-    for (y=0; y<h; ++y) {
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset) {
-
-            if (!args) {
-                /* The default case */
-                numerator   = (2.0 * ref_mu[offset] * cmp_mu[offset] + C1) * (2.0 * sigma_both[offset] + C2);
-                denominator = (ref_mu[offset]*ref_mu[offset] + cmp_mu[offset]*cmp_mu[offset] + C1) * 
-                    (ref_sigma_sqd[offset] + cmp_sigma_sqd[offset] + C2);
-                ssim_sum += numerator / denominator;
-            }
-            else {
-                /* User tweaked alpha, beta, or gamma */
-
-                /* passing a negative number to sqrt() cause a domain error */
-                if (ref_sigma_sqd[offset] < 0.0f)
-                    ref_sigma_sqd[offset] = 0.0f;
-                if (cmp_sigma_sqd[offset] < 0.0f)
-                    cmp_sigma_sqd[offset] = 0.0f;
-                sigma_root = sqrt(ref_sigma_sqd[offset] * cmp_sigma_sqd[offset]);
-
-                luminance_comp = _calc_luminance(ref_mu[offset], cmp_mu[offset], C1, alpha);
-                contrast_comp  = _calc_contrast(sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C2, beta);
-                structure_comp = _calc_structure(sigma_both[offset], sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C3, gamma);
-
-                sint.l = luminance_comp;
-                sint.c = contrast_comp;
-                sint.s = structure_comp;
-
-                if (mr->map(&sint, mr->context))
-                    return INFINITY;
-            }
-        }
-    }
-
-    free(ref_mu);
-    free(cmp_mu);
-    free(ref_sigma_sqd);
-    free(cmp_sigma_sqd);
-    free(sigma_both);
-
-    if (!args)
-        return (float)(ssim_sum / (double)(w*h));
-    return mr->reduce(w, h, mr->context);
-}
-
-
-/* _ssim_map */
-int _ssim_map(const struct _ssim_int *si, void *ctx)
-{
-    double *ssim_sum = (double*)ctx;
-    *ssim_sum += si->l * si->c * si->s;
-    return 0;
-}
-
-/* _ssim_reduce */
-float _ssim_reduce(int w, int h, void *ctx)
-{
-    double *ssim_sum = (double*)ctx;
-    return (float)(*ssim_sum / (double)(w*h));
-}
-
-
-/* _calc_luminance */
-IQA_INLINE static double _calc_luminance(float mu1, float mu2, float C1, float alpha)
-{
-    double result;
-    float sign;
-    /* For MS-SSIM* */
-    if (C1 == 0 && mu1*mu1 == 0 && mu2*mu2 == 0)
-        return 1.0;
-    result = (2.0 * mu1 * mu2 + C1) / (mu1*mu1 + mu2*mu2 + C1);
-    if (alpha == 1.0f)
-        return result;
-    sign = result < 0.0 ? -1.0f : 1.0f;
-    return sign * pow(fabs(result),(double)alpha);
-}
-
-/* _calc_contrast */
-IQA_INLINE static double _calc_contrast(double sigma_comb_12, float sigma1_sqd, float sigma2_sqd, float C2, float beta)
-{
-    double result;
-    float sign;
-    /* For MS-SSIM* */
-    if (C2 == 0 && sigma1_sqd + sigma2_sqd == 0)
-        return 1.0;
-    result = (2.0 * sigma_comb_12 + C2) / (sigma1_sqd + sigma2_sqd + C2);
-    if (beta == 1.0f)
-        return result;
-    sign = result < 0.0 ? -1.0f : 1.0f;
-    return sign * pow(fabs(result),(double)beta);
-}
-
-/* _calc_structure */
-IQA_INLINE static double _calc_structure(float sigma_12, double sigma_comb_12, float sigma1, float sigma2, float C3, float gamma)
-{
-    double result;
-    float sign;
-    /* For MS-SSIM* */
-    if (C3 == 0 && sigma_comb_12 == 0) {
-        if (sigma1 == 0 && sigma2 == 0)
-            return 1.0;
-        else if (sigma1 == 0 || sigma2 == 0)
-            return 0.0;
-    }
-    result = (sigma_12 + C3) / (sigma_comb_12 + C3);
-    if (gamma == 1.0f)
-        return result;
-    sign = result < 0.0 ? -1.0f : 1.0f;
-    return sign * pow(fabs(result),(double)gamma);
-}
\ No newline at end of file
diff --git a/3rdparty/libsquish/LICENSE b/3rdparty/libsquish/LICENSE
deleted file mode 100644
index ed1c78d93..000000000
--- a/3rdparty/libsquish/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/3rdparty/libsquish/README b/3rdparty/libsquish/README
deleted file mode 100644
index d26b72ed5..000000000
--- a/3rdparty/libsquish/README
+++ /dev/null
@@ -1,35 +0,0 @@
-LICENSE
--------
-
-The squish library is distributed under the terms and conditions of the MIT
-license. This license is specified at the top of each source file and must be
-preserved in its entirety.
-
-BUILDING AND INSTALLING THE LIBRARY
------------------------------------
-
-If you are using Visual Studio 2003 or above under Windows then load the Visual
-Studio 2003 project in the vs7 folder. By default, the library is built using
-SSE2 optimisations. To change this either change or remove the SQUISH_USE_SSE=2
-from the preprocessor symbols.
-
-If you are using a Mac then load the Xcode 2.2 project in the distribution. By
-default, the library is built using Altivec optimisations. To change this
-either change or remove SQUISH_USE_ALTIVEC=1 from the preprocessor symbols. I
-guess I'll have to think about changing this for the new Intel Macs that are
-rolling out...
-
-If you are using unix then first edit the config file in the base directory of
-the distribution, enabling Altivec or SSE with the USE_ALTIVEC or USE_SSE
-variables, and editing the optimisation flags passed to the C++ compiler if
-necessary. Then make can be used to build the library, and make install (from
-the superuser account) can be used to install (into /usr/local by default).
-
-REPORTING BUGS OR FEATURE REQUESTS
-----------------------------------
-
-Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk
-
-New releases are announced on the squish library homepage at
-http://sjbrown.co.uk/?code=squish
-
diff --git a/3rdparty/libsquish/alpha.cpp b/3rdparty/libsquish/alpha.cpp
deleted file mode 100644
index 0f94e2147..000000000
--- a/3rdparty/libsquish/alpha.cpp
+++ /dev/null
@@ -1,350 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "alpha.h"
-
-#include <climits>
-#include <algorithm>
-
-namespace squish {
-
-static int FloatToInt( float a, int limit )
-{
-	// use ANSI round-to-zero behaviour to get round-to-nearest
-	int i = ( int )( a + 0.5f );
-
-	// clamp to the limit
-	if( i < 0 )
-		i = 0;
-	else if( i > limit )
-		i = limit; 
-
-	// done
-	return i;
-}
-
-void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
-{
-	u8* bytes = reinterpret_cast< u8* >( block );
-	
-	// quantise and pack the alpha values pairwise
-	for( int i = 0; i < 8; ++i )
-	{
-		// quantise down to 4 bits
-		float alpha1 = ( float )rgba[8*i + 3] * ( 15.0f/255.0f );
-		float alpha2 = ( float )rgba[8*i + 7] * ( 15.0f/255.0f );
-		int quant1 = FloatToInt( alpha1, 15 );
-		int quant2 = FloatToInt( alpha2, 15 );
-		
-		// set alpha to zero where masked
-		int bit1 = 1 << ( 2*i );
-		int bit2 = 1 << ( 2*i + 1 );
-		if( ( mask & bit1 ) == 0 )
-			quant1 = 0;
-		if( ( mask & bit2 ) == 0 )
-			quant2 = 0;
-
-		// pack into the byte
-		bytes[i] = ( u8 )( quant1 | ( quant2 << 4 ) );
-	}
-}
-
-void DecompressAlphaDxt3( u8* rgba, void const* block )
-{
-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
-	
-	// unpack the alpha values pairwise
-	for( int i = 0; i < 8; ++i )
-	{
-		// quantise down to 4 bits
-		u8 quant = bytes[i];
-		
-		// unpack the values
-		u8 lo = quant & 0x0f;
-		u8 hi = quant & 0xf0;
-
-		// convert back up to bytes
-		rgba[8*i + 3] = lo | ( lo << 4 );
-		rgba[8*i + 7] = hi | ( hi >> 4 );
-	}
-}
-
-static void FixRange( int& min, int& max, int steps )
-{
-	if( max - min < steps )
-		max = std::min( min + steps, 255 );
-	if( max - min < steps )
-		min = std::max( 0, max - steps );
-}
-
-static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
-{
-	// fit each alpha value to the codebook
-	int err = 0;
-	for( int i = 0; i < 16; ++i )
-	{
-		// check this pixel is valid
-		int bit = 1 << i;
-		if( ( mask & bit ) == 0 )
-		{
-			// use the first code
-			indices[i] = 0;
-			continue;
-		}
-		
-		// find the least error and corresponding index
-		int value = rgba[4*i + 3];
-		int least = INT_MAX;
-		int index = 0;
-		for( int j = 0; j < 8; ++j )
-		{
-			// get the squared error from this code
-			int dist = ( int )value - ( int )codes[j];
-			dist *= dist;
-			
-			// compare with the best so far
-			if( dist < least )
-			{
-				least = dist;
-				index = j;
-			}
-		}
-		
-		// save this index and accumulate the error
-		indices[i] = ( u8 )index;
-		err += least;
-	}
-	
-	// return the total error
-	return err;
-}
-
-static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
-{
-	u8* bytes = reinterpret_cast< u8* >( block );
-	
-	// write the first two bytes
-	bytes[0] = ( u8 )alpha0;
-	bytes[1] = ( u8 )alpha1;
-	
-	// pack the indices with 3 bits each
-	u8* dest = bytes + 2;
-	u8 const* src = indices;
-	for( int i = 0; i < 2; ++i )
-	{
-		// pack 8 3-bit values
-		int value = 0;
-		for( int j = 0; j < 8; ++j )
-		{
-			int index = *src++;
-			value |= ( index << 3*j );
-		}
-			
-		// store in 3 bytes
-		for( int j = 0; j < 3; ++j )
-		{
-			int byte = ( value >> 8*j ) & 0xff;
-			*dest++ = ( u8 )byte;
-		}
-	}
-}
-
-static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
-{
-	// check the relative values of the endpoints
-	if( alpha0 > alpha1 )
-	{
-		// swap the indices
-		u8 swapped[16];
-		for( int i = 0; i < 16; ++i )
-		{
-			u8 index = indices[i];
-			if( index == 0 )
-				swapped[i] = 1;
-			else if( index == 1 )
-				swapped[i] = 0;
-			else if( index <= 5 )
-				swapped[i] = 7 - index;
-			else 
-				swapped[i] = index;
-		}
-		
-		// write the block
-		WriteAlphaBlock( alpha1, alpha0, swapped, block );
-	}
-	else
-	{
-		// write the block
-		WriteAlphaBlock( alpha0, alpha1, indices, block );
-	}	
-}
-
-static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
-{
-	// check the relative values of the endpoints
-	if( alpha0 < alpha1 )
-	{
-		// swap the indices
-		u8 swapped[16];
-		for( int i = 0; i < 16; ++i )
-		{
-			u8 index = indices[i];
-			if( index == 0 )
-				swapped[i] = 1;
-			else if( index == 1 )
-				swapped[i] = 0;
-			else
-				swapped[i] = 9 - index;
-		}
-		
-		// write the block
-		WriteAlphaBlock( alpha1, alpha0, swapped, block );
-	}
-	else
-	{
-		// write the block
-		WriteAlphaBlock( alpha0, alpha1, indices, block );
-	}	
-}
-
-void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
-{
-	// get the range for 5-alpha and 7-alpha interpolation
-	int min5 = 255;
-	int max5 = 0;
-	int min7 = 255;
-	int max7 = 0;
-	for( int i = 0; i < 16; ++i )
-	{
-		// check this pixel is valid
-		int bit = 1 << i;
-		if( ( mask & bit ) == 0 )
-			continue;
-
-		// incorporate into the min/max
-		int value = rgba[4*i + 3];
-		if( value < min7 )
-			min7 = value;
-		if( value > max7 )
-			max7 = value;
-		if( value != 0 && value < min5 )
-			min5 = value;
-		if( value != 255 && value > max5 )
-			max5 = value;
-	}
-	
-	// handle the case that no valid range was found
-	if( min5 > max5 )
-		min5 = max5;
-	if( min7 > max7 )
-		min7 = max7;
-		
-	// fix the range to be the minimum in each case
-	FixRange( min5, max5, 5 );
-	FixRange( min7, max7, 7 );
-	
-	// set up the 5-alpha code book
-	u8 codes5[8];
-	codes5[0] = ( u8 )min5;
-	codes5[1] = ( u8 )max5;
-	for( int i = 1; i < 5; ++i )
-		codes5[1 + i] = ( u8 )( ( ( 5 - i )*min5 + i*max5 )/5 );
-	codes5[6] = 0;
-	codes5[7] = 255;
-	
-	// set up the 7-alpha code book
-	u8 codes7[8];
-	codes7[0] = ( u8 )min7;
-	codes7[1] = ( u8 )max7;
-	for( int i = 1; i < 7; ++i )
-		codes7[1 + i] = ( u8 )( ( ( 7 - i )*min7 + i*max7 )/7 );
-		
-	// fit the data to both code books
-	u8 indices5[16];
-	u8 indices7[16];
-	int err5 = FitCodes( rgba, mask, codes5, indices5 );
-	int err7 = FitCodes( rgba, mask, codes7, indices7 );
-	
-	// save the block with least error
-	if( err5 <= err7 )
-		WriteAlphaBlock5( min5, max5, indices5, block );
-	else
-		WriteAlphaBlock7( min7, max7, indices7, block );
-}
-
-void DecompressAlphaDxt5( u8* rgba, void const* block )
-{
-	// get the two alpha values
-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
-	int alpha0 = bytes[0];
-	int alpha1 = bytes[1];
-	
-	// compare the values to build the codebook
-	u8 codes[8];
-	codes[0] = ( u8 )alpha0;
-	codes[1] = ( u8 )alpha1;
-	if( alpha0 <= alpha1 )
-	{
-		// use 5-alpha codebook
-		for( int i = 1; i < 5; ++i )
-			codes[1 + i] = ( u8 )( ( ( 5 - i )*alpha0 + i*alpha1 )/5 );
-		codes[6] = 0;
-		codes[7] = 255;
-	}
-	else
-	{
-		// use 7-alpha codebook
-		for( int i = 1; i < 7; ++i )
-			codes[1 + i] = ( u8 )( ( ( 7 - i )*alpha0 + i*alpha1 )/7 );
-	}
-	
-	// decode the indices
-	u8 indices[16];
-	u8 const* src = bytes + 2;
-	u8* dest = indices;
-	for( int i = 0; i < 2; ++i )
-	{
-		// grab 3 bytes
-		int value = 0;
-		for( int j = 0; j < 3; ++j )
-		{
-			int byte = *src++;
-			value |= ( byte << 8*j );
-		}
-		
-		// unpack 8 3-bit values from it
-		for( int j = 0; j < 8; ++j )
-		{
-			int index = ( value >> 3*j ) & 0x7;
-			*dest++ = ( u8 )index;
-		}
-	}
-	
-	// write out the indexed codebook values
-	for( int i = 0; i < 16; ++i )
-		rgba[4*i + 3] = codes[indices[i]];
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/alpha.h b/3rdparty/libsquish/alpha.h
deleted file mode 100644
index e5e7f320a..000000000
--- a/3rdparty/libsquish/alpha.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_ALPHA_H
-#define SQUISH_ALPHA_H
-
-#include "squish.h"
-
-namespace squish {
-
-void CompressAlphaDxt3( u8 const* rgba, int mask, void* block );
-void CompressAlphaDxt5( u8 const* rgba, int mask, void* block );
-
-void DecompressAlphaDxt3( u8* rgba, void const* block );
-void DecompressAlphaDxt5( u8* rgba, void const* block );
-
-} // namespace squish
-
-#endif // ndef SQUISH_ALPHA_H
diff --git a/3rdparty/libsquish/clusterfit.cpp b/3rdparty/libsquish/clusterfit.cpp
deleted file mode 100644
index 96704460e..000000000
--- a/3rdparty/libsquish/clusterfit.cpp
+++ /dev/null
@@ -1,392 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-	Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "clusterfit.h"
-#include "colourset.h"
-#include "colourblock.h"
-#include <cfloat>
-
-namespace squish {
-
-ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 
-  : ColourFit( colours, flags )
-{
-	// set the iteration count
-	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
-
-	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
-	if( metric )
-		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
-	else
-		m_metric = VEC4_CONST( 1.0f );	
-
-	// initialise the best error
-	m_besterror = VEC4_CONST( FLT_MAX );
-
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-
-	// get the covariance matrix
-	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
-	
-	// compute the principle component
-	m_principle = ComputePrincipleComponent( covariance );
-}
-
-bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
-{
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-
-	// build the list of dot products
-	float dps[16];
-	u8* order = ( u8* )m_order + 16*iteration;
-	for( int i = 0; i < count; ++i )
-	{
-		dps[i] = Dot( values[i], axis );
-		order[i] = ( u8 )i;
-	}
-		
-	// stable sort using them
-	for( int i = 0; i < count; ++i )
-	{
-		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
-		{
-			std::swap( dps[j], dps[j - 1] );
-			std::swap( order[j], order[j - 1] );
-		}
-	}
-	
-	// check this ordering is unique
-	for( int it = 0; it < iteration; ++it )
-	{
-		u8 const* prev = ( u8* )m_order + 16*it;
-		bool same = true;
-		for( int i = 0; i < count; ++i )
-		{
-			if( order[i] != prev[i] )
-			{
-				same = false;
-				break;
-			}
-		}
-		if( same )
-			return false;
-	}
-	
-	// copy the ordering and weight all the points
-	Vec3 const* unweighted = m_colours->GetPoints();
-	float const* weights = m_colours->GetWeights();
-	m_xsum_wsum = VEC4_CONST( 0.0f );
-	for( int i = 0; i < count; ++i )
-	{
-		int j = order[i];
-		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
-		Vec4 w( weights[j] );
-		Vec4 x = p*w;
-		m_points_weights[i] = x;
-		m_xsum_wsum += x;
-	}
-	return true;
-}
-
-void ClusterFit::Compress3( void* block )
-{
-	// declare variables
-	int const count = m_colours->GetCount();
-	Vec4 const two = VEC4_CONST( 2.0 );
-	Vec4 const one = VEC4_CONST( 1.0f );
-	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
-	Vec4 const zero = VEC4_CONST( 0.0f );
-	Vec4 const half = VEC4_CONST( 0.5f );
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
-
-	// prepare an ordering using the principle axis
-	ConstructOrdering( m_principle, 0 );
-	
-	// check all possible clusters and iterate on the total order
-	Vec4 beststart = VEC4_CONST( 0.0f );
-	Vec4 bestend = VEC4_CONST( 0.0f );
-	Vec4 besterror = m_besterror;
-	u8 bestindices[16];
-	int bestiteration = 0;
-	int besti = 0, bestj = 0;
-	
-	// loop over iterations (we avoid the case that all points in first or last cluster)
-	for( int iterationIndex = 0;; )
-	{
-		// first cluster [0,i) is at the start
-		Vec4 part0 = VEC4_CONST( 0.0f );
-		for( int i = 0; i < count; ++i )
-		{
-			// second cluster [i,j) is half along
-			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
-			int jmin = ( i == 0 ) ? 1 : i;
-			for( int j = jmin;; )
-			{
-				// last cluster [j,count) is at the end
-				Vec4 part2 = m_xsum_wsum - part1 - part0;
-				
-				// compute least squares terms directly
-				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
-				Vec4 alpha2_sum = alphax_sum.SplatW();
-
-				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
-				Vec4 beta2_sum = betax_sum.SplatW();
-
-				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
-
-				// compute the least-squares optimal points
-				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
-				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
-				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
-
-				// clamp to the grid
-				a = Min( one, Max( zero, a ) );
-				b = Min( one, Max( zero, b ) );
-				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
-				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
-				
-				// compute the error (we skip the constant xxsum)
-				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-				Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
-				// apply the metric to the error term
-				Vec4 e5 = e4*m_metric;
-				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-				
-				// keep the solution if it wins
-				if( CompareAnyLessThan( error, besterror ) )
-				{
-					beststart = a;
-					bestend = b;
-					besti = i;
-					bestj = j;
-					besterror = error;
-					bestiteration = iterationIndex;
-				}
-
-				// advance
-				if( j == count )
-					break;
-				part1 += m_points_weights[j];
-				++j;
-			}
-
-			// advance
-			part0 += m_points_weights[i];
-		}
-		
-		// stop if we didn't improve in this iteration
-		if( bestiteration != iterationIndex )
-			break;
-			
-		// advance if possible
-		++iterationIndex;
-		if( iterationIndex == m_iterationCount )
-			break;
-			
-		// stop if a new iteration is an ordering that has already been tried
-		Vec3 axis = ( bestend - beststart ).GetVec3();
-		if( !ConstructOrdering( axis, iterationIndex ) )
-			break;
-	}
-		
-	// save the block if necessary
-	if( CompareAnyLessThan( besterror, m_besterror ) )
-	{
-		// remap the indices
-		u8 const* order = ( u8* )m_order + 16*bestiteration;
-
-		u8 unordered[16];
-		for( int m = 0; m < besti; ++m )
-			unordered[order[m]] = 0;
-		for( int m = besti; m < bestj; ++m )
-			unordered[order[m]] = 2;
-		for( int m = bestj; m < count; ++m )
-			unordered[order[m]] = 1;
-
-		m_colours->RemapIndices( unordered, bestindices );
-		
-		// save the block
-		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
-
-		// save the error
-		m_besterror = besterror;
-	}
-}
-
-void ClusterFit::Compress4( void* block )
-{
-	// declare variables
-	int const count = m_colours->GetCount();
-	Vec4 const two = VEC4_CONST( 2.0f );
-	Vec4 const one = VEC4_CONST( 1.0f );
-	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
-	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
-	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
-	Vec4 const zero = VEC4_CONST( 0.0f );
-	Vec4 const half = VEC4_CONST( 0.5f );
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
-
-	// prepare an ordering using the principle axis
-	ConstructOrdering( m_principle, 0 );
-	
-	// check all possible clusters and iterate on the total order
-	Vec4 beststart = VEC4_CONST( 0.0f );
-	Vec4 bestend = VEC4_CONST( 0.0f );
-	Vec4 besterror = m_besterror;
-	u8 bestindices[16];
-	int bestiteration = 0;
-	int besti = 0, bestj = 0, bestk = 0;
-	
-	// loop over iterations (we avoid the case that all points in first or last cluster)
-	for( int iterationIndex = 0;; )
-	{
-		// first cluster [0,i) is at the start
-		Vec4 part0 = VEC4_CONST( 0.0f );
-		for( int i = 0; i < count; ++i )
-		{
-			// second cluster [i,j) is one third along
-			Vec4 part1 = VEC4_CONST( 0.0f );
-			for( int j = i;; )
-			{
-				// third cluster [j,k) is two thirds along
-				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
-				int kmin = ( j == 0 ) ? 1 : j;
-				for( int k = kmin;; )
-				{
-					// last cluster [k,count) is at the end
-					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
-
-					// compute least squares terms directly
-					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
-					Vec4 const alpha2_sum = alphax_sum.SplatW();
-					
-					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
-					Vec4 const beta2_sum = betax_sum.SplatW();
-					
-					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
-
-					// compute the least-squares optimal points
-					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
-					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
-					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
-
-					// clamp to the grid
-					a = Min( one, Max( zero, a ) );
-					b = Min( one, Max( zero, b ) );
-					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
-					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
-					
-					// compute the error (we skip the constant xxsum)
-					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-					Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
-					// apply the metric to the error term
-					Vec4 e5 = e4*m_metric;
-					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-
-					// keep the solution if it wins
-					if( CompareAnyLessThan( error, besterror ) )
-					{
-						beststart = a;
-						bestend = b;
-						besterror = error;
-						besti = i;
-						bestj = j;
-						bestk = k;
-						bestiteration = iterationIndex;
-					}
-
-					// advance
-					if( k == count )
-						break;
-					part2 += m_points_weights[k];
-					++k;
-				}
-
-				// advance
-				if( j == count )
-					break;
-				part1 += m_points_weights[j];
-				++j;
-			}
-
-			// advance
-			part0 += m_points_weights[i];
-		}
-		
-		// stop if we didn't improve in this iteration
-		if( bestiteration != iterationIndex )
-			break;
-			
-		// advance if possible
-		++iterationIndex;
-		if( iterationIndex == m_iterationCount )
-			break;
-			
-		// stop if a new iteration is an ordering that has already been tried
-		Vec3 axis = ( bestend - beststart ).GetVec3();
-		if( !ConstructOrdering( axis, iterationIndex ) )
-			break;
-	}
-
-	// save the block if necessary
-	if( CompareAnyLessThan( besterror, m_besterror ) )
-	{
-		// remap the indices
-		u8 const* order = ( u8* )m_order + 16*bestiteration;
-
-		u8 unordered[16];
-		for( int m = 0; m < besti; ++m )
-			unordered[order[m]] = 0;
-		for( int m = besti; m < bestj; ++m )
-			unordered[order[m]] = 2;
-		for( int m = bestj; m < bestk; ++m )
-			unordered[order[m]] = 3;
-		for( int m = bestk; m < count; ++m )
-			unordered[order[m]] = 1;
-
-		m_colours->RemapIndices( unordered, bestindices );
-		
-		// save the block
-		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
-
-		// save the error
-		m_besterror = besterror;
-	}
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/clusterfit.h b/3rdparty/libsquish/clusterfit.h
deleted file mode 100644
index c882469c8..000000000
--- a/3rdparty/libsquish/clusterfit.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-	Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_CLUSTERFIT_H
-#define SQUISH_CLUSTERFIT_H
-
-#include "squish.h"
-#include "maths.h"
-#include "simd.h"
-#include "colourfit.h"
-
-namespace squish {
-
-class ClusterFit : public ColourFit
-{
-public:
-	ClusterFit( ColourSet const* colours, int flags, float* metric );
-	
-private:
-	bool ConstructOrdering( Vec3 const& axis, int iteration );
-
-	virtual void Compress3( void* block );
-	virtual void Compress4( void* block );
-
-	enum { kMaxIterations = 8 };
-
-	int m_iterationCount;
-	Vec3 m_principle;
-	u8 m_order[16*kMaxIterations];
-	Vec4 m_points_weights[16];
-	Vec4 m_xsum_wsum;
-	Vec4 m_metric;
-	Vec4 m_besterror;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_CLUSTERFIT_H
diff --git a/3rdparty/libsquish/colourblock.cpp b/3rdparty/libsquish/colourblock.cpp
deleted file mode 100644
index e6a5788b7..000000000
--- a/3rdparty/libsquish/colourblock.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "colourblock.h"
-
-namespace squish {
-
-static int FloatToInt( float a, int limit )
-{
-	// use ANSI round-to-zero behaviour to get round-to-nearest
-	int i = ( int )( a + 0.5f );
-
-	// clamp to the limit
-	if( i < 0 )
-		i = 0;
-	else if( i > limit )
-		i = limit; 
-
-	// done
-	return i;
-}
-
-static int FloatTo565( Vec3::Arg colour )
-{
-	// get the components in the correct range
-	int r = FloatToInt( 31.0f*colour.X(), 31 );
-	int g = FloatToInt( 63.0f*colour.Y(), 63 );
-	int b = FloatToInt( 31.0f*colour.Z(), 31 );
-	
-	// pack into a single value
-	return ( r << 11 ) | ( g << 5 ) | b;
-}
-
-static void WriteColourBlock( int a, int b, u8* indices, void* block )
-{
-	// get the block as bytes
-	u8* bytes = ( u8* )block;
-
-	// write the endpoints
-	bytes[0] = ( u8 )( a & 0xff );
-	bytes[1] = ( u8 )( a >> 8 );
-	bytes[2] = ( u8 )( b & 0xff );
-	bytes[3] = ( u8 )( b >> 8 );
-	
-	// write the indices
-	for( int i = 0; i < 4; ++i )
-	{
-		u8 const* ind = indices + 4*i;
-		bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
-	}
-}
-
-void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
-{
-	// get the packed values
-	int a = FloatTo565( start );
-	int b = FloatTo565( end );
-
-	// remap the indices
-	u8 remapped[16];
-	if( a <= b )
-	{
-		// use the indices directly
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = indices[i];
-	}
-	else
-	{
-		// swap a and b
-		std::swap( a, b );
-		for( int i = 0; i < 16; ++i )
-		{
-			if( indices[i] == 0 )
-				remapped[i] = 1;
-			else if( indices[i] == 1 )
-				remapped[i] = 0;
-			else
-				remapped[i] = indices[i];
-		}
-	}
-	
-	// write the block
-	WriteColourBlock( a, b, remapped, block );
-}
-
-void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
-{
-	// get the packed values
-	int a = FloatTo565( start );
-	int b = FloatTo565( end );
-
-	// remap the indices
-	u8 remapped[16];
-	if( a < b )
-	{
-		// swap a and b
-		std::swap( a, b );
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
-	}
-	else if( a == b )
-	{
-		// use index 0
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = 0;
-	}
-	else
-	{
-		// use the indices directly
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = indices[i];
-	}
-	
-	// write the block
-	WriteColourBlock( a, b, remapped, block );
-}
-
-static int Unpack565( u8 const* packed, u8* colour )
-{
-	// build the packed value
-	int value = ( int )packed[0] | ( ( int )packed[1] << 8 );
-	
-	// get the components in the stored range
-	u8 red = ( u8 )( ( value >> 11 ) & 0x1f );
-	u8 green = ( u8 )( ( value >> 5 ) & 0x3f );
-	u8 blue = ( u8 )( value & 0x1f );
-
-	// scale up to 8 bits
-	colour[0] = ( red << 3 ) | ( red >> 2 );
-	colour[1] = ( green << 2 ) | ( green >> 4 );
-	colour[2] = ( blue << 3 ) | ( blue >> 2 );
-	colour[3] = 255;
-	
-	// return the value
-	return value;
-}
-
-void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
-{
-	// get the block bytes
-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
-	
-	// unpack the endpoints
-	u8 codes[16];
-	int a = Unpack565( bytes, codes );
-	int b = Unpack565( bytes + 2, codes + 4 );
-	
-	// generate the midpoints
-	for( int i = 0; i < 3; ++i )
-	{
-		int c = codes[i];
-		int d = codes[4 + i];
-
-		if( isDxt1 && a <= b )
-		{
-			codes[8 + i] = ( u8 )( ( c + d )/2 );
-			codes[12 + i] = 0;
-		}
-		else
-		{
-			codes[8 + i] = ( u8 )( ( 2*c + d )/3 );
-			codes[12 + i] = ( u8 )( ( c + 2*d )/3 );
-		}
-	}
-	
-	// fill in alpha for the intermediate values
-	codes[8 + 3] = 255;
-	codes[12 + 3] = ( isDxt1 && a <= b ) ? 0 : 255;
-	
-	// unpack the indices
-	u8 indices[16];
-	for( int i = 0; i < 4; ++i )
-	{
-		u8* ind = indices + 4*i;
-		u8 packed = bytes[4 + i];
-		
-		ind[0] = packed & 0x3;
-		ind[1] = ( packed >> 2 ) & 0x3;
-		ind[2] = ( packed >> 4 ) & 0x3;
-		ind[3] = ( packed >> 6 ) & 0x3;
-	}
-
-	// store out the colours
-	for( int i = 0; i < 16; ++i )
-	{
-		u8 offset = 4*indices[i];
-		for( int j = 0; j < 4; ++j )
-			rgba[4*i + j] = codes[offset + j];
-	}
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/colourblock.h b/3rdparty/libsquish/colourblock.h
deleted file mode 100644
index 2562561d7..000000000
--- a/3rdparty/libsquish/colourblock.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_COLOURBLOCK_H
-#define SQUISH_COLOURBLOCK_H
-
-#include "squish.h"
-#include "maths.h"
-
-namespace squish {
-
-void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
-void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
-
-void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
-
-} // namespace squish
-
-#endif // ndef SQUISH_COLOURBLOCK_H
diff --git a/3rdparty/libsquish/colourfit.cpp b/3rdparty/libsquish/colourfit.cpp
deleted file mode 100644
index 11efa4674..000000000
--- a/3rdparty/libsquish/colourfit.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "colourfit.h"
-#include "colourset.h"
-
-namespace squish {
-
-ColourFit::ColourFit( ColourSet const* colours, int flags ) 
-  : m_colours( colours ), 
-	m_flags( flags )
-{
-}
-
-ColourFit::~ColourFit()
-{
-}
-
-void ColourFit::Compress( void* block )
-{
-	bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
-	if( isDxt1 )
-	{
-		Compress3( block );
-		if( !m_colours->IsTransparent() )
-			Compress4( block );
-	}
-	else
-		Compress4( block );
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/colourfit.h b/3rdparty/libsquish/colourfit.h
deleted file mode 100644
index 759322329..000000000
--- a/3rdparty/libsquish/colourfit.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_COLOURFIT_H
-#define SQUISH_COLOURFIT_H
-
-#include "squish.h"
-#include "maths.h"
-
-#include <climits>
-
-namespace squish {
-
-class ColourSet;
-
-class ColourFit
-{
-public:
-	ColourFit( ColourSet const* colours, int flags );
-	virtual ~ColourFit();
-
-	void Compress( void* block );
-
-protected:
-	virtual void Compress3( void* block ) = 0;
-	virtual void Compress4( void* block ) = 0;
-
-	ColourSet const* m_colours;
-	int m_flags;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_COLOURFIT_H
diff --git a/3rdparty/libsquish/colourset.cpp b/3rdparty/libsquish/colourset.cpp
deleted file mode 100644
index 97d29d987..000000000
--- a/3rdparty/libsquish/colourset.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "colourset.h"
-
-namespace squish {
-
-ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
-  : m_count( 0 ), 
-	m_transparent( false )
-{
-	// check the compression mode for dxt1
-	bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
-	bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
-
-	// create the minimal set
-	for( int i = 0; i < 16; ++i )
-	{
-		// check this pixel is enabled
-		int bit = 1 << i;
-		if( ( mask & bit ) == 0 )
-		{
-			m_remap[i] = -1;
-			continue;
-		}
-	
-		// check for transparent pixels when using dxt1
-		if( isDxt1 && rgba[4*i + 3] < 128 )
-		{
-			m_remap[i] = -1;
-			m_transparent = true;
-			continue;
-		}
-
-		// loop over previous points for a match
-		for( int j = 0;; ++j )
-		{
-			// allocate a new point
-			if( j == i )
-			{
-				// normalise coordinates to [0,1]
-				float x = ( float )rgba[4*i] / 255.0f;
-				float y = ( float )rgba[4*i + 1] / 255.0f;
-				float z = ( float )rgba[4*i + 2] / 255.0f;
-				
-				// ensure there is always non-zero weight even for zero alpha
-				float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
-
-				// add the point
-				m_points[m_count] = Vec3( x, y, z );
-				m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
-				m_remap[i] = m_count;
-				
-				// advance
-				++m_count;
-				break;
-			}
-		
-			// check for a match
-			int oldbit = 1 << j;
-			bool match = ( ( mask & oldbit ) != 0 )
-				&& ( rgba[4*i] == rgba[4*j] )
-				&& ( rgba[4*i + 1] == rgba[4*j + 1] )
-				&& ( rgba[4*i + 2] == rgba[4*j + 2] )
-				&& ( rgba[4*j + 3] >= 128 || !isDxt1 );
-			if( match )
-			{
-				// get the index of the match
-				int index = m_remap[j];
-				
-				// ensure there is always non-zero weight even for zero alpha
-				float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
-
-				// map to this point and increase the weight
-				m_weights[index] += ( weightByAlpha ? w : 1.0f );
-				m_remap[i] = index;
-				break;
-			}
-		}
-	}
-
-	// square root the weights
-	for( int i = 0; i < m_count; ++i )
-		m_weights[i] = std::sqrt( m_weights[i] );
-}
-
-void ColourSet::RemapIndices( u8 const* source, u8* target ) const
-{
-	for( int i = 0; i < 16; ++i )
-	{
-		int j = m_remap[i];
-		if( j == -1 )
-			target[i] = 3;
-		else
-			target[i] = source[j];
-	}
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/colourset.h b/3rdparty/libsquish/colourset.h
deleted file mode 100644
index 0c66fe440..000000000
--- a/3rdparty/libsquish/colourset.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_COLOURSET_H
-#define SQUISH_COLOURSET_H
-
-#include "squish.h"
-#include "maths.h"
-
-namespace squish {
-
-/*! @brief Represents a set of block colours
-*/
-class ColourSet
-{
-public:
-	ColourSet( u8 const* rgba, int mask, int flags );
-
-	int GetCount() const { return m_count; }
-	Vec3 const* GetPoints() const { return m_points; }
-	float const* GetWeights() const { return m_weights; }
-	bool IsTransparent() const { return m_transparent; }
-
-	void RemapIndices( u8 const* source, u8* target ) const;
-
-private:
-	int m_count;
-	Vec3 m_points[16];
-	float m_weights[16];
-	int m_remap[16];
-	bool m_transparent;
-};
-
-} // namespace sqish
-
-#endif // ndef SQUISH_COLOURSET_H
diff --git a/3rdparty/libsquish/config.h b/3rdparty/libsquish/config.h
deleted file mode 100644
index 2fad5576a..000000000
--- a/3rdparty/libsquish/config.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_CONFIG_H
-#define SQUISH_CONFIG_H
-
-// Set to 1 when building squish to use Altivec instructions.
-#ifndef SQUISH_USE_ALTIVEC
-#define SQUISH_USE_ALTIVEC 0
-#endif
-
-// Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
-#ifndef SQUISH_USE_SSE
-#define SQUISH_USE_SSE 0
-#endif
-
-// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
-#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
-#error "Cannot enable both Altivec and SSE!"
-#endif
-#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
-#define SQUISH_USE_SIMD 1
-#else
-#define SQUISH_USE_SIMD 0
-#endif
-
-#endif // ndef SQUISH_CONFIG_H
diff --git a/3rdparty/libsquish/maths.cpp b/3rdparty/libsquish/maths.cpp
deleted file mode 100644
index 9af4197d3..000000000
--- a/3rdparty/libsquish/maths.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-/*! @file
-
-	The symmetric eigensystem solver algorithm is from 
-	http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
-*/
-
-#include "maths.h"
-#include "simd.h"
-#include <cfloat>
-
-namespace squish {
-
-Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
-{
-	// compute the centroid
-	float total = 0.0f;
-	Vec3 centroid( 0.0f );
-	for( int i = 0; i < n; ++i )
-	{
-		total += weights[i];
-		centroid += weights[i]*points[i];
-	}
-	if( total > FLT_EPSILON )
-		centroid /= total;
-
-	// accumulate the covariance matrix
-	Sym3x3 covariance( 0.0f );
-	for( int i = 0; i < n; ++i )
-	{
-		Vec3 a = points[i] - centroid;
-		Vec3 b = weights[i]*a;
-		
-		covariance[0] += a.X()*b.X();
-		covariance[1] += a.X()*b.Y();
-		covariance[2] += a.X()*b.Z();
-		covariance[3] += a.Y()*b.Y();
-		covariance[4] += a.Y()*b.Z();
-		covariance[5] += a.Z()*b.Z();
-	}
-	
-	// return it
-	return covariance;
-}
-
-#if 0
-
-static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
-{
-	// compute M
-	Sym3x3 m;
-	m[0] = matrix[0] - evalue;
-	m[1] = matrix[1];
-	m[2] = matrix[2];
-	m[3] = matrix[3] - evalue;
-	m[4] = matrix[4];
-	m[5] = matrix[5] - evalue;
-
-	// compute U
-	Sym3x3 u;
-	u[0] = m[3]*m[5] - m[4]*m[4];
-	u[1] = m[2]*m[4] - m[1]*m[5];
-	u[2] = m[1]*m[4] - m[2]*m[3];
-	u[3] = m[0]*m[5] - m[2]*m[2];
-	u[4] = m[1]*m[2] - m[4]*m[0];
-	u[5] = m[0]*m[3] - m[1]*m[1];
-
-	// find the largest component
-	float mc = std::fabs( u[0] );
-	int mi = 0;
-	for( int i = 1; i < 6; ++i )
-	{
-		float c = std::fabs( u[i] );
-		if( c > mc )
-		{
-			mc = c;
-			mi = i;
-		}
-	}
-
-	// pick the column with this component
-	switch( mi )
-	{
-	case 0:
-		return Vec3( u[0], u[1], u[2] );
-
-	case 1:
-	case 3:
-		return Vec3( u[1], u[3], u[4] );
-
-	default:
-		return Vec3( u[2], u[4], u[5] );
-	}
-}
-
-static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
-{
-	// compute M
-	Sym3x3 m;
-	m[0] = matrix[0] - evalue;
-	m[1] = matrix[1];
-	m[2] = matrix[2];
-	m[3] = matrix[3] - evalue;
-	m[4] = matrix[4];
-	m[5] = matrix[5] - evalue;
-
-	// find the largest component
-	float mc = std::fabs( m[0] );
-	int mi = 0;
-	for( int i = 1; i < 6; ++i )
-	{
-		float c = std::fabs( m[i] );
-		if( c > mc )
-		{
-			mc = c;
-			mi = i;
-		}
-	}
-
-	// pick the first eigenvector based on this index
-	switch( mi )
-	{
-	case 0:
-	case 1:
-		return Vec3( -m[1], m[0], 0.0f );
-
-	case 2:
-		return Vec3( m[2], 0.0f, -m[0] );
-
-	case 3:
-	case 4:
-		return Vec3( 0.0f, -m[4], m[3] );
-
-	default:
-		return Vec3( 0.0f, -m[5], m[4] );
-	}
-}
-
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
-{
-	// compute the cubic coefficients
-	float c0 = matrix[0]*matrix[3]*matrix[5] 
-		+ 2.0f*matrix[1]*matrix[2]*matrix[4] 
-		- matrix[0]*matrix[4]*matrix[4] 
-		- matrix[3]*matrix[2]*matrix[2] 
-		- matrix[5]*matrix[1]*matrix[1];
-	float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
-		- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
-	float c2 = matrix[0] + matrix[3] + matrix[5];
-
-	// compute the quadratic coefficients
-	float a = c1 - ( 1.0f/3.0f )*c2*c2;
-	float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
-
-	// compute the root count check
-	float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
-
-	// test the multiplicity
-	if( FLT_EPSILON < Q )
-	{
-		// only one root, which implies we have a multiple of the identity
-        return Vec3( 1.0f );
-	}
-	else if( Q < -FLT_EPSILON )
-	{
-		// three distinct roots
-		float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
-		float rho = std::sqrt( 0.25f*b*b - Q );
-
-		float rt = std::pow( rho, 1.0f/3.0f );
-		float ct = std::cos( theta/3.0f );
-		float st = std::sin( theta/3.0f );
-
-		float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
-		float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
-		float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
-
-		// pick the larger
-		if( std::fabs( l2 ) > std::fabs( l1 ) )
-			l1 = l2;
-		if( std::fabs( l3 ) > std::fabs( l1 ) )
-			l1 = l3;
-
-		// get the eigenvector
-		return GetMultiplicity1Evector( matrix, l1 );
-	}
-	else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
-	{
-		// two roots
-		float rt;
-		if( b < 0.0f )
-			rt = -std::pow( -0.5f*b, 1.0f/3.0f );
-		else
-			rt = std::pow( 0.5f*b, 1.0f/3.0f );
-		
-		float l1 = ( 1.0f/3.0f )*c2 + rt;		// repeated
-		float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
-		
-		// get the eigenvector
-		if( std::fabs( l1 ) > std::fabs( l2 ) )
-			return GetMultiplicity2Evector( matrix, l1 );
-		else
-			return GetMultiplicity1Evector( matrix, l2 );
-	}
-}
-
-#else
-
-#define POWER_ITERATION_COUNT 	8
-
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
-{
-	Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
-	Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
-	Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
-	Vec4 v = VEC4_CONST( 1.0f );
-	for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
-	{
-		// matrix multiply
-		Vec4 w = row0*v.SplatX();
-		w = MultiplyAdd(row1, v.SplatY(), w);
-		w = MultiplyAdd(row2, v.SplatZ(), w);
-
-		// get max component from xyz in all channels
-		Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
-
-		// divide through and advance
-		v = w*Reciprocal(a);
-	}
-	return v.GetVec3();
-}
-
-#endif
-
-} // namespace squish
diff --git a/3rdparty/libsquish/maths.h b/3rdparty/libsquish/maths.h
deleted file mode 100644
index 769ae463f..000000000
--- a/3rdparty/libsquish/maths.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_MATHS_H
-#define SQUISH_MATHS_H
-
-#include <cmath>
-#include <algorithm>
-#include "config.h"
-
-namespace squish {
-
-class Vec3
-{
-public:
-	typedef Vec3 const& Arg;
-
-	Vec3()
-	{
-	}
-
-	explicit Vec3( float s )
-	{
-		m_x = s;
-		m_y = s;
-		m_z = s;
-	}
-
-	Vec3( float x, float y, float z )
-	{
-		m_x = x;
-		m_y = y;
-		m_z = z;
-	}
-	
-	float X() const { return m_x; }
-	float Y() const { return m_y; }
-	float Z() const { return m_z; }
-	
-	Vec3 operator-() const
-	{
-		return Vec3( -m_x, -m_y, -m_z );
-	}
-	
-	Vec3& operator+=( Arg v )
-	{
-		m_x += v.m_x;
-		m_y += v.m_y;
-		m_z += v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator-=( Arg v )
-	{
-		m_x -= v.m_x;
-		m_y -= v.m_y;
-		m_z -= v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator*=( Arg v )
-	{
-		m_x *= v.m_x;
-		m_y *= v.m_y;
-		m_z *= v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator*=( float s )
-	{
-		m_x *= s;
-		m_y *= s;
-		m_z *= s;
-		return *this;
-	}
-	
-	Vec3& operator/=( Arg v )
-	{
-		m_x /= v.m_x;
-		m_y /= v.m_y;
-		m_z /= v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator/=( float s )
-	{
-		float t = 1.0f/s;
-		m_x *= t;
-		m_y *= t;
-		m_z *= t;
-		return *this;
-	}
-	
-	friend Vec3 operator+( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy += right;
-	}
-	
-	friend Vec3 operator-( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy -= right;
-	}
-	
-	friend Vec3 operator*( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy *= right;
-	}
-	
-	friend Vec3 operator*( Arg left, float right )
-	{
-		Vec3 copy( left );
-		return copy *= right;
-	}
-	
-	friend Vec3 operator*( float left, Arg right )
-	{
-		Vec3 copy( right );
-		return copy *= left;
-	}
-	
-	friend Vec3 operator/( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy /= right;
-	}
-	
-	friend Vec3 operator/( Arg left, float right )
-	{
-		Vec3 copy( left );
-		return copy /= right;
-	}
-	
-	friend float Dot( Arg left, Arg right )
-	{
-		return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
-	}
-	
-	friend Vec3 Min( Arg left, Arg right )
-	{
-		return Vec3(
-			std::min( left.m_x, right.m_x ), 
-			std::min( left.m_y, right.m_y ), 
-			std::min( left.m_z, right.m_z )
-		);
-	}
-
-	friend Vec3 Max( Arg left, Arg right )
-	{
-		return Vec3(
-			std::max( left.m_x, right.m_x ), 
-			std::max( left.m_y, right.m_y ), 
-			std::max( left.m_z, right.m_z )
-		);
-	}
-
-	friend Vec3 Truncate( Arg v )
-	{
-		return Vec3(
-			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
-			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
-			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z )
-		);
-	}
-
-private:
-	float m_x;
-	float m_y;
-	float m_z;
-};
-
-inline float LengthSquared( Vec3::Arg v )
-{
-	return Dot( v, v );
-}
-
-class Sym3x3
-{
-public:
-	Sym3x3()
-	{
-	}
-
-	Sym3x3( float s )
-	{
-		for( int i = 0; i < 6; ++i )
-			m_x[i] = s;
-	}
-
-	float operator[]( int index ) const
-	{
-		return m_x[index];
-	}
-
-	float& operator[]( int index )
-	{
-		return m_x[index];
-	}
-
-private:
-	float m_x[6];
-};
-
-Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
-
-} // namespace squish
-
-#endif // ndef SQUISH_MATHS_H
diff --git a/3rdparty/libsquish/rangefit.cpp b/3rdparty/libsquish/rangefit.cpp
deleted file mode 100644
index 3fca1245e..000000000
--- a/3rdparty/libsquish/rangefit.cpp
+++ /dev/null
@@ -1,201 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "rangefit.h"
-#include "colourset.h"
-#include "colourblock.h"
-#include <cfloat>
-
-namespace squish {
-
-RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) 
-  : ColourFit( colours, flags )
-{
-	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
-	if( metric )
-		m_metric = Vec3( metric[0], metric[1], metric[2] );
-	else
-		m_metric = Vec3( 1.0f );	
-
-	// initialise the best error
-	m_besterror = FLT_MAX;
-
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-	float const* weights = m_colours->GetWeights();
-	
-	// get the covariance matrix
-	Sym3x3 covariance = ComputeWeightedCovariance( count, values, weights );
-	
-	// compute the principle component
-	Vec3 principle = ComputePrincipleComponent( covariance );
-
-	// get the min and max range as the codebook endpoints
-	Vec3 start( 0.0f );
-	Vec3 end( 0.0f );
-	if( count > 0 )
-	{
-		float min, max;
-		
-		// compute the range
-		start = end = values[0];
-		min = max = Dot( values[0], principle );
-		for( int i = 1; i < count; ++i )
-		{
-			float val = Dot( values[i], principle );
-			if( val < min )
-			{
-				start = values[i];
-				min = val;
-			}
-			else if( val > max )
-			{
-				end = values[i];
-				max = val;
-			}
-		}
-	}
-			
-	// clamp the output to [0, 1]
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	start = Min( one, Max( zero, start ) );
-	end = Min( one, Max( zero, end ) );
-
-	// clamp to the grid and save
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-	Vec3 const half( 0.5f );
-	m_start = Truncate( grid*start + half )*gridrcp;
-	m_end = Truncate( grid*end + half )*gridrcp;
-}
-
-void RangeFit::Compress3( void* block )
-{
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-	
-	// create a codebook
-	Vec3 codes[3];
-	codes[0] = m_start;
-	codes[1] = m_end;
-	codes[2] = 0.5f*m_start + 0.5f*m_end;
-
-	// match each point to the closest code
-	u8 closest[16];
-	float error = 0.0f;
-	for( int i = 0; i < count; ++i )
-	{
-		// find the closest code
-		float dist = FLT_MAX;
-		int idx = 0;
-		for( int j = 0; j < 3; ++j )
-		{
-			float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
-			if( d < dist )
-			{
-				dist = d;
-				idx = j;
-			}
-		}
-		
-		// save the index
-		closest[i] = ( u8 )idx;
-		
-		// accumulate the error
-		error += dist;
-	}
-	
-	// save this scheme if it wins
-	if( error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( closest, indices );
-		
-		// save the block
-		WriteColourBlock3( m_start, m_end, indices, block );
-		
-		// save the error
-		m_besterror = error;
-	}
-}
-
-void RangeFit::Compress4( void* block )
-{
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-	
-	// create a codebook
-	Vec3 codes[4];
-	codes[0] = m_start;
-	codes[1] = m_end;
-	codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
-	codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
-
-	// match each point to the closest code
-	u8 closest[16];
-	float error = 0.0f;
-	for( int i = 0; i < count; ++i )
-	{
-		// find the closest code
-		float dist = FLT_MAX;
-		int idx = 0;
-		for( int j = 0; j < 4; ++j )
-		{
-			float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
-			if( d < dist )
-			{
-				dist = d;
-				idx = j;
-			}
-		}
-		
-		// save the index
-		closest[i] = ( u8 )idx;
-		
-		// accumulate the error
-		error += dist;
-	}
-	
-	// save this scheme if it wins
-	if( error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( closest, indices );
-		
-		// save the block
-		WriteColourBlock4( m_start, m_end, indices, block );
-
-		// save the error
-		m_besterror = error;
-	}
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/rangefit.h b/3rdparty/libsquish/rangefit.h
deleted file mode 100644
index e293bdcf3..000000000
--- a/3rdparty/libsquish/rangefit.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_RANGEFIT_H
-#define SQUISH_RANGEFIT_H
-
-#include "squish.h"
-#include "colourfit.h"
-#include "maths.h"
-
-namespace squish {
-
-class ColourSet;
-
-class RangeFit : public ColourFit
-{
-public:
-	RangeFit( ColourSet const* colours, int flags, float* metric );
-	
-private:
-	virtual void Compress3( void* block );
-	virtual void Compress4( void* block );
-	
-	Vec3 m_metric;
-	Vec3 m_start;
-	Vec3 m_end;
-	float m_besterror;
-};
-
-} // squish
-
-#endif // ndef SQUISH_RANGEFIT_H
diff --git a/3rdparty/libsquish/simd.h b/3rdparty/libsquish/simd.h
deleted file mode 100644
index 92965e02e..000000000
--- a/3rdparty/libsquish/simd.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_H
-#define SQUISH_SIMD_H
-
-#include "maths.h"
-#include "simd_float.h"
-
-#endif // ndef SQUISH_SIMD_H
diff --git a/3rdparty/libsquish/simd_float.h b/3rdparty/libsquish/simd_float.h
deleted file mode 100644
index e6351b80e..000000000
--- a/3rdparty/libsquish/simd_float.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_FLOAT_H
-#define SQUISH_SIMD_FLOAT_H
-
-#include <algorithm>
-
-namespace squish {
-
-#define VEC4_CONST( X ) Vec4( X )
-
-class Vec4
-{
-public:
-	typedef Vec4 const& Arg;
-
-	Vec4() {}
-		
-	explicit Vec4( float s )
-	  : m_x( s ),
-		m_y( s ),
-		m_z( s ),
-		m_w( s )
-	{
-	}
-	
-	Vec4( float x, float y, float z, float w )
-	  : m_x( x ),
-		m_y( y ),
-		m_z( z ),
-		m_w( w )
-	{
-	}
-	
-	Vec3 GetVec3() const
-	{
-		return Vec3( m_x, m_y, m_z );
-	}
-	
-	Vec4 SplatX() const { return Vec4( m_x ); }
-	Vec4 SplatY() const { return Vec4( m_y ); }
-	Vec4 SplatZ() const { return Vec4( m_z ); }
-	Vec4 SplatW() const { return Vec4( m_w ); }
-
-	Vec4& operator+=( Arg v )
-	{
-		m_x += v.m_x;
-		m_y += v.m_y;
-		m_z += v.m_z;
-		m_w += v.m_w;
-		return *this;
-	}
-	
-	Vec4& operator-=( Arg v )
-	{
-		m_x -= v.m_x;
-		m_y -= v.m_y;
-		m_z -= v.m_z;
-		m_w -= v.m_w;
-		return *this;
-	}
-	
-	Vec4& operator*=( Arg v )
-	{
-		m_x *= v.m_x;
-		m_y *= v.m_y;
-		m_z *= v.m_z;
-		m_w *= v.m_w;
-		return *this;
-	}
-	
-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
-	{
-		Vec4 copy( left );
-		return copy += right;
-	}
-	
-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
-	{
-		Vec4 copy( left );
-		return copy -= right;
-	}
-	
-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
-	{
-		Vec4 copy( left );
-		return copy *= right;
-	}
-	
-	//! Returns a*b + c
-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return a*b + c;
-	}
-	
-	//! Returns -( a*b - c )
-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return c - a*b;
-	}
-	
-	friend Vec4 Reciprocal( Vec4::Arg v )
-	{
-		return Vec4( 
-			1.0f/v.m_x, 
-			1.0f/v.m_y, 
-			1.0f/v.m_z, 
-			1.0f/v.m_w 
-		);
-	}
-	
-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( 
-			std::min( left.m_x, right.m_x ), 
-			std::min( left.m_y, right.m_y ), 
-			std::min( left.m_z, right.m_z ), 
-			std::min( left.m_w, right.m_w ) 
-		);
-	}
-	
-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( 
-			std::max( left.m_x, right.m_x ), 
-			std::max( left.m_y, right.m_y ), 
-			std::max( left.m_z, right.m_z ), 
-			std::max( left.m_w, right.m_w ) 
-		);
-	}
-	
-	friend Vec4 Truncate( Vec4::Arg v )
-	{
-		return Vec4(
-			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
-			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
-			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z ),
-			v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w )
-		);
-	}
-	
-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
-	{
-		return left.m_x < right.m_x
-			|| left.m_y < right.m_y
-			|| left.m_z < right.m_z
-			|| left.m_w < right.m_w;
-	}
-	
-private:
-	float m_x;
-	float m_y;
-	float m_z;
-	float m_w;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SIMD_FLOAT_H
-
diff --git a/3rdparty/libsquish/singlecolourfit.cpp b/3rdparty/libsquish/singlecolourfit.cpp
deleted file mode 100644
index e8a011769..000000000
--- a/3rdparty/libsquish/singlecolourfit.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "singlecolourfit.h"
-#include "colourset.h"
-#include "colourblock.h"
-
-namespace squish {
-
-struct SourceBlock
-{
-	u8 start;
-	u8 end;
-	u8 error;
-};
-
-struct SingleColourLookup
-{
-	SourceBlock sources[2];
-};
-
-#include "singlecolourlookup.inl"
-
-static int FloatToInt( float a, int limit )
-{
-	// use ANSI round-to-zero behaviour to get round-to-nearest
-	int i = ( int )( a + 0.5f );
-
-	// clamp to the limit
-	if( i < 0 )
-		i = 0;
-	else if( i > limit )
-		i = limit; 
-
-	// done
-	return i;
-}
-
-SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
-  : ColourFit( colours, flags )
-{
-	// grab the single colour
-	Vec3 const* values = m_colours->GetPoints();
-	m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 );
-	m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 );
-	m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 );
-		
-	// initialise the best error
-	m_besterror = INT_MAX;
-}
-
-void SingleColourFit::Compress3( void* block )
-{
-	// build the table of lookups
-	SingleColourLookup const* const lookups[] = 
-	{
-		lookup_5_3, 
-		lookup_6_3, 
-		lookup_5_3
-	};
-	
-	// find the best end-points and index
-	ComputeEndPoints( lookups );
-	
-	// build the block if we win
-	if( m_error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( &m_index, indices );
-		
-		// save the block
-		WriteColourBlock3( m_start, m_end, indices, block );
-
-		// save the error
-		m_besterror = m_error;
-	}
-}
-
-void SingleColourFit::Compress4( void* block )
-{
-	// build the table of lookups
-	SingleColourLookup const* const lookups[] = 
-	{
-		lookup_5_4, 
-		lookup_6_4, 
-		lookup_5_4
-	};
-	
-	// find the best end-points and index
-	ComputeEndPoints( lookups );
-	
-	// build the block if we win
-	if( m_error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( &m_index, indices );
-		
-		// save the block
-		WriteColourBlock4( m_start, m_end, indices, block );
-
-		// save the error
-		m_besterror = m_error;
-	}
-}
-
-void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
-{
-	// check each index combination (endpoint or intermediate)
-	m_error = INT_MAX;
-	for( int index = 0; index < 2; ++index )
-	{
-		// check the error for this codebook index
-		SourceBlock const* sources[3];
-		int error = 0;
-		for( int channel = 0; channel < 3; ++channel )
-		{
-			// grab the lookup table and index for this channel
-			SingleColourLookup const* lookup = lookups[channel];
-			int target = m_colour[channel];
-			
-			// store a pointer to the source for this channel
-			sources[channel] = lookup[target].sources + index;
-			
-			// accumulate the error
-			int diff = sources[channel]->error;
-			error += diff*diff;			
-		}
-		
-		// keep it if the error is lower
-		if( error < m_error )
-		{
-			m_start = Vec3(
-				( float )sources[0]->start/31.0f, 
-				( float )sources[1]->start/63.0f, 
-				( float )sources[2]->start/31.0f
-			);
-			m_end = Vec3(
-				( float )sources[0]->end/31.0f, 
-				( float )sources[1]->end/63.0f, 
-				( float )sources[2]->end/31.0f
-			);
-			m_index = ( u8 )( 2*index );
-			m_error = error;
-		}
-	}
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/singlecolourfit.h b/3rdparty/libsquish/singlecolourfit.h
deleted file mode 100644
index 54ec17ebb..000000000
--- a/3rdparty/libsquish/singlecolourfit.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SINGLECOLOURFIT_H
-#define SQUISH_SINGLECOLOURFIT_H
-
-#include "squish.h"
-#include "colourfit.h"
-
-namespace squish {
-
-class ColourSet;
-struct SingleColourLookup;
-
-class SingleColourFit : public ColourFit
-{
-public:
-	SingleColourFit( ColourSet const* colours, int flags );
-	
-private:
-	virtual void Compress3( void* block );
-	virtual void Compress4( void* block );
-	
-	void ComputeEndPoints( SingleColourLookup const* const* lookups );
-	
-	u8 m_colour[3];
-	Vec3 m_start;
-	Vec3 m_end;
-	u8 m_index;
-	int m_error;
-	int m_besterror;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SINGLECOLOURFIT_H
diff --git a/3rdparty/libsquish/singlecolourlookup.inl b/3rdparty/libsquish/singlecolourlookup.inl
deleted file mode 100644
index 5e911745e..000000000
--- a/3rdparty/libsquish/singlecolourlookup.inl
+++ /dev/null
@@ -1,1064 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-
-static SingleColourLookup const lookup_5_3[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 0, 1 } } },
-	{ { { 0, 0, 2 }, { 0, 0, 2 } } },
-	{ { { 0, 0, 3 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 4 }, { 0, 1, 0 } } },
-	{ { { 1, 0, 3 }, { 0, 1, 1 } } },
-	{ { { 1, 0, 2 }, { 0, 1, 2 } } },
-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 2 }, { 0, 2, 2 } } },
-	{ { { 1, 0, 3 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 4 }, { 0, 3, 0 } } },
-	{ { { 2, 0, 3 }, { 0, 3, 1 } } },
-	{ { { 2, 0, 2 }, { 0, 3, 2 } } },
-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 4, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
-	{ { { 2, 0, 2 }, { 0, 4, 2 } } },
-	{ { { 2, 0, 3 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 4 }, { 0, 5, 0 } } },
-	{ { { 3, 0, 3 }, { 0, 5, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 5, 2 } } },
-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 6, 2 } } },
-	{ { { 3, 0, 3 }, { 0, 7, 1 } } },
-	{ { { 3, 0, 4 }, { 0, 7, 0 } } },
-	{ { { 4, 0, 4 }, { 0, 7, 1 } } },
-	{ { { 4, 0, 3 }, { 0, 7, 2 } } },
-	{ { { 4, 0, 2 }, { 1, 7, 1 } } },
-	{ { { 4, 0, 1 }, { 1, 7, 0 } } },
-	{ { { 4, 0, 0 }, { 0, 8, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 8, 1 } } },
-	{ { { 4, 0, 2 }, { 2, 7, 1 } } },
-	{ { { 4, 0, 3 }, { 2, 7, 0 } } },
-	{ { { 4, 0, 4 }, { 0, 9, 0 } } },
-	{ { { 5, 0, 3 }, { 0, 9, 1 } } },
-	{ { { 5, 0, 2 }, { 3, 7, 1 } } },
-	{ { { 5, 0, 1 }, { 3, 7, 0 } } },
-	{ { { 5, 0, 0 }, { 0, 10, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 10, 1 } } },
-	{ { { 5, 0, 2 }, { 0, 10, 2 } } },
-	{ { { 5, 0, 3 }, { 0, 11, 1 } } },
-	{ { { 5, 0, 4 }, { 0, 11, 0 } } },
-	{ { { 6, 0, 3 }, { 0, 11, 1 } } },
-	{ { { 6, 0, 2 }, { 0, 11, 2 } } },
-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 6, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 6, 0, 2 }, { 0, 12, 2 } } },
-	{ { { 6, 0, 3 }, { 0, 13, 1 } } },
-	{ { { 6, 0, 4 }, { 0, 13, 0 } } },
-	{ { { 7, 0, 3 }, { 0, 13, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 13, 2 } } },
-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
-	{ { { 7, 0, 0 }, { 0, 14, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 14, 2 } } },
-	{ { { 7, 0, 3 }, { 0, 15, 1 } } },
-	{ { { 7, 0, 4 }, { 0, 15, 0 } } },
-	{ { { 8, 0, 4 }, { 0, 15, 1 } } },
-	{ { { 8, 0, 3 }, { 0, 15, 2 } } },
-	{ { { 8, 0, 2 }, { 1, 15, 1 } } },
-	{ { { 8, 0, 1 }, { 1, 15, 0 } } },
-	{ { { 8, 0, 0 }, { 0, 16, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 16, 1 } } },
-	{ { { 8, 0, 2 }, { 2, 15, 1 } } },
-	{ { { 8, 0, 3 }, { 2, 15, 0 } } },
-	{ { { 8, 0, 4 }, { 0, 17, 0 } } },
-	{ { { 9, 0, 3 }, { 0, 17, 1 } } },
-	{ { { 9, 0, 2 }, { 3, 15, 1 } } },
-	{ { { 9, 0, 1 }, { 3, 15, 0 } } },
-	{ { { 9, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 18, 1 } } },
-	{ { { 9, 0, 2 }, { 0, 18, 2 } } },
-	{ { { 9, 0, 3 }, { 0, 19, 1 } } },
-	{ { { 9, 0, 4 }, { 0, 19, 0 } } },
-	{ { { 10, 0, 3 }, { 0, 19, 1 } } },
-	{ { { 10, 0, 2 }, { 0, 19, 2 } } },
-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
-	{ { { 10, 0, 0 }, { 0, 20, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
-	{ { { 10, 0, 2 }, { 0, 20, 2 } } },
-	{ { { 10, 0, 3 }, { 0, 21, 1 } } },
-	{ { { 10, 0, 4 }, { 0, 21, 0 } } },
-	{ { { 11, 0, 3 }, { 0, 21, 1 } } },
-	{ { { 11, 0, 2 }, { 0, 21, 2 } } },
-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
-	{ { { 11, 0, 0 }, { 0, 22, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
-	{ { { 11, 0, 2 }, { 0, 22, 2 } } },
-	{ { { 11, 0, 3 }, { 0, 23, 1 } } },
-	{ { { 11, 0, 4 }, { 0, 23, 0 } } },
-	{ { { 12, 0, 4 }, { 0, 23, 1 } } },
-	{ { { 12, 0, 3 }, { 0, 23, 2 } } },
-	{ { { 12, 0, 2 }, { 1, 23, 1 } } },
-	{ { { 12, 0, 1 }, { 1, 23, 0 } } },
-	{ { { 12, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 24, 1 } } },
-	{ { { 12, 0, 2 }, { 2, 23, 1 } } },
-	{ { { 12, 0, 3 }, { 2, 23, 0 } } },
-	{ { { 12, 0, 4 }, { 0, 25, 0 } } },
-	{ { { 13, 0, 3 }, { 0, 25, 1 } } },
-	{ { { 13, 0, 2 }, { 3, 23, 1 } } },
-	{ { { 13, 0, 1 }, { 3, 23, 0 } } },
-	{ { { 13, 0, 0 }, { 0, 26, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 26, 1 } } },
-	{ { { 13, 0, 2 }, { 0, 26, 2 } } },
-	{ { { 13, 0, 3 }, { 0, 27, 1 } } },
-	{ { { 13, 0, 4 }, { 0, 27, 0 } } },
-	{ { { 14, 0, 3 }, { 0, 27, 1 } } },
-	{ { { 14, 0, 2 }, { 0, 27, 2 } } },
-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
-	{ { { 14, 0, 0 }, { 0, 28, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
-	{ { { 14, 0, 2 }, { 0, 28, 2 } } },
-	{ { { 14, 0, 3 }, { 0, 29, 1 } } },
-	{ { { 14, 0, 4 }, { 0, 29, 0 } } },
-	{ { { 15, 0, 3 }, { 0, 29, 1 } } },
-	{ { { 15, 0, 2 }, { 0, 29, 2 } } },
-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 15, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 15, 0, 2 }, { 0, 30, 2 } } },
-	{ { { 15, 0, 3 }, { 0, 31, 1 } } },
-	{ { { 15, 0, 4 }, { 0, 31, 0 } } },
-	{ { { 16, 0, 4 }, { 0, 31, 1 } } },
-	{ { { 16, 0, 3 }, { 0, 31, 2 } } },
-	{ { { 16, 0, 2 }, { 1, 31, 1 } } },
-	{ { { 16, 0, 1 }, { 1, 31, 0 } } },
-	{ { { 16, 0, 0 }, { 4, 28, 0 } } },
-	{ { { 16, 0, 1 }, { 4, 28, 1 } } },
-	{ { { 16, 0, 2 }, { 2, 31, 1 } } },
-	{ { { 16, 0, 3 }, { 2, 31, 0 } } },
-	{ { { 16, 0, 4 }, { 4, 29, 0 } } },
-	{ { { 17, 0, 3 }, { 4, 29, 1 } } },
-	{ { { 17, 0, 2 }, { 3, 31, 1 } } },
-	{ { { 17, 0, 1 }, { 3, 31, 0 } } },
-	{ { { 17, 0, 0 }, { 4, 30, 0 } } },
-	{ { { 17, 0, 1 }, { 4, 30, 1 } } },
-	{ { { 17, 0, 2 }, { 4, 30, 2 } } },
-	{ { { 17, 0, 3 }, { 4, 31, 1 } } },
-	{ { { 17, 0, 4 }, { 4, 31, 0 } } },
-	{ { { 18, 0, 3 }, { 4, 31, 1 } } },
-	{ { { 18, 0, 2 }, { 4, 31, 2 } } },
-	{ { { 18, 0, 1 }, { 5, 31, 1 } } },
-	{ { { 18, 0, 0 }, { 5, 31, 0 } } },
-	{ { { 18, 0, 1 }, { 5, 31, 1 } } },
-	{ { { 18, 0, 2 }, { 5, 31, 2 } } },
-	{ { { 18, 0, 3 }, { 6, 31, 1 } } },
-	{ { { 18, 0, 4 }, { 6, 31, 0 } } },
-	{ { { 19, 0, 3 }, { 6, 31, 1 } } },
-	{ { { 19, 0, 2 }, { 6, 31, 2 } } },
-	{ { { 19, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 19, 0, 0 }, { 7, 31, 0 } } },
-	{ { { 19, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 19, 0, 2 }, { 7, 31, 2 } } },
-	{ { { 19, 0, 3 }, { 8, 31, 1 } } },
-	{ { { 19, 0, 4 }, { 8, 31, 0 } } },
-	{ { { 20, 0, 4 }, { 8, 31, 1 } } },
-	{ { { 20, 0, 3 }, { 8, 31, 2 } } },
-	{ { { 20, 0, 2 }, { 9, 31, 1 } } },
-	{ { { 20, 0, 1 }, { 9, 31, 0 } } },
-	{ { { 20, 0, 0 }, { 12, 28, 0 } } },
-	{ { { 20, 0, 1 }, { 12, 28, 1 } } },
-	{ { { 20, 0, 2 }, { 10, 31, 1 } } },
-	{ { { 20, 0, 3 }, { 10, 31, 0 } } },
-	{ { { 20, 0, 4 }, { 12, 29, 0 } } },
-	{ { { 21, 0, 3 }, { 12, 29, 1 } } },
-	{ { { 21, 0, 2 }, { 11, 31, 1 } } },
-	{ { { 21, 0, 1 }, { 11, 31, 0 } } },
-	{ { { 21, 0, 0 }, { 12, 30, 0 } } },
-	{ { { 21, 0, 1 }, { 12, 30, 1 } } },
-	{ { { 21, 0, 2 }, { 12, 30, 2 } } },
-	{ { { 21, 0, 3 }, { 12, 31, 1 } } },
-	{ { { 21, 0, 4 }, { 12, 31, 0 } } },
-	{ { { 22, 0, 3 }, { 12, 31, 1 } } },
-	{ { { 22, 0, 2 }, { 12, 31, 2 } } },
-	{ { { 22, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 22, 0, 0 }, { 13, 31, 0 } } },
-	{ { { 22, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 22, 0, 2 }, { 13, 31, 2 } } },
-	{ { { 22, 0, 3 }, { 14, 31, 1 } } },
-	{ { { 22, 0, 4 }, { 14, 31, 0 } } },
-	{ { { 23, 0, 3 }, { 14, 31, 1 } } },
-	{ { { 23, 0, 2 }, { 14, 31, 2 } } },
-	{ { { 23, 0, 1 }, { 15, 31, 1 } } },
-	{ { { 23, 0, 0 }, { 15, 31, 0 } } },
-	{ { { 23, 0, 1 }, { 15, 31, 1 } } },
-	{ { { 23, 0, 2 }, { 15, 31, 2 } } },
-	{ { { 23, 0, 3 }, { 16, 31, 1 } } },
-	{ { { 23, 0, 4 }, { 16, 31, 0 } } },
-	{ { { 24, 0, 4 }, { 16, 31, 1 } } },
-	{ { { 24, 0, 3 }, { 16, 31, 2 } } },
-	{ { { 24, 0, 2 }, { 17, 31, 1 } } },
-	{ { { 24, 0, 1 }, { 17, 31, 0 } } },
-	{ { { 24, 0, 0 }, { 20, 28, 0 } } },
-	{ { { 24, 0, 1 }, { 20, 28, 1 } } },
-	{ { { 24, 0, 2 }, { 18, 31, 1 } } },
-	{ { { 24, 0, 3 }, { 18, 31, 0 } } },
-	{ { { 24, 0, 4 }, { 20, 29, 0 } } },
-	{ { { 25, 0, 3 }, { 20, 29, 1 } } },
-	{ { { 25, 0, 2 }, { 19, 31, 1 } } },
-	{ { { 25, 0, 1 }, { 19, 31, 0 } } },
-	{ { { 25, 0, 0 }, { 20, 30, 0 } } },
-	{ { { 25, 0, 1 }, { 20, 30, 1 } } },
-	{ { { 25, 0, 2 }, { 20, 30, 2 } } },
-	{ { { 25, 0, 3 }, { 20, 31, 1 } } },
-	{ { { 25, 0, 4 }, { 20, 31, 0 } } },
-	{ { { 26, 0, 3 }, { 20, 31, 1 } } },
-	{ { { 26, 0, 2 }, { 20, 31, 2 } } },
-	{ { { 26, 0, 1 }, { 21, 31, 1 } } },
-	{ { { 26, 0, 0 }, { 21, 31, 0 } } },
-	{ { { 26, 0, 1 }, { 21, 31, 1 } } },
-	{ { { 26, 0, 2 }, { 21, 31, 2 } } },
-	{ { { 26, 0, 3 }, { 22, 31, 1 } } },
-	{ { { 26, 0, 4 }, { 22, 31, 0 } } },
-	{ { { 27, 0, 3 }, { 22, 31, 1 } } },
-	{ { { 27, 0, 2 }, { 22, 31, 2 } } },
-	{ { { 27, 0, 1 }, { 23, 31, 1 } } },
-	{ { { 27, 0, 0 }, { 23, 31, 0 } } },
-	{ { { 27, 0, 1 }, { 23, 31, 1 } } },
-	{ { { 27, 0, 2 }, { 23, 31, 2 } } },
-	{ { { 27, 0, 3 }, { 24, 31, 1 } } },
-	{ { { 27, 0, 4 }, { 24, 31, 0 } } },
-	{ { { 28, 0, 4 }, { 24, 31, 1 } } },
-	{ { { 28, 0, 3 }, { 24, 31, 2 } } },
-	{ { { 28, 0, 2 }, { 25, 31, 1 } } },
-	{ { { 28, 0, 1 }, { 25, 31, 0 } } },
-	{ { { 28, 0, 0 }, { 28, 28, 0 } } },
-	{ { { 28, 0, 1 }, { 28, 28, 1 } } },
-	{ { { 28, 0, 2 }, { 26, 31, 1 } } },
-	{ { { 28, 0, 3 }, { 26, 31, 0 } } },
-	{ { { 28, 0, 4 }, { 28, 29, 0 } } },
-	{ { { 29, 0, 3 }, { 28, 29, 1 } } },
-	{ { { 29, 0, 2 }, { 27, 31, 1 } } },
-	{ { { 29, 0, 1 }, { 27, 31, 0 } } },
-	{ { { 29, 0, 0 }, { 28, 30, 0 } } },
-	{ { { 29, 0, 1 }, { 28, 30, 1 } } },
-	{ { { 29, 0, 2 }, { 28, 30, 2 } } },
-	{ { { 29, 0, 3 }, { 28, 31, 1 } } },
-	{ { { 29, 0, 4 }, { 28, 31, 0 } } },
-	{ { { 30, 0, 3 }, { 28, 31, 1 } } },
-	{ { { 30, 0, 2 }, { 28, 31, 2 } } },
-	{ { { 30, 0, 1 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 0 }, { 29, 31, 0 } } },
-	{ { { 30, 0, 1 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 2 }, { 29, 31, 2 } } },
-	{ { { 30, 0, 3 }, { 30, 31, 1 } } },
-	{ { { 30, 0, 4 }, { 30, 31, 0 } } },
-	{ { { 31, 0, 3 }, { 30, 31, 1 } } },
-	{ { { 31, 0, 2 }, { 30, 31, 2 } } },
-	{ { { 31, 0, 1 }, { 31, 31, 1 } } },
-	{ { { 31, 0, 0 }, { 31, 31, 0 } } }
-};
-
-static SingleColourLookup const lookup_6_3[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 2 }, { 0, 1, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 2 }, { 0, 3, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 4, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 2 }, { 0, 5, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 7, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 7, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 8, 1 } } },
-	{ { { 4, 0, 0 }, { 0, 8, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 4, 0, 2 }, { 0, 9, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 10, 1 } } },
-	{ { { 5, 0, 0 }, { 0, 10, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 11, 1 } } },
-	{ { { 5, 0, 2 }, { 0, 11, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 6, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 13, 1 } } },
-	{ { { 6, 0, 2 }, { 0, 13, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
-	{ { { 7, 0, 0 }, { 0, 14, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 15, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 15, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 16, 1 } } },
-	{ { { 8, 0, 0 }, { 0, 16, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 17, 1 } } },
-	{ { { 8, 0, 2 }, { 0, 17, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 18, 1 } } },
-	{ { { 9, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 19, 1 } } },
-	{ { { 9, 0, 2 }, { 0, 19, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
-	{ { { 10, 0, 0 }, { 0, 20, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 21, 1 } } },
-	{ { { 10, 0, 2 }, { 0, 21, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
-	{ { { 11, 0, 0 }, { 0, 22, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 23, 1 } } },
-	{ { { 11, 0, 2 }, { 0, 23, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 24, 1 } } },
-	{ { { 12, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 25, 1 } } },
-	{ { { 12, 0, 2 }, { 0, 25, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 26, 1 } } },
-	{ { { 13, 0, 0 }, { 0, 26, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 27, 1 } } },
-	{ { { 13, 0, 2 }, { 0, 27, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
-	{ { { 14, 0, 0 }, { 0, 28, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 29, 1 } } },
-	{ { { 14, 0, 2 }, { 0, 29, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 15, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 31, 1 } } },
-	{ { { 15, 0, 2 }, { 0, 31, 0 } } },
-	{ { { 16, 0, 2 }, { 1, 31, 1 } } },
-	{ { { 16, 0, 1 }, { 1, 31, 0 } } },
-	{ { { 16, 0, 0 }, { 0, 32, 0 } } },
-	{ { { 16, 0, 1 }, { 2, 31, 0 } } },
-	{ { { 16, 0, 2 }, { 0, 33, 0 } } },
-	{ { { 17, 0, 1 }, { 3, 31, 0 } } },
-	{ { { 17, 0, 0 }, { 0, 34, 0 } } },
-	{ { { 17, 0, 1 }, { 4, 31, 0 } } },
-	{ { { 17, 0, 2 }, { 0, 35, 0 } } },
-	{ { { 18, 0, 1 }, { 5, 31, 0 } } },
-	{ { { 18, 0, 0 }, { 0, 36, 0 } } },
-	{ { { 18, 0, 1 }, { 6, 31, 0 } } },
-	{ { { 18, 0, 2 }, { 0, 37, 0 } } },
-	{ { { 19, 0, 1 }, { 7, 31, 0 } } },
-	{ { { 19, 0, 0 }, { 0, 38, 0 } } },
-	{ { { 19, 0, 1 }, { 8, 31, 0 } } },
-	{ { { 19, 0, 2 }, { 0, 39, 0 } } },
-	{ { { 20, 0, 1 }, { 9, 31, 0 } } },
-	{ { { 20, 0, 0 }, { 0, 40, 0 } } },
-	{ { { 20, 0, 1 }, { 10, 31, 0 } } },
-	{ { { 20, 0, 2 }, { 0, 41, 0 } } },
-	{ { { 21, 0, 1 }, { 11, 31, 0 } } },
-	{ { { 21, 0, 0 }, { 0, 42, 0 } } },
-	{ { { 21, 0, 1 }, { 12, 31, 0 } } },
-	{ { { 21, 0, 2 }, { 0, 43, 0 } } },
-	{ { { 22, 0, 1 }, { 13, 31, 0 } } },
-	{ { { 22, 0, 0 }, { 0, 44, 0 } } },
-	{ { { 22, 0, 1 }, { 14, 31, 0 } } },
-	{ { { 22, 0, 2 }, { 0, 45, 0 } } },
-	{ { { 23, 0, 1 }, { 15, 31, 0 } } },
-	{ { { 23, 0, 0 }, { 0, 46, 0 } } },
-	{ { { 23, 0, 1 }, { 0, 47, 1 } } },
-	{ { { 23, 0, 2 }, { 0, 47, 0 } } },
-	{ { { 24, 0, 1 }, { 0, 48, 1 } } },
-	{ { { 24, 0, 0 }, { 0, 48, 0 } } },
-	{ { { 24, 0, 1 }, { 0, 49, 1 } } },
-	{ { { 24, 0, 2 }, { 0, 49, 0 } } },
-	{ { { 25, 0, 1 }, { 0, 50, 1 } } },
-	{ { { 25, 0, 0 }, { 0, 50, 0 } } },
-	{ { { 25, 0, 1 }, { 0, 51, 1 } } },
-	{ { { 25, 0, 2 }, { 0, 51, 0 } } },
-	{ { { 26, 0, 1 }, { 0, 52, 1 } } },
-	{ { { 26, 0, 0 }, { 0, 52, 0 } } },
-	{ { { 26, 0, 1 }, { 0, 53, 1 } } },
-	{ { { 26, 0, 2 }, { 0, 53, 0 } } },
-	{ { { 27, 0, 1 }, { 0, 54, 1 } } },
-	{ { { 27, 0, 0 }, { 0, 54, 0 } } },
-	{ { { 27, 0, 1 }, { 0, 55, 1 } } },
-	{ { { 27, 0, 2 }, { 0, 55, 0 } } },
-	{ { { 28, 0, 1 }, { 0, 56, 1 } } },
-	{ { { 28, 0, 0 }, { 0, 56, 0 } } },
-	{ { { 28, 0, 1 }, { 0, 57, 1 } } },
-	{ { { 28, 0, 2 }, { 0, 57, 0 } } },
-	{ { { 29, 0, 1 }, { 0, 58, 1 } } },
-	{ { { 29, 0, 0 }, { 0, 58, 0 } } },
-	{ { { 29, 0, 1 }, { 0, 59, 1 } } },
-	{ { { 29, 0, 2 }, { 0, 59, 0 } } },
-	{ { { 30, 0, 1 }, { 0, 60, 1 } } },
-	{ { { 30, 0, 0 }, { 0, 60, 0 } } },
-	{ { { 30, 0, 1 }, { 0, 61, 1 } } },
-	{ { { 30, 0, 2 }, { 0, 61, 0 } } },
-	{ { { 31, 0, 1 }, { 0, 62, 1 } } },
-	{ { { 31, 0, 0 }, { 0, 62, 0 } } },
-	{ { { 31, 0, 1 }, { 0, 63, 1 } } },
-	{ { { 31, 0, 2 }, { 0, 63, 0 } } },
-	{ { { 32, 0, 2 }, { 1, 63, 1 } } },
-	{ { { 32, 0, 1 }, { 1, 63, 0 } } },
-	{ { { 32, 0, 0 }, { 16, 48, 0 } } },
-	{ { { 32, 0, 1 }, { 2, 63, 0 } } },
-	{ { { 32, 0, 2 }, { 16, 49, 0 } } },
-	{ { { 33, 0, 1 }, { 3, 63, 0 } } },
-	{ { { 33, 0, 0 }, { 16, 50, 0 } } },
-	{ { { 33, 0, 1 }, { 4, 63, 0 } } },
-	{ { { 33, 0, 2 }, { 16, 51, 0 } } },
-	{ { { 34, 0, 1 }, { 5, 63, 0 } } },
-	{ { { 34, 0, 0 }, { 16, 52, 0 } } },
-	{ { { 34, 0, 1 }, { 6, 63, 0 } } },
-	{ { { 34, 0, 2 }, { 16, 53, 0 } } },
-	{ { { 35, 0, 1 }, { 7, 63, 0 } } },
-	{ { { 35, 0, 0 }, { 16, 54, 0 } } },
-	{ { { 35, 0, 1 }, { 8, 63, 0 } } },
-	{ { { 35, 0, 2 }, { 16, 55, 0 } } },
-	{ { { 36, 0, 1 }, { 9, 63, 0 } } },
-	{ { { 36, 0, 0 }, { 16, 56, 0 } } },
-	{ { { 36, 0, 1 }, { 10, 63, 0 } } },
-	{ { { 36, 0, 2 }, { 16, 57, 0 } } },
-	{ { { 37, 0, 1 }, { 11, 63, 0 } } },
-	{ { { 37, 0, 0 }, { 16, 58, 0 } } },
-	{ { { 37, 0, 1 }, { 12, 63, 0 } } },
-	{ { { 37, 0, 2 }, { 16, 59, 0 } } },
-	{ { { 38, 0, 1 }, { 13, 63, 0 } } },
-	{ { { 38, 0, 0 }, { 16, 60, 0 } } },
-	{ { { 38, 0, 1 }, { 14, 63, 0 } } },
-	{ { { 38, 0, 2 }, { 16, 61, 0 } } },
-	{ { { 39, 0, 1 }, { 15, 63, 0 } } },
-	{ { { 39, 0, 0 }, { 16, 62, 0 } } },
-	{ { { 39, 0, 1 }, { 16, 63, 1 } } },
-	{ { { 39, 0, 2 }, { 16, 63, 0 } } },
-	{ { { 40, 0, 1 }, { 17, 63, 1 } } },
-	{ { { 40, 0, 0 }, { 17, 63, 0 } } },
-	{ { { 40, 0, 1 }, { 18, 63, 1 } } },
-	{ { { 40, 0, 2 }, { 18, 63, 0 } } },
-	{ { { 41, 0, 1 }, { 19, 63, 1 } } },
-	{ { { 41, 0, 0 }, { 19, 63, 0 } } },
-	{ { { 41, 0, 1 }, { 20, 63, 1 } } },
-	{ { { 41, 0, 2 }, { 20, 63, 0 } } },
-	{ { { 42, 0, 1 }, { 21, 63, 1 } } },
-	{ { { 42, 0, 0 }, { 21, 63, 0 } } },
-	{ { { 42, 0, 1 }, { 22, 63, 1 } } },
-	{ { { 42, 0, 2 }, { 22, 63, 0 } } },
-	{ { { 43, 0, 1 }, { 23, 63, 1 } } },
-	{ { { 43, 0, 0 }, { 23, 63, 0 } } },
-	{ { { 43, 0, 1 }, { 24, 63, 1 } } },
-	{ { { 43, 0, 2 }, { 24, 63, 0 } } },
-	{ { { 44, 0, 1 }, { 25, 63, 1 } } },
-	{ { { 44, 0, 0 }, { 25, 63, 0 } } },
-	{ { { 44, 0, 1 }, { 26, 63, 1 } } },
-	{ { { 44, 0, 2 }, { 26, 63, 0 } } },
-	{ { { 45, 0, 1 }, { 27, 63, 1 } } },
-	{ { { 45, 0, 0 }, { 27, 63, 0 } } },
-	{ { { 45, 0, 1 }, { 28, 63, 1 } } },
-	{ { { 45, 0, 2 }, { 28, 63, 0 } } },
-	{ { { 46, 0, 1 }, { 29, 63, 1 } } },
-	{ { { 46, 0, 0 }, { 29, 63, 0 } } },
-	{ { { 46, 0, 1 }, { 30, 63, 1 } } },
-	{ { { 46, 0, 2 }, { 30, 63, 0 } } },
-	{ { { 47, 0, 1 }, { 31, 63, 1 } } },
-	{ { { 47, 0, 0 }, { 31, 63, 0 } } },
-	{ { { 47, 0, 1 }, { 32, 63, 1 } } },
-	{ { { 47, 0, 2 }, { 32, 63, 0 } } },
-	{ { { 48, 0, 2 }, { 33, 63, 1 } } },
-	{ { { 48, 0, 1 }, { 33, 63, 0 } } },
-	{ { { 48, 0, 0 }, { 48, 48, 0 } } },
-	{ { { 48, 0, 1 }, { 34, 63, 0 } } },
-	{ { { 48, 0, 2 }, { 48, 49, 0 } } },
-	{ { { 49, 0, 1 }, { 35, 63, 0 } } },
-	{ { { 49, 0, 0 }, { 48, 50, 0 } } },
-	{ { { 49, 0, 1 }, { 36, 63, 0 } } },
-	{ { { 49, 0, 2 }, { 48, 51, 0 } } },
-	{ { { 50, 0, 1 }, { 37, 63, 0 } } },
-	{ { { 50, 0, 0 }, { 48, 52, 0 } } },
-	{ { { 50, 0, 1 }, { 38, 63, 0 } } },
-	{ { { 50, 0, 2 }, { 48, 53, 0 } } },
-	{ { { 51, 0, 1 }, { 39, 63, 0 } } },
-	{ { { 51, 0, 0 }, { 48, 54, 0 } } },
-	{ { { 51, 0, 1 }, { 40, 63, 0 } } },
-	{ { { 51, 0, 2 }, { 48, 55, 0 } } },
-	{ { { 52, 0, 1 }, { 41, 63, 0 } } },
-	{ { { 52, 0, 0 }, { 48, 56, 0 } } },
-	{ { { 52, 0, 1 }, { 42, 63, 0 } } },
-	{ { { 52, 0, 2 }, { 48, 57, 0 } } },
-	{ { { 53, 0, 1 }, { 43, 63, 0 } } },
-	{ { { 53, 0, 0 }, { 48, 58, 0 } } },
-	{ { { 53, 0, 1 }, { 44, 63, 0 } } },
-	{ { { 53, 0, 2 }, { 48, 59, 0 } } },
-	{ { { 54, 0, 1 }, { 45, 63, 0 } } },
-	{ { { 54, 0, 0 }, { 48, 60, 0 } } },
-	{ { { 54, 0, 1 }, { 46, 63, 0 } } },
-	{ { { 54, 0, 2 }, { 48, 61, 0 } } },
-	{ { { 55, 0, 1 }, { 47, 63, 0 } } },
-	{ { { 55, 0, 0 }, { 48, 62, 0 } } },
-	{ { { 55, 0, 1 }, { 48, 63, 1 } } },
-	{ { { 55, 0, 2 }, { 48, 63, 0 } } },
-	{ { { 56, 0, 1 }, { 49, 63, 1 } } },
-	{ { { 56, 0, 0 }, { 49, 63, 0 } } },
-	{ { { 56, 0, 1 }, { 50, 63, 1 } } },
-	{ { { 56, 0, 2 }, { 50, 63, 0 } } },
-	{ { { 57, 0, 1 }, { 51, 63, 1 } } },
-	{ { { 57, 0, 0 }, { 51, 63, 0 } } },
-	{ { { 57, 0, 1 }, { 52, 63, 1 } } },
-	{ { { 57, 0, 2 }, { 52, 63, 0 } } },
-	{ { { 58, 0, 1 }, { 53, 63, 1 } } },
-	{ { { 58, 0, 0 }, { 53, 63, 0 } } },
-	{ { { 58, 0, 1 }, { 54, 63, 1 } } },
-	{ { { 58, 0, 2 }, { 54, 63, 0 } } },
-	{ { { 59, 0, 1 }, { 55, 63, 1 } } },
-	{ { { 59, 0, 0 }, { 55, 63, 0 } } },
-	{ { { 59, 0, 1 }, { 56, 63, 1 } } },
-	{ { { 59, 0, 2 }, { 56, 63, 0 } } },
-	{ { { 60, 0, 1 }, { 57, 63, 1 } } },
-	{ { { 60, 0, 0 }, { 57, 63, 0 } } },
-	{ { { 60, 0, 1 }, { 58, 63, 1 } } },
-	{ { { 60, 0, 2 }, { 58, 63, 0 } } },
-	{ { { 61, 0, 1 }, { 59, 63, 1 } } },
-	{ { { 61, 0, 0 }, { 59, 63, 0 } } },
-	{ { { 61, 0, 1 }, { 60, 63, 1 } } },
-	{ { { 61, 0, 2 }, { 60, 63, 0 } } },
-	{ { { 62, 0, 1 }, { 61, 63, 1 } } },
-	{ { { 62, 0, 0 }, { 61, 63, 0 } } },
-	{ { { 62, 0, 1 }, { 62, 63, 1 } } },
-	{ { { 62, 0, 2 }, { 62, 63, 0 } } },
-	{ { { 63, 0, 1 }, { 63, 63, 1 } } },
-	{ { { 63, 0, 0 }, { 63, 63, 0 } } }
-};
-
-static SingleColourLookup const lookup_5_4[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 2 }, { 0, 1, 0 } } },
-	{ { { 0, 0, 3 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 4 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 3 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 2 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 3, 0 } } },
-	{ { { 1, 0, 1 }, { 1, 2, 1 } } },
-	{ { { 1, 0, 2 }, { 1, 2, 0 } } },
-	{ { { 1, 0, 3 }, { 0, 4, 0 } } },
-	{ { { 1, 0, 4 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 3 }, { 0, 5, 0 } } },
-	{ { { 2, 0, 2 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 2, 0, 1 }, { 2, 3, 1 } } },
-	{ { { 2, 0, 2 }, { 2, 3, 0 } } },
-	{ { { 2, 0, 3 }, { 0, 7, 0 } } },
-	{ { { 2, 0, 4 }, { 1, 6, 1 } } },
-	{ { { 3, 0, 3 }, { 1, 6, 0 } } },
-	{ { { 3, 0, 2 }, { 0, 8, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 9, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 10, 1 } } },
-	{ { { 3, 0, 3 }, { 0, 10, 0 } } },
-	{ { { 3, 0, 4 }, { 2, 7, 1 } } },
-	{ { { 4, 0, 4 }, { 2, 7, 0 } } },
-	{ { { 4, 0, 3 }, { 0, 11, 0 } } },
-	{ { { 4, 0, 2 }, { 1, 10, 1 } } },
-	{ { { 4, 0, 1 }, { 1, 10, 0 } } },
-	{ { { 4, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 13, 1 } } },
-	{ { { 4, 0, 2 }, { 0, 13, 0 } } },
-	{ { { 4, 0, 3 }, { 0, 13, 1 } } },
-	{ { { 4, 0, 4 }, { 0, 14, 1 } } },
-	{ { { 5, 0, 3 }, { 0, 14, 0 } } },
-	{ { { 5, 0, 2 }, { 2, 11, 1 } } },
-	{ { { 5, 0, 1 }, { 2, 11, 0 } } },
-	{ { { 5, 0, 0 }, { 0, 15, 0 } } },
-	{ { { 5, 0, 1 }, { 1, 14, 1 } } },
-	{ { { 5, 0, 2 }, { 1, 14, 0 } } },
-	{ { { 5, 0, 3 }, { 0, 16, 0 } } },
-	{ { { 5, 0, 4 }, { 0, 17, 1 } } },
-	{ { { 6, 0, 3 }, { 0, 17, 0 } } },
-	{ { { 6, 0, 2 }, { 0, 17, 1 } } },
-	{ { { 6, 0, 1 }, { 0, 18, 1 } } },
-	{ { { 6, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 6, 0, 1 }, { 2, 15, 1 } } },
-	{ { { 6, 0, 2 }, { 2, 15, 0 } } },
-	{ { { 6, 0, 3 }, { 0, 19, 0 } } },
-	{ { { 6, 0, 4 }, { 1, 18, 1 } } },
-	{ { { 7, 0, 3 }, { 1, 18, 0 } } },
-	{ { { 7, 0, 2 }, { 0, 20, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 21, 1 } } },
-	{ { { 7, 0, 0 }, { 0, 21, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 21, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 22, 1 } } },
-	{ { { 7, 0, 3 }, { 0, 22, 0 } } },
-	{ { { 7, 0, 4 }, { 2, 19, 1 } } },
-	{ { { 8, 0, 4 }, { 2, 19, 0 } } },
-	{ { { 8, 0, 3 }, { 0, 23, 0 } } },
-	{ { { 8, 0, 2 }, { 1, 22, 1 } } },
-	{ { { 8, 0, 1 }, { 1, 22, 0 } } },
-	{ { { 8, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 25, 1 } } },
-	{ { { 8, 0, 2 }, { 0, 25, 0 } } },
-	{ { { 8, 0, 3 }, { 0, 25, 1 } } },
-	{ { { 8, 0, 4 }, { 0, 26, 1 } } },
-	{ { { 9, 0, 3 }, { 0, 26, 0 } } },
-	{ { { 9, 0, 2 }, { 2, 23, 1 } } },
-	{ { { 9, 0, 1 }, { 2, 23, 0 } } },
-	{ { { 9, 0, 0 }, { 0, 27, 0 } } },
-	{ { { 9, 0, 1 }, { 1, 26, 1 } } },
-	{ { { 9, 0, 2 }, { 1, 26, 0 } } },
-	{ { { 9, 0, 3 }, { 0, 28, 0 } } },
-	{ { { 9, 0, 4 }, { 0, 29, 1 } } },
-	{ { { 10, 0, 3 }, { 0, 29, 0 } } },
-	{ { { 10, 0, 2 }, { 0, 29, 1 } } },
-	{ { { 10, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 10, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 10, 0, 1 }, { 2, 27, 1 } } },
-	{ { { 10, 0, 2 }, { 2, 27, 0 } } },
-	{ { { 10, 0, 3 }, { 0, 31, 0 } } },
-	{ { { 10, 0, 4 }, { 1, 30, 1 } } },
-	{ { { 11, 0, 3 }, { 1, 30, 0 } } },
-	{ { { 11, 0, 2 }, { 4, 24, 0 } } },
-	{ { { 11, 0, 1 }, { 1, 31, 1 } } },
-	{ { { 11, 0, 0 }, { 1, 31, 0 } } },
-	{ { { 11, 0, 1 }, { 1, 31, 1 } } },
-	{ { { 11, 0, 2 }, { 2, 30, 1 } } },
-	{ { { 11, 0, 3 }, { 2, 30, 0 } } },
-	{ { { 11, 0, 4 }, { 2, 31, 1 } } },
-	{ { { 12, 0, 4 }, { 2, 31, 0 } } },
-	{ { { 12, 0, 3 }, { 4, 27, 0 } } },
-	{ { { 12, 0, 2 }, { 3, 30, 1 } } },
-	{ { { 12, 0, 1 }, { 3, 30, 0 } } },
-	{ { { 12, 0, 0 }, { 4, 28, 0 } } },
-	{ { { 12, 0, 1 }, { 3, 31, 1 } } },
-	{ { { 12, 0, 2 }, { 3, 31, 0 } } },
-	{ { { 12, 0, 3 }, { 3, 31, 1 } } },
-	{ { { 12, 0, 4 }, { 4, 30, 1 } } },
-	{ { { 13, 0, 3 }, { 4, 30, 0 } } },
-	{ { { 13, 0, 2 }, { 6, 27, 1 } } },
-	{ { { 13, 0, 1 }, { 6, 27, 0 } } },
-	{ { { 13, 0, 0 }, { 4, 31, 0 } } },
-	{ { { 13, 0, 1 }, { 5, 30, 1 } } },
-	{ { { 13, 0, 2 }, { 5, 30, 0 } } },
-	{ { { 13, 0, 3 }, { 8, 24, 0 } } },
-	{ { { 13, 0, 4 }, { 5, 31, 1 } } },
-	{ { { 14, 0, 3 }, { 5, 31, 0 } } },
-	{ { { 14, 0, 2 }, { 5, 31, 1 } } },
-	{ { { 14, 0, 1 }, { 6, 30, 1 } } },
-	{ { { 14, 0, 0 }, { 6, 30, 0 } } },
-	{ { { 14, 0, 1 }, { 6, 31, 1 } } },
-	{ { { 14, 0, 2 }, { 6, 31, 0 } } },
-	{ { { 14, 0, 3 }, { 8, 27, 0 } } },
-	{ { { 14, 0, 4 }, { 7, 30, 1 } } },
-	{ { { 15, 0, 3 }, { 7, 30, 0 } } },
-	{ { { 15, 0, 2 }, { 8, 28, 0 } } },
-	{ { { 15, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 15, 0, 0 }, { 7, 31, 0 } } },
-	{ { { 15, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 15, 0, 2 }, { 8, 30, 1 } } },
-	{ { { 15, 0, 3 }, { 8, 30, 0 } } },
-	{ { { 15, 0, 4 }, { 10, 27, 1 } } },
-	{ { { 16, 0, 4 }, { 10, 27, 0 } } },
-	{ { { 16, 0, 3 }, { 8, 31, 0 } } },
-	{ { { 16, 0, 2 }, { 9, 30, 1 } } },
-	{ { { 16, 0, 1 }, { 9, 30, 0 } } },
-	{ { { 16, 0, 0 }, { 12, 24, 0 } } },
-	{ { { 16, 0, 1 }, { 9, 31, 1 } } },
-	{ { { 16, 0, 2 }, { 9, 31, 0 } } },
-	{ { { 16, 0, 3 }, { 9, 31, 1 } } },
-	{ { { 16, 0, 4 }, { 10, 30, 1 } } },
-	{ { { 17, 0, 3 }, { 10, 30, 0 } } },
-	{ { { 17, 0, 2 }, { 10, 31, 1 } } },
-	{ { { 17, 0, 1 }, { 10, 31, 0 } } },
-	{ { { 17, 0, 0 }, { 12, 27, 0 } } },
-	{ { { 17, 0, 1 }, { 11, 30, 1 } } },
-	{ { { 17, 0, 2 }, { 11, 30, 0 } } },
-	{ { { 17, 0, 3 }, { 12, 28, 0 } } },
-	{ { { 17, 0, 4 }, { 11, 31, 1 } } },
-	{ { { 18, 0, 3 }, { 11, 31, 0 } } },
-	{ { { 18, 0, 2 }, { 11, 31, 1 } } },
-	{ { { 18, 0, 1 }, { 12, 30, 1 } } },
-	{ { { 18, 0, 0 }, { 12, 30, 0 } } },
-	{ { { 18, 0, 1 }, { 14, 27, 1 } } },
-	{ { { 18, 0, 2 }, { 14, 27, 0 } } },
-	{ { { 18, 0, 3 }, { 12, 31, 0 } } },
-	{ { { 18, 0, 4 }, { 13, 30, 1 } } },
-	{ { { 19, 0, 3 }, { 13, 30, 0 } } },
-	{ { { 19, 0, 2 }, { 16, 24, 0 } } },
-	{ { { 19, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 19, 0, 0 }, { 13, 31, 0 } } },
-	{ { { 19, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 19, 0, 2 }, { 14, 30, 1 } } },
-	{ { { 19, 0, 3 }, { 14, 30, 0 } } },
-	{ { { 19, 0, 4 }, { 14, 31, 1 } } },
-	{ { { 20, 0, 4 }, { 14, 31, 0 } } },
-	{ { { 20, 0, 3 }, { 16, 27, 0 } } },
-	{ { { 20, 0, 2 }, { 15, 30, 1 } } },
-	{ { { 20, 0, 1 }, { 15, 30, 0 } } },
-	{ { { 20, 0, 0 }, { 16, 28, 0 } } },
-	{ { { 20, 0, 1 }, { 15, 31, 1 } } },
-	{ { { 20, 0, 2 }, { 15, 31, 0 } } },
-	{ { { 20, 0, 3 }, { 15, 31, 1 } } },
-	{ { { 20, 0, 4 }, { 16, 30, 1 } } },
-	{ { { 21, 0, 3 }, { 16, 30, 0 } } },
-	{ { { 21, 0, 2 }, { 18, 27, 1 } } },
-	{ { { 21, 0, 1 }, { 18, 27, 0 } } },
-	{ { { 21, 0, 0 }, { 16, 31, 0 } } },
-	{ { { 21, 0, 1 }, { 17, 30, 1 } } },
-	{ { { 21, 0, 2 }, { 17, 30, 0 } } },
-	{ { { 21, 0, 3 }, { 20, 24, 0 } } },
-	{ { { 21, 0, 4 }, { 17, 31, 1 } } },
-	{ { { 22, 0, 3 }, { 17, 31, 0 } } },
-	{ { { 22, 0, 2 }, { 17, 31, 1 } } },
-	{ { { 22, 0, 1 }, { 18, 30, 1 } } },
-	{ { { 22, 0, 0 }, { 18, 30, 0 } } },
-	{ { { 22, 0, 1 }, { 18, 31, 1 } } },
-	{ { { 22, 0, 2 }, { 18, 31, 0 } } },
-	{ { { 22, 0, 3 }, { 20, 27, 0 } } },
-	{ { { 22, 0, 4 }, { 19, 30, 1 } } },
-	{ { { 23, 0, 3 }, { 19, 30, 0 } } },
-	{ { { 23, 0, 2 }, { 20, 28, 0 } } },
-	{ { { 23, 0, 1 }, { 19, 31, 1 } } },
-	{ { { 23, 0, 0 }, { 19, 31, 0 } } },
-	{ { { 23, 0, 1 }, { 19, 31, 1 } } },
-	{ { { 23, 0, 2 }, { 20, 30, 1 } } },
-	{ { { 23, 0, 3 }, { 20, 30, 0 } } },
-	{ { { 23, 0, 4 }, { 22, 27, 1 } } },
-	{ { { 24, 0, 4 }, { 22, 27, 0 } } },
-	{ { { 24, 0, 3 }, { 20, 31, 0 } } },
-	{ { { 24, 0, 2 }, { 21, 30, 1 } } },
-	{ { { 24, 0, 1 }, { 21, 30, 0 } } },
-	{ { { 24, 0, 0 }, { 24, 24, 0 } } },
-	{ { { 24, 0, 1 }, { 21, 31, 1 } } },
-	{ { { 24, 0, 2 }, { 21, 31, 0 } } },
-	{ { { 24, 0, 3 }, { 21, 31, 1 } } },
-	{ { { 24, 0, 4 }, { 22, 30, 1 } } },
-	{ { { 25, 0, 3 }, { 22, 30, 0 } } },
-	{ { { 25, 0, 2 }, { 22, 31, 1 } } },
-	{ { { 25, 0, 1 }, { 22, 31, 0 } } },
-	{ { { 25, 0, 0 }, { 24, 27, 0 } } },
-	{ { { 25, 0, 1 }, { 23, 30, 1 } } },
-	{ { { 25, 0, 2 }, { 23, 30, 0 } } },
-	{ { { 25, 0, 3 }, { 24, 28, 0 } } },
-	{ { { 25, 0, 4 }, { 23, 31, 1 } } },
-	{ { { 26, 0, 3 }, { 23, 31, 0 } } },
-	{ { { 26, 0, 2 }, { 23, 31, 1 } } },
-	{ { { 26, 0, 1 }, { 24, 30, 1 } } },
-	{ { { 26, 0, 0 }, { 24, 30, 0 } } },
-	{ { { 26, 0, 1 }, { 26, 27, 1 } } },
-	{ { { 26, 0, 2 }, { 26, 27, 0 } } },
-	{ { { 26, 0, 3 }, { 24, 31, 0 } } },
-	{ { { 26, 0, 4 }, { 25, 30, 1 } } },
-	{ { { 27, 0, 3 }, { 25, 30, 0 } } },
-	{ { { 27, 0, 2 }, { 28, 24, 0 } } },
-	{ { { 27, 0, 1 }, { 25, 31, 1 } } },
-	{ { { 27, 0, 0 }, { 25, 31, 0 } } },
-	{ { { 27, 0, 1 }, { 25, 31, 1 } } },
-	{ { { 27, 0, 2 }, { 26, 30, 1 } } },
-	{ { { 27, 0, 3 }, { 26, 30, 0 } } },
-	{ { { 27, 0, 4 }, { 26, 31, 1 } } },
-	{ { { 28, 0, 4 }, { 26, 31, 0 } } },
-	{ { { 28, 0, 3 }, { 28, 27, 0 } } },
-	{ { { 28, 0, 2 }, { 27, 30, 1 } } },
-	{ { { 28, 0, 1 }, { 27, 30, 0 } } },
-	{ { { 28, 0, 0 }, { 28, 28, 0 } } },
-	{ { { 28, 0, 1 }, { 27, 31, 1 } } },
-	{ { { 28, 0, 2 }, { 27, 31, 0 } } },
-	{ { { 28, 0, 3 }, { 27, 31, 1 } } },
-	{ { { 28, 0, 4 }, { 28, 30, 1 } } },
-	{ { { 29, 0, 3 }, { 28, 30, 0 } } },
-	{ { { 29, 0, 2 }, { 30, 27, 1 } } },
-	{ { { 29, 0, 1 }, { 30, 27, 0 } } },
-	{ { { 29, 0, 0 }, { 28, 31, 0 } } },
-	{ { { 29, 0, 1 }, { 29, 30, 1 } } },
-	{ { { 29, 0, 2 }, { 29, 30, 0 } } },
-	{ { { 29, 0, 3 }, { 29, 30, 1 } } },
-	{ { { 29, 0, 4 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 3 }, { 29, 31, 0 } } },
-	{ { { 30, 0, 2 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 1 }, { 30, 30, 1 } } },
-	{ { { 30, 0, 0 }, { 30, 30, 0 } } },
-	{ { { 30, 0, 1 }, { 30, 31, 1 } } },
-	{ { { 30, 0, 2 }, { 30, 31, 0 } } },
-	{ { { 30, 0, 3 }, { 30, 31, 1 } } },
-	{ { { 30, 0, 4 }, { 31, 30, 1 } } },
-	{ { { 31, 0, 3 }, { 31, 30, 0 } } },
-	{ { { 31, 0, 2 }, { 31, 30, 1 } } },
-	{ { { 31, 0, 1 }, { 31, 31, 1 } } },
-	{ { { 31, 0, 0 }, { 31, 31, 0 } } }
-};
-
-static SingleColourLookup const lookup_6_4[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 1, 0 } } },
-	{ { { 0, 0, 2 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 3, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 4, 0 } } },
-	{ { { 1, 0, 2 }, { 0, 5, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 7, 0 } } },
-	{ { { 2, 0, 2 }, { 0, 8, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 9, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 10, 0 } } },
-	{ { { 3, 0, 2 }, { 0, 11, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 4, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 13, 0 } } },
-	{ { { 4, 0, 2 }, { 0, 14, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 15, 1 } } },
-	{ { { 5, 0, 0 }, { 0, 15, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 16, 0 } } },
-	{ { { 5, 0, 2 }, { 1, 15, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 17, 0 } } },
-	{ { { 6, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 19, 0 } } },
-	{ { { 6, 0, 2 }, { 3, 14, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 20, 0 } } },
-	{ { { 7, 0, 0 }, { 0, 21, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 22, 0 } } },
-	{ { { 7, 0, 2 }, { 4, 15, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 23, 0 } } },
-	{ { { 8, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 25, 0 } } },
-	{ { { 8, 0, 2 }, { 6, 14, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 26, 0 } } },
-	{ { { 9, 0, 0 }, { 0, 27, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 28, 0 } } },
-	{ { { 9, 0, 2 }, { 7, 15, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 29, 0 } } },
-	{ { { 10, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 31, 0 } } },
-	{ { { 10, 0, 2 }, { 9, 14, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 32, 0 } } },
-	{ { { 11, 0, 0 }, { 0, 33, 0 } } },
-	{ { { 11, 0, 1 }, { 2, 30, 0 } } },
-	{ { { 11, 0, 2 }, { 0, 34, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 35, 0 } } },
-	{ { { 12, 0, 0 }, { 0, 36, 0 } } },
-	{ { { 12, 0, 1 }, { 3, 31, 0 } } },
-	{ { { 12, 0, 2 }, { 0, 37, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 38, 0 } } },
-	{ { { 13, 0, 0 }, { 0, 39, 0 } } },
-	{ { { 13, 0, 1 }, { 5, 30, 0 } } },
-	{ { { 13, 0, 2 }, { 0, 40, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 41, 0 } } },
-	{ { { 14, 0, 0 }, { 0, 42, 0 } } },
-	{ { { 14, 0, 1 }, { 6, 31, 0 } } },
-	{ { { 14, 0, 2 }, { 0, 43, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 44, 0 } } },
-	{ { { 15, 0, 0 }, { 0, 45, 0 } } },
-	{ { { 15, 0, 1 }, { 8, 30, 0 } } },
-	{ { { 15, 0, 2 }, { 0, 46, 0 } } },
-	{ { { 16, 0, 2 }, { 0, 47, 0 } } },
-	{ { { 16, 0, 1 }, { 1, 46, 0 } } },
-	{ { { 16, 0, 0 }, { 0, 48, 0 } } },
-	{ { { 16, 0, 1 }, { 0, 49, 0 } } },
-	{ { { 16, 0, 2 }, { 0, 50, 0 } } },
-	{ { { 17, 0, 1 }, { 2, 47, 0 } } },
-	{ { { 17, 0, 0 }, { 0, 51, 0 } } },
-	{ { { 17, 0, 1 }, { 0, 52, 0 } } },
-	{ { { 17, 0, 2 }, { 0, 53, 0 } } },
-	{ { { 18, 0, 1 }, { 4, 46, 0 } } },
-	{ { { 18, 0, 0 }, { 0, 54, 0 } } },
-	{ { { 18, 0, 1 }, { 0, 55, 0 } } },
-	{ { { 18, 0, 2 }, { 0, 56, 0 } } },
-	{ { { 19, 0, 1 }, { 5, 47, 0 } } },
-	{ { { 19, 0, 0 }, { 0, 57, 0 } } },
-	{ { { 19, 0, 1 }, { 0, 58, 0 } } },
-	{ { { 19, 0, 2 }, { 0, 59, 0 } } },
-	{ { { 20, 0, 1 }, { 7, 46, 0 } } },
-	{ { { 20, 0, 0 }, { 0, 60, 0 } } },
-	{ { { 20, 0, 1 }, { 0, 61, 0 } } },
-	{ { { 20, 0, 2 }, { 0, 62, 0 } } },
-	{ { { 21, 0, 1 }, { 8, 47, 0 } } },
-	{ { { 21, 0, 0 }, { 0, 63, 0 } } },
-	{ { { 21, 0, 1 }, { 1, 62, 0 } } },
-	{ { { 21, 0, 2 }, { 1, 63, 0 } } },
-	{ { { 22, 0, 1 }, { 10, 46, 0 } } },
-	{ { { 22, 0, 0 }, { 2, 62, 0 } } },
-	{ { { 22, 0, 1 }, { 2, 63, 0 } } },
-	{ { { 22, 0, 2 }, { 3, 62, 0 } } },
-	{ { { 23, 0, 1 }, { 11, 47, 0 } } },
-	{ { { 23, 0, 0 }, { 3, 63, 0 } } },
-	{ { { 23, 0, 1 }, { 4, 62, 0 } } },
-	{ { { 23, 0, 2 }, { 4, 63, 0 } } },
-	{ { { 24, 0, 1 }, { 13, 46, 0 } } },
-	{ { { 24, 0, 0 }, { 5, 62, 0 } } },
-	{ { { 24, 0, 1 }, { 5, 63, 0 } } },
-	{ { { 24, 0, 2 }, { 6, 62, 0 } } },
-	{ { { 25, 0, 1 }, { 14, 47, 0 } } },
-	{ { { 25, 0, 0 }, { 6, 63, 0 } } },
-	{ { { 25, 0, 1 }, { 7, 62, 0 } } },
-	{ { { 25, 0, 2 }, { 7, 63, 0 } } },
-	{ { { 26, 0, 1 }, { 16, 45, 0 } } },
-	{ { { 26, 0, 0 }, { 8, 62, 0 } } },
-	{ { { 26, 0, 1 }, { 8, 63, 0 } } },
-	{ { { 26, 0, 2 }, { 9, 62, 0 } } },
-	{ { { 27, 0, 1 }, { 16, 48, 0 } } },
-	{ { { 27, 0, 0 }, { 9, 63, 0 } } },
-	{ { { 27, 0, 1 }, { 10, 62, 0 } } },
-	{ { { 27, 0, 2 }, { 10, 63, 0 } } },
-	{ { { 28, 0, 1 }, { 16, 51, 0 } } },
-	{ { { 28, 0, 0 }, { 11, 62, 0 } } },
-	{ { { 28, 0, 1 }, { 11, 63, 0 } } },
-	{ { { 28, 0, 2 }, { 12, 62, 0 } } },
-	{ { { 29, 0, 1 }, { 16, 54, 0 } } },
-	{ { { 29, 0, 0 }, { 12, 63, 0 } } },
-	{ { { 29, 0, 1 }, { 13, 62, 0 } } },
-	{ { { 29, 0, 2 }, { 13, 63, 0 } } },
-	{ { { 30, 0, 1 }, { 16, 57, 0 } } },
-	{ { { 30, 0, 0 }, { 14, 62, 0 } } },
-	{ { { 30, 0, 1 }, { 14, 63, 0 } } },
-	{ { { 30, 0, 2 }, { 15, 62, 0 } } },
-	{ { { 31, 0, 1 }, { 16, 60, 0 } } },
-	{ { { 31, 0, 0 }, { 15, 63, 0 } } },
-	{ { { 31, 0, 1 }, { 24, 46, 0 } } },
-	{ { { 31, 0, 2 }, { 16, 62, 0 } } },
-	{ { { 32, 0, 2 }, { 16, 63, 0 } } },
-	{ { { 32, 0, 1 }, { 17, 62, 0 } } },
-	{ { { 32, 0, 0 }, { 25, 47, 0 } } },
-	{ { { 32, 0, 1 }, { 17, 63, 0 } } },
-	{ { { 32, 0, 2 }, { 18, 62, 0 } } },
-	{ { { 33, 0, 1 }, { 18, 63, 0 } } },
-	{ { { 33, 0, 0 }, { 27, 46, 0 } } },
-	{ { { 33, 0, 1 }, { 19, 62, 0 } } },
-	{ { { 33, 0, 2 }, { 19, 63, 0 } } },
-	{ { { 34, 0, 1 }, { 20, 62, 0 } } },
-	{ { { 34, 0, 0 }, { 28, 47, 0 } } },
-	{ { { 34, 0, 1 }, { 20, 63, 0 } } },
-	{ { { 34, 0, 2 }, { 21, 62, 0 } } },
-	{ { { 35, 0, 1 }, { 21, 63, 0 } } },
-	{ { { 35, 0, 0 }, { 30, 46, 0 } } },
-	{ { { 35, 0, 1 }, { 22, 62, 0 } } },
-	{ { { 35, 0, 2 }, { 22, 63, 0 } } },
-	{ { { 36, 0, 1 }, { 23, 62, 0 } } },
-	{ { { 36, 0, 0 }, { 31, 47, 0 } } },
-	{ { { 36, 0, 1 }, { 23, 63, 0 } } },
-	{ { { 36, 0, 2 }, { 24, 62, 0 } } },
-	{ { { 37, 0, 1 }, { 24, 63, 0 } } },
-	{ { { 37, 0, 0 }, { 32, 47, 0 } } },
-	{ { { 37, 0, 1 }, { 25, 62, 0 } } },
-	{ { { 37, 0, 2 }, { 25, 63, 0 } } },
-	{ { { 38, 0, 1 }, { 26, 62, 0 } } },
-	{ { { 38, 0, 0 }, { 32, 50, 0 } } },
-	{ { { 38, 0, 1 }, { 26, 63, 0 } } },
-	{ { { 38, 0, 2 }, { 27, 62, 0 } } },
-	{ { { 39, 0, 1 }, { 27, 63, 0 } } },
-	{ { { 39, 0, 0 }, { 32, 53, 0 } } },
-	{ { { 39, 0, 1 }, { 28, 62, 0 } } },
-	{ { { 39, 0, 2 }, { 28, 63, 0 } } },
-	{ { { 40, 0, 1 }, { 29, 62, 0 } } },
-	{ { { 40, 0, 0 }, { 32, 56, 0 } } },
-	{ { { 40, 0, 1 }, { 29, 63, 0 } } },
-	{ { { 40, 0, 2 }, { 30, 62, 0 } } },
-	{ { { 41, 0, 1 }, { 30, 63, 0 } } },
-	{ { { 41, 0, 0 }, { 32, 59, 0 } } },
-	{ { { 41, 0, 1 }, { 31, 62, 0 } } },
-	{ { { 41, 0, 2 }, { 31, 63, 0 } } },
-	{ { { 42, 0, 1 }, { 32, 61, 0 } } },
-	{ { { 42, 0, 0 }, { 32, 62, 0 } } },
-	{ { { 42, 0, 1 }, { 32, 63, 0 } } },
-	{ { { 42, 0, 2 }, { 41, 46, 0 } } },
-	{ { { 43, 0, 1 }, { 33, 62, 0 } } },
-	{ { { 43, 0, 0 }, { 33, 63, 0 } } },
-	{ { { 43, 0, 1 }, { 34, 62, 0 } } },
-	{ { { 43, 0, 2 }, { 42, 47, 0 } } },
-	{ { { 44, 0, 1 }, { 34, 63, 0 } } },
-	{ { { 44, 0, 0 }, { 35, 62, 0 } } },
-	{ { { 44, 0, 1 }, { 35, 63, 0 } } },
-	{ { { 44, 0, 2 }, { 44, 46, 0 } } },
-	{ { { 45, 0, 1 }, { 36, 62, 0 } } },
-	{ { { 45, 0, 0 }, { 36, 63, 0 } } },
-	{ { { 45, 0, 1 }, { 37, 62, 0 } } },
-	{ { { 45, 0, 2 }, { 45, 47, 0 } } },
-	{ { { 46, 0, 1 }, { 37, 63, 0 } } },
-	{ { { 46, 0, 0 }, { 38, 62, 0 } } },
-	{ { { 46, 0, 1 }, { 38, 63, 0 } } },
-	{ { { 46, 0, 2 }, { 47, 46, 0 } } },
-	{ { { 47, 0, 1 }, { 39, 62, 0 } } },
-	{ { { 47, 0, 0 }, { 39, 63, 0 } } },
-	{ { { 47, 0, 1 }, { 40, 62, 0 } } },
-	{ { { 47, 0, 2 }, { 48, 46, 0 } } },
-	{ { { 48, 0, 2 }, { 40, 63, 0 } } },
-	{ { { 48, 0, 1 }, { 41, 62, 0 } } },
-	{ { { 48, 0, 0 }, { 41, 63, 0 } } },
-	{ { { 48, 0, 1 }, { 48, 49, 0 } } },
-	{ { { 48, 0, 2 }, { 42, 62, 0 } } },
-	{ { { 49, 0, 1 }, { 42, 63, 0 } } },
-	{ { { 49, 0, 0 }, { 43, 62, 0 } } },
-	{ { { 49, 0, 1 }, { 48, 52, 0 } } },
-	{ { { 49, 0, 2 }, { 43, 63, 0 } } },
-	{ { { 50, 0, 1 }, { 44, 62, 0 } } },
-	{ { { 50, 0, 0 }, { 44, 63, 0 } } },
-	{ { { 50, 0, 1 }, { 48, 55, 0 } } },
-	{ { { 50, 0, 2 }, { 45, 62, 0 } } },
-	{ { { 51, 0, 1 }, { 45, 63, 0 } } },
-	{ { { 51, 0, 0 }, { 46, 62, 0 } } },
-	{ { { 51, 0, 1 }, { 48, 58, 0 } } },
-	{ { { 51, 0, 2 }, { 46, 63, 0 } } },
-	{ { { 52, 0, 1 }, { 47, 62, 0 } } },
-	{ { { 52, 0, 0 }, { 47, 63, 0 } } },
-	{ { { 52, 0, 1 }, { 48, 61, 0 } } },
-	{ { { 52, 0, 2 }, { 48, 62, 0 } } },
-	{ { { 53, 0, 1 }, { 56, 47, 0 } } },
-	{ { { 53, 0, 0 }, { 48, 63, 0 } } },
-	{ { { 53, 0, 1 }, { 49, 62, 0 } } },
-	{ { { 53, 0, 2 }, { 49, 63, 0 } } },
-	{ { { 54, 0, 1 }, { 58, 46, 0 } } },
-	{ { { 54, 0, 0 }, { 50, 62, 0 } } },
-	{ { { 54, 0, 1 }, { 50, 63, 0 } } },
-	{ { { 54, 0, 2 }, { 51, 62, 0 } } },
-	{ { { 55, 0, 1 }, { 59, 47, 0 } } },
-	{ { { 55, 0, 0 }, { 51, 63, 0 } } },
-	{ { { 55, 0, 1 }, { 52, 62, 0 } } },
-	{ { { 55, 0, 2 }, { 52, 63, 0 } } },
-	{ { { 56, 0, 1 }, { 61, 46, 0 } } },
-	{ { { 56, 0, 0 }, { 53, 62, 0 } } },
-	{ { { 56, 0, 1 }, { 53, 63, 0 } } },
-	{ { { 56, 0, 2 }, { 54, 62, 0 } } },
-	{ { { 57, 0, 1 }, { 62, 47, 0 } } },
-	{ { { 57, 0, 0 }, { 54, 63, 0 } } },
-	{ { { 57, 0, 1 }, { 55, 62, 0 } } },
-	{ { { 57, 0, 2 }, { 55, 63, 0 } } },
-	{ { { 58, 0, 1 }, { 56, 62, 1 } } },
-	{ { { 58, 0, 0 }, { 56, 62, 0 } } },
-	{ { { 58, 0, 1 }, { 56, 63, 0 } } },
-	{ { { 58, 0, 2 }, { 57, 62, 0 } } },
-	{ { { 59, 0, 1 }, { 57, 63, 1 } } },
-	{ { { 59, 0, 0 }, { 57, 63, 0 } } },
-	{ { { 59, 0, 1 }, { 58, 62, 0 } } },
-	{ { { 59, 0, 2 }, { 58, 63, 0 } } },
-	{ { { 60, 0, 1 }, { 59, 62, 1 } } },
-	{ { { 60, 0, 0 }, { 59, 62, 0 } } },
-	{ { { 60, 0, 1 }, { 59, 63, 0 } } },
-	{ { { 60, 0, 2 }, { 60, 62, 0 } } },
-	{ { { 61, 0, 1 }, { 60, 63, 1 } } },
-	{ { { 61, 0, 0 }, { 60, 63, 0 } } },
-	{ { { 61, 0, 1 }, { 61, 62, 0 } } },
-	{ { { 61, 0, 2 }, { 61, 63, 0 } } },
-	{ { { 62, 0, 1 }, { 62, 62, 1 } } },
-	{ { { 62, 0, 0 }, { 62, 62, 0 } } },
-	{ { { 62, 0, 1 }, { 62, 63, 0 } } },
-	{ { { 62, 0, 2 }, { 63, 62, 0 } } },
-	{ { { 63, 0, 1 }, { 63, 63, 1 } } },
-	{ { { 63, 0, 0 }, { 63, 63, 0 } } }
-};
diff --git a/3rdparty/libsquish/squish.cpp b/3rdparty/libsquish/squish.cpp
deleted file mode 100644
index cd91f8746..000000000
--- a/3rdparty/libsquish/squish.cpp
+++ /dev/null
@@ -1,260 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "squish.h"
-#include "colourset.h"
-#include "maths.h"
-#include "rangefit.h"
-#include "clusterfit.h"
-#include "colourblock.h"
-#include "alpha.h"
-#include "singlecolourfit.h"
-
-namespace squish {
-
-static int FixFlags( int flags )
-{
-	// grab the flag bits
-	int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
-	int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
-	int extra = flags & kWeightColourByAlpha;
-	
-	// set defaults
-	if ( method != kDxt3
-	&&   method != kDxt5
-	&&   method != kBc4
-	&&   method != kBc5 )
-	{
-		method = kDxt1;
-	}
-	if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
-		fit = kColourClusterFit;
-		
-	// done
-	return method | fit | extra;
-}
-
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
-	{
-		u8 alpha[16*4];
-		for( int i = 0; i < 16; ++i )
-		{
-			alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
-		}
-
-		u8* rBlock = reinterpret_cast< u8* >( block );
-		CompressAlphaDxt5( alpha, mask, rBlock );
-
-		if ( ( flags & ( kBc5 ) ) != 0 )
-		{
-			for( int i = 0; i < 16; ++i )
-			{
-				alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
-			}
-
-			u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
-			CompressAlphaDxt5( alpha, mask, gBlock );
-		}
-
-		return;
-	}
-
-	// get the block locations
-	void* colourBlock = block;
-	void* alphaBlock = block;
-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
-		colourBlock = reinterpret_cast< u8* >( block ) + 8;
-
-	// create the minimal point set
-	ColourSet colours( rgba, mask, flags );
-	
-	// check the compression type and compress colour
-	if( colours.GetCount() == 1 )
-	{
-		// always do a single colour fit
-		SingleColourFit fit( &colours, flags );
-		fit.Compress( colourBlock );
-	}
-	else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
-	{
-		// do a range fit
-		RangeFit fit( &colours, flags, metric );
-		fit.Compress( colourBlock );
-	}
-	else
-	{
-		// default to a cluster fit (could be iterative or not)
-		ClusterFit fit( &colours, flags, metric );
-		fit.Compress( colourBlock );
-	}
-	
-	// compress alpha separately if necessary
-	if( ( flags & kDxt3 ) != 0 )
-		CompressAlphaDxt3( rgba, mask, alphaBlock );
-	else if( ( flags & kDxt5 ) != 0 )
-		CompressAlphaDxt5( rgba, mask, alphaBlock );
-}
-
-void Decompress( u8* rgba, void const* block, int flags )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// get the block locations
-	void const* colourBlock = block;
-	void const* alphaBock = block;
-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
-		colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
-
-	// decompress colour
-	DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
-
-	// decompress alpha separately if necessary
-	if( ( flags & kDxt3 ) != 0 )
-		DecompressAlphaDxt3( rgba, alphaBock );
-	else if( ( flags & kDxt5 ) != 0 )
-		DecompressAlphaDxt5( rgba, alphaBock );
-}
-
-int GetStorageRequirements( int width, int height, int flags )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-	
-	// compute the storage requirements
-	int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
-	int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
-	return blockcount*blocksize;
-}
-
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// initialise the block output
-	u8* targetBlock = reinterpret_cast< u8* >( blocks );
-	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
-
-	// loop over blocks
-	for( int y = 0; y < height; y += 4 )
-	{
-		for( int x = 0; x < width; x += 4 )
-		{
-			// build the 4x4 block of pixels
-			u8 sourceRgba[16*4];
-			u8* targetPixel = sourceRgba;
-			int mask = 0;
-			for( int py = 0; py < 4; ++py )
-			{
-				for( int px = 0; px < 4; ++px )
-				{
-					// get the source pixel in the image
-					int sx = x + px;
-					int sy = y + py;
-					
-					// enable if we're in the image
-					if( sx < width && sy < height )
-					{
-						// copy the rgba value
-						u8 const* sourcePixel = rgba + 4*( width*sy + sx );
-						for( int i = 0; i < 4; ++i )
-							*targetPixel++ = *sourcePixel++;
-							
-						// enable this pixel
-						mask |= ( 1 << ( 4*py + px ) );
-					}
-					else
-					{
-						// skip this pixel as its outside the image
-						targetPixel += 4;
-					}
-				}
-			}
-			
-			// compress it into the output
-			CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
-			
-			// advance
-			targetBlock += bytesPerBlock;
-		}
-	}
-}
-
-void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// initialise the block input
-	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
-	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
-
-	// loop over blocks
-	for( int y = 0; y < height; y += 4 )
-	{
-		for( int x = 0; x < width; x += 4 )
-		{
-			// decompress the block
-			u8 targetRgba[4*16];
-			Decompress( targetRgba, sourceBlock, flags );
-			
-			// write the decompressed pixels to the correct image locations
-			u8 const* sourcePixel = targetRgba;
-			for( int py = 0; py < 4; ++py )
-			{
-				for( int px = 0; px < 4; ++px )
-				{
-					// get the target location
-					int sx = x + px;
-					int sy = y + py;
-					if( sx < width && sy < height )
-					{
-						u8* targetPixel = rgba + 4*( width*sy + sx );
-						
-						// copy the rgba value
-						for( int i = 0; i < 4; ++i )
-							*targetPixel++ = *sourcePixel++;
-					}
-					else
-					{
-						// skip this pixel as its outside the image
-						sourcePixel += 4;
-					}
-				}
-			}
-			
-			// advance
-			sourceBlock += bytesPerBlock;
-		}
-	}
-}
-
-} // namespace squish
diff --git a/3rdparty/libsquish/squish.h b/3rdparty/libsquish/squish.h
deleted file mode 100644
index 175375f83..000000000
--- a/3rdparty/libsquish/squish.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_H
-#define SQUISH_H
-
-//! All squish API functions live in this namespace.
-namespace squish {
-
-// -----------------------------------------------------------------------------
-
-//! Typedef a quantity that is a single unsigned byte.
-typedef unsigned char u8;
-
-// -----------------------------------------------------------------------------
-
-enum
-{
-	//! Use DXT1 compression.
-	kDxt1 = ( 1 << 0 ),
-
-	//! Use DXT3 compression.
-	kDxt3 = ( 1 << 1 ),
-
-	//! Use DXT5 compression.
-	kDxt5 = ( 1 << 2 ),
-
-	//! Use BC4 compression.
-	kBc4 = ( 1 << 3 ),
-
-	//! Use BC5 compression.
-	kBc5 = ( 1 << 4 ),
-
-	//! Use a slow but high quality colour compressor (the default).
-	kColourClusterFit = ( 1 << 5 ),
-
-	//! Use a fast but low quality colour compressor.
-	kColourRangeFit	= ( 1 << 6 ),
-
-	//! Weight the colour by alpha during cluster fit (disabled by default).
-	kWeightColourByAlpha = ( 1 << 7 ),
-
-	//! Use a very slow but very high quality colour compressor.
-	kColourIterativeClusterFit = ( 1 << 8 ),
-};
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Compresses a 4x4 block of pixels.
-
-	@param rgba		The rgba values of the 16 source pixels.
-	@param mask		The valid pixel mask.
-	@param block	Storage for the compressed DXT block.
-	@param flags	Compression flags.
-	@param metric	An optional perceptual metric.
-	
-	The source pixels should be presented as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-		
-	The mask parameter enables only certain pixels within the block. The lowest
-	bit enables the first pixel and so on up to the 16th bit. Bits beyond the
-	16th bit are ignored. Pixels that are not enabled are allowed to take
-	arbitrary colours in the output block. An example of how this can be used
-	is in the CompressImage function to disable pixels outside the bounds of
-	the image when the width or height is not divisible by 4.
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for the compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor to use 
-	when fitting the RGB components of the data. Possible colour compressors 
-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
-	quality) or kColourIterativeClusterFit (slowest, best quality).
-		
-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
-	flag can be specified to weight the importance of each pixel by its alpha 
-	value. For images that are rendered using alpha blending, this can 
-	significantly increase the perceived quality.
-	
-	The metric parameter can be used to weight the relative importance of each
-	colour channel, or pass NULL to use the default uniform weight of 
-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
-	allowed either uniform or "perceptual" weights with the fixed values
-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
-	contiguous array of 3 floats.
-*/
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Compresses a 4x4 block of pixels.
-
-	@param rgba		The rgba values of the 16 source pixels.
-	@param block	Storage for the compressed DXT block.
-	@param flags	Compression flags.
-	@param metric	An optional perceptual metric.
-	
-	The source pixels should be presented as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for the compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor to use 
-	when fitting the RGB components of the data. Possible colour compressors 
-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
-	quality) or kColourIterativeClusterFit (slowest, best quality).
-		
-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
-	flag can be specified to weight the importance of each pixel by its alpha 
-	value. For images that are rendered using alpha blending, this can 
-	significantly increase the perceived quality.
-	
-	The metric parameter can be used to weight the relative importance of each
-	colour channel, or pass NULL to use the default uniform weight of 
-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
-	allowed either uniform or "perceptual" weights with the fixed values
-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
-	contiguous array of 3 floats.
-	
-	This method is an inline that calls CompressMasked with a mask of 0xffff, 
-	provided for compatibility with older versions of squish.
-*/
-inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
-{
-	CompressMasked( rgba, 0xffff, block, flags, metric );
-}
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Decompresses a 4x4 block of pixels.
-
-	@param rgba		Storage for the 16 decompressed pixels.
-	@param block	The compressed DXT block.
-	@param flags	Compression flags.
-
-	The decompressed pixels will be written as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this is:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. All other flags 
-	are ignored.
-*/
-void Decompress( u8* rgba, void const* block, int flags );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Computes the amount of compressed storage required.
-
-	@param width	The width of the image.
-	@param height	The height of the image.
-	@param flags	Compression flags.
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. All other flags 
-	are ignored.
-	
-	Most DXT images will be a multiple of 4 in each dimension, but this 
-	function supports arbitrary size images by allowing the outer blocks to
-	be only partially used.
-*/
-int GetStorageRequirements( int width, int height, int flags );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Compresses an image in memory.
-
-	@param rgba		The pixels of the source.
-	@param width	The width of the source image.
-	@param height	The height of the source image.
-	@param blocks	Storage for the compressed output.
-	@param flags	Compression flags.
-	@param metric	An optional perceptual metric.
-	
-	The source pixels should be presented as a contiguous array of width*height
-	rgba values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
-		
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for each compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor to use 
-	when fitting the RGB components of the data. Possible colour compressors 
-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
-	quality) or kColourIterativeClusterFit (slowest, best quality).
-		
-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
-	flag can be specified to weight the importance of each pixel by its alpha 
-	value. For images that are rendered using alpha blending, this can 
-	significantly increase the perceived quality.
-	
-	The metric parameter can be used to weight the relative importance of each
-	colour channel, or pass NULL to use the default uniform weight of 
-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
-	allowed either uniform or "perceptual" weights with the fixed values
-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
-	contiguous array of 3 floats.
-	
-	Internally this function calls squish::CompressMasked for each block, which 
-	allows for pixels outside the image to take arbitrary values. The function 
-	squish::GetStorageRequirements can be called to compute the amount of memory
-	to allocate for the compressed output.
-*/
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Decompresses an image in memory.
-
-	@param rgba		Storage for the decompressed pixels.
-	@param width	The width of the source image.
-	@param height	The height of the source image.
-	@param blocks	The compressed DXT blocks.
-	@param flags	Compression flags.
-	
-	The decompressed pixels will be written as a contiguous array of width*height
-	16 rgba values, with each component as 1 byte each. In memory this is:
-	
-		{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
-		
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. All other flags 
-	are ignored.
-
-	Internally this function calls squish::Decompress for each block.
-*/
-void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
-
-// -----------------------------------------------------------------------------
-
-} // namespace squish
-
-#endif // ndef SQUISH_H
-
diff --git a/3rdparty/lodepng/README.md b/3rdparty/lodepng/README.md
deleted file mode 100644
index 35f925494..000000000
--- a/3rdparty/lodepng/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-LodePNG
--------
-
-PNG encoder and decoder in C and C++.
-
-Home page: http://lodev.org/lodepng/
-
-Only two files are needed to allow your program to read and write PNG files: lodepng.cpp and lodepng.h.
-
-The other files in the project are just examples, unit tests, etc...
diff --git a/3rdparty/lodepng/lodepng.cpp b/3rdparty/lodepng/lodepng.cpp
deleted file mode 100644
index 7baf7f927..000000000
--- a/3rdparty/lodepng/lodepng.cpp
+++ /dev/null
@@ -1,6224 +0,0 @@
-/*
-LodePNG version 20160501
-
-Copyright (c) 2005-2016 Lode Vandevenne
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-    1. The origin of this software must not be misrepresented; you must not
-    claim that you wrote the original software. If you use this software
-    in a product, an acknowledgment in the product documentation would be
-    appreciated but is not required.
-
-    2. Altered source versions must be plainly marked as such, and must not be
-    misrepresented as being the original software.
-
-    3. This notice may not be removed or altered from any source
-    distribution.
-*/
-
-/*
-The manual and changelog are in the header file "lodepng.h"
-Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
-*/
-
-#include "lodepng.h"
-
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
-#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
-#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
-#endif /*_MSC_VER */
-
-const char* LODEPNG_VERSION_STRING = "20160501";
-
-/*
-This source file is built up in the following large parts. The code sections
-with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
--Tools for C and common code for PNG and Zlib
--C Code for Zlib (huffman, deflate, ...)
--C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
--The C++ wrapper around all of the above
-*/
-
-/*The malloc, realloc and free functions defined here with "lodepng_" in front
-of the name, so that you can easily change them to others related to your
-platform if needed. Everything else in the code calls these. Pass
--DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
-#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
-define them in your own project's source files without needing to change
-lodepng source code. Don't forget to remove "static" if you copypaste them
-from here.*/
-
-#ifdef LODEPNG_COMPILE_ALLOCATORS
-static void* lodepng_malloc(size_t size)
-{
-  return malloc(size);
-}
-
-static void* lodepng_realloc(void* ptr, size_t new_size)
-{
-  return realloc(ptr, new_size);
-}
-
-static void lodepng_free(void* ptr)
-{
-  free(ptr);
-}
-#else /*LODEPNG_COMPILE_ALLOCATORS*/
-void* lodepng_malloc(size_t size);
-void* lodepng_realloc(void* ptr, size_t new_size);
-void lodepng_free(void* ptr);
-#endif /*LODEPNG_COMPILE_ALLOCATORS*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // Tools for C, and common code for PNG and Zlib.                       // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*
-Often in case of an error a value is assigned to a variable and then it breaks
-out of a loop (to go to the cleanup phase of a function). This macro does that.
-It makes the error handling code shorter and more readable.
-
-Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
-*/
-#define CERROR_BREAK(errorvar, code)\
-{\
-  errorvar = code;\
-  break;\
-}
-
-/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
-#define ERROR_BREAK(code) CERROR_BREAK(error, code)
-
-/*Set error var to the error code, and return it.*/
-#define CERROR_RETURN_ERROR(errorvar, code)\
-{\
-  errorvar = code;\
-  return code;\
-}
-
-/*Try the code, if it returns error, also return the error.*/
-#define CERROR_TRY_RETURN(call)\
-{\
-  unsigned error = call;\
-  if(error) return error;\
-}
-
-/*Set error var to the error code, and return from the void function.*/
-#define CERROR_RETURN(errorvar, code)\
-{\
-  errorvar = code;\
-  return;\
-}
-
-/*
-About uivector, ucvector and string:
--All of them wrap dynamic arrays or text strings in a similar way.
--LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
--The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
--They're not used in the interface, only internally in this file as static functions.
--As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
-*/
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*dynamic vector of unsigned ints*/
-typedef struct uivector
-{
-  unsigned* data;
-  size_t size; /*size in number of unsigned longs*/
-  size_t allocsize; /*allocated size in bytes*/
-} uivector;
-
-static void uivector_cleanup(void* p)
-{
-  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
-  lodepng_free(((uivector*)p)->data);
-  ((uivector*)p)->data = NULL;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_reserve(uivector* p, size_t allocsize)
-{
-  if(allocsize > p->allocsize)
-  {
-    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2);
-    void* data = lodepng_realloc(p->data, newsize);
-    if(data)
-    {
-      p->allocsize = newsize;
-      p->data = (unsigned*)data;
-    }
-    else return 0; /*error: not enough memory*/
-  }
-  return 1;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_resize(uivector* p, size_t size)
-{
-  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0;
-  p->size = size;
-  return 1; /*success*/
-}
-
-/*resize and give all new elements the value*/
-static unsigned uivector_resizev(uivector* p, size_t size, unsigned value)
-{
-  size_t oldsize = p->size, i;
-  if(!uivector_resize(p, size)) return 0;
-  for(i = oldsize; i < size; ++i) p->data[i] = value;
-  return 1;
-}
-
-static void uivector_init(uivector* p)
-{
-  p->data = NULL;
-  p->size = p->allocsize = 0;
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_push_back(uivector* p, unsigned c)
-{
-  if(!uivector_resize(p, p->size + 1)) return 0;
-  p->data[p->size - 1] = c;
-  return 1;
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-/* /////////////////////////////////////////////////////////////////////////// */
-
-/*dynamic vector of unsigned chars*/
-typedef struct ucvector
-{
-  unsigned char* data;
-  size_t size; /*used size*/
-  size_t allocsize; /*allocated size*/
-} ucvector;
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_reserve(ucvector* p, size_t allocsize)
-{
-  if(allocsize > p->allocsize)
-  {
-    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2);
-    void* data = lodepng_realloc(p->data, newsize);
-    if(data)
-    {
-      p->allocsize = newsize;
-      p->data = (unsigned char*)data;
-    }
-    else return 0; /*error: not enough memory*/
-  }
-  return 1;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_resize(ucvector* p, size_t size)
-{
-  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
-  p->size = size;
-  return 1; /*success*/
-}
-
-#ifdef LODEPNG_COMPILE_PNG
-
-static void ucvector_cleanup(void* p)
-{
-  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
-  lodepng_free(((ucvector*)p)->data);
-  ((ucvector*)p)->data = NULL;
-}
-
-static void ucvector_init(ucvector* p)
-{
-  p->data = NULL;
-  p->size = p->allocsize = 0;
-}
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*you can both convert from vector to buffer&size and vica versa. If you use
-init_buffer to take over a buffer and size, it is not needed to use cleanup*/
-static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size)
-{
-  p->data = buffer;
-  p->allocsize = p->size = size;
-}
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_push_back(ucvector* p, unsigned char c)
-{
-  if(!ucvector_resize(p, p->size + 1)) return 0;
-  p->data[p->size - 1] = c;
-  return 1;
-}
-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
-
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_PNG
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned string_resize(char** out, size_t size)
-{
-  char* data = (char*)lodepng_realloc(*out, size + 1);
-  if(data)
-  {
-    data[size] = 0; /*null termination char*/
-    *out = data;
-  }
-  return data != 0;
-}
-
-/*init a {char*, size_t} pair for use as string*/
-static void string_init(char** out)
-{
-  *out = NULL;
-  string_resize(out, 0);
-}
-
-/*free the above pair again*/
-static void string_cleanup(char** out)
-{
-  lodepng_free(*out);
-  *out = NULL;
-}
-
-static void string_set(char** out, const char* in)
-{
-  size_t insize = strlen(in), i;
-  if(string_resize(out, insize))
-  {
-    for(i = 0; i != insize; ++i)
-    {
-      (*out)[i] = in[i];
-    }
-  }
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-unsigned lodepng_read32bitInt(const unsigned char* buffer)
-{
-  return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]);
-}
-
-#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
-/*buffer must have at least 4 allocated bytes available*/
-static void lodepng_set32bitInt(unsigned char* buffer, unsigned value)
-{
-  buffer[0] = (unsigned char)((value >> 24) & 0xff);
-  buffer[1] = (unsigned char)((value >> 16) & 0xff);
-  buffer[2] = (unsigned char)((value >>  8) & 0xff);
-  buffer[3] = (unsigned char)((value      ) & 0xff);
-}
-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-static void lodepng_add32bitInt(ucvector* buffer, unsigned value)
-{
-  ucvector_resize(buffer, buffer->size + 4); /*todo: give error if resize failed*/
-  lodepng_set32bitInt(&buffer->data[buffer->size - 4], value);
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / File IO                                                                / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_DISK
-
-/* returns negative value on error. This should be pure C compatible, so no fstat. */
-static long lodepng_filesize(const char* filename)
-{
-  FILE* file;
-  long size;
-  file = fopen(filename, "rb");
-  if(!file) return -1;
-
-  if(fseek(file, 0, SEEK_END) != 0)
-  {
-    fclose(file);
-    return -1;
-  }
-
-  size = ftell(file);
-  /* It may give LONG_MAX as directory size, this is invalid for us. */
-  if(size == LONG_MAX) size = -1;
-
-  fclose(file);
-  return size;
-}
-
-/* load file into buffer that already has the correct allocated size. Returns error code.*/
-static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename)
-{
-  FILE* file;
-  size_t readsize;
-  file = fopen(filename, "rb");
-  if(!file) return 78;
-
-  readsize = fread(out, 1, size, file);
-  fclose(file);
-
-  if (readsize != size) return 78;
-  return 0;
-}
-
-unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename)
-{
-  long size = lodepng_filesize(filename);
-  if (size < 0) return 78;
-  *outsize = (size_t)size;
-
-  *out = (unsigned char*)lodepng_malloc((size_t)size);
-  if(!(*out) && size > 0) return 83; /*the above malloc failed*/
-
-  return lodepng_buffer_file(*out, (size_t)size, filename);
-}
-
-/*write given buffer to the file, overwriting the file, it doesn't append to it.*/
-unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename)
-{
-  FILE* file;
-  file = fopen(filename, "wb" );
-  if(!file) return 79;
-  fwrite((char*)buffer , 1 , buffersize, file);
-  fclose(file);
-  return 0;
-}
-
-#endif /*LODEPNG_COMPILE_DISK*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // End of common code and tools. Begin of Zlib related code.            // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_ZLIB
-#ifdef LODEPNG_COMPILE_ENCODER
-/*TODO: this ignores potential out of memory errors*/
-#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\
-{\
-  /*add a new byte at the end*/\
-  if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\
-  /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\
-  (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\
-  ++(*bitpointer);\
-}
-
-static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
-{
-  size_t i;
-  for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1));
-}
-
-static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
-{
-  size_t i;
-  for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1));
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1)
-
-static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream)
-{
-  unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream));
-  ++(*bitpointer);
-  return result;
-}
-
-static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
-{
-  unsigned result = 0, i;
-  for(i = 0; i != nbits; ++i)
-  {
-    result += ((unsigned)READBIT(*bitpointer, bitstream)) << i;
-    ++(*bitpointer);
-  }
-  return result;
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Deflate - Huffman                                                      / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#define FIRST_LENGTH_CODE_INDEX 257
-#define LAST_LENGTH_CODE_INDEX 285
-/*256 literals, the end code, some length codes, and 2 unused codes*/
-#define NUM_DEFLATE_CODE_SYMBOLS 288
-/*the distance codes have their own symbols, 30 used, 2 unused*/
-#define NUM_DISTANCE_SYMBOLS 32
-/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
-#define NUM_CODE_LENGTH_CODES 19
-
-/*the base lengths represented by codes 257-285*/
-static const unsigned LENGTHBASE[29]
-  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
-     67, 83, 99, 115, 131, 163, 195, 227, 258};
-
-/*the extra bits used by codes 257-285 (added to base length)*/
-static const unsigned LENGTHEXTRA[29]
-  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
-      4,  4,  4,   4,   5,   5,   5,   5,   0};
-
-/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
-static const unsigned DISTANCEBASE[30]
-  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
-     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
-
-/*the extra bits of backwards distances (added to base)*/
-static const unsigned DISTANCEEXTRA[30]
-  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
-       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
-
-/*the order in which "code length alphabet code lengths" are stored, out of this
-the huffman tree of the dynamic huffman tree lengths is generated*/
-static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
-  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*
-Huffman tree struct, containing multiple representations of the tree
-*/
-typedef struct HuffmanTree
-{
-  unsigned* tree2d;
-  unsigned* tree1d;
-  unsigned* lengths; /*the lengths of the codes of the 1d-tree*/
-  unsigned maxbitlen; /*maximum number of bits a single code can get*/
-  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
-} HuffmanTree;
-
-/*function used for debug purposes to draw the tree in ascii art with C++*/
-/*
-static void HuffmanTree_draw(HuffmanTree* tree)
-{
-  std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl;
-  for(size_t i = 0; i != tree->tree1d.size; ++i)
-  {
-    if(tree->lengths.data[i])
-      std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl;
-  }
-  std::cout << std::endl;
-}*/
-
-static void HuffmanTree_init(HuffmanTree* tree)
-{
-  tree->tree2d = 0;
-  tree->tree1d = 0;
-  tree->lengths = 0;
-}
-
-static void HuffmanTree_cleanup(HuffmanTree* tree)
-{
-  lodepng_free(tree->tree2d);
-  lodepng_free(tree->tree1d);
-  lodepng_free(tree->lengths);
-}
-
-/*the tree representation used by the decoder. return value is error*/
-static unsigned HuffmanTree_make2DTree(HuffmanTree* tree)
-{
-  unsigned nodefilled = 0; /*up to which node it is filled*/
-  unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/
-  unsigned n, i;
-
-  tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned));
-  if(!tree->tree2d) return 83; /*alloc fail*/
-
-  /*
-  convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means
-  uninited, a value >= numcodes is an address to another bit, a value < numcodes
-  is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as
-  many columns as codes - 1.
-  A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
-  Here, the internal nodes are stored (what their 0 and 1 option point to).
-  There is only memory for such good tree currently, if there are more nodes
-  (due to too long length codes), error 55 will happen
-  */
-  for(n = 0; n < tree->numcodes * 2; ++n)
-  {
-    tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/
-  }
-
-  for(n = 0; n < tree->numcodes; ++n) /*the codes*/
-  {
-    for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/
-    {
-      unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1);
-      /*oversubscribed, see comment in lodepng_error_text*/
-      if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55;
-      if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/
-      {
-        if(i + 1 == tree->lengths[n]) /*last bit*/
-        {
-          tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/
-          treepos = 0;
-        }
-        else
-        {
-          /*put address of the next step in here, first that address has to be found of course
-          (it's just nodefilled + 1)...*/
-          ++nodefilled;
-          /*addresses encoded with numcodes added to it*/
-          tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes;
-          treepos = nodefilled;
-        }
-      }
-      else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes;
-    }
-  }
-
-  for(n = 0; n < tree->numcodes * 2; ++n)
-  {
-    if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/
-  }
-
-  return 0;
-}
-
-/*
-Second step for the ...makeFromLengths and ...makeFromFrequencies functions.
-numcodes, lengths and maxbitlen must already be filled in correctly. return
-value is error.
-*/
-static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree)
-{
-  uivector blcount;
-  uivector nextcode;
-  unsigned error = 0;
-  unsigned bits, n;
-
-  uivector_init(&blcount);
-  uivector_init(&nextcode);
-
-  tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
-  if(!tree->tree1d) error = 83; /*alloc fail*/
-
-  if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0)
-  || !uivector_resizev(&nextcode, tree->maxbitlen + 1, 0))
-    error = 83; /*alloc fail*/
-
-  if(!error)
-  {
-    /*step 1: count number of instances of each code length*/
-    for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]];
-    /*step 2: generate the nextcode values*/
-    for(bits = 1; bits <= tree->maxbitlen; ++bits)
-    {
-      nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1;
-    }
-    /*step 3: generate all the codes*/
-    for(n = 0; n != tree->numcodes; ++n)
-    {
-      if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++;
-    }
-  }
-
-  uivector_cleanup(&blcount);
-  uivector_cleanup(&nextcode);
-
-  if(!error) return HuffmanTree_make2DTree(tree);
-  else return error;
-}
-
-/*
-given the code lengths (as stored in the PNG file), generate the tree as defined
-by Deflate. maxbitlen is the maximum bits that a code in the tree can have.
-return value is error.
-*/
-static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
-                                            size_t numcodes, unsigned maxbitlen)
-{
-  unsigned i;
-  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
-  if(!tree->lengths) return 83; /*alloc fail*/
-  for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i];
-  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
-  tree->maxbitlen = maxbitlen;
-  return HuffmanTree_makeFromLengths2(tree);
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
-Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
-
-/*chain node for boundary package merge*/
-typedef struct BPMNode
-{
-  int weight; /*the sum of all weights in this chain*/
-  unsigned index; /*index of this leaf node (called "count" in the paper)*/
-  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
-  int in_use;
-} BPMNode;
-
-/*lists of chains*/
-typedef struct BPMLists
-{
-  /*memory pool*/
-  unsigned memsize;
-  BPMNode* memory;
-  unsigned numfree;
-  unsigned nextfree;
-  BPMNode** freelist;
-  /*two heads of lookahead chains per list*/
-  unsigned listsize;
-  BPMNode** chains0;
-  BPMNode** chains1;
-} BPMLists;
-
-/*creates a new chain node with the given parameters, from the memory in the lists */
-static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail)
-{
-  unsigned i;
-  BPMNode* result;
-
-  /*memory full, so garbage collect*/
-  if(lists->nextfree >= lists->numfree)
-  {
-    /*mark only those that are in use*/
-    for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0;
-    for(i = 0; i != lists->listsize; ++i)
-    {
-      BPMNode* node;
-      for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1;
-      for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1;
-    }
-    /*collect those that are free*/
-    lists->numfree = 0;
-    for(i = 0; i != lists->memsize; ++i)
-    {
-      if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i];
-    }
-    lists->nextfree = 0;
-  }
-
-  result = lists->freelist[lists->nextfree++];
-  result->weight = weight;
-  result->index = index;
-  result->tail = tail;
-  return result;
-}
-
-/*sort the leaves with stable mergesort*/
-static void bpmnode_sort(BPMNode* leaves, size_t num)
-{
-  BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
-  size_t width, counter = 0;
-  for(width = 1; width < num; width *= 2)
-  {
-    BPMNode* a = (counter & 1) ? mem : leaves;
-    BPMNode* b = (counter & 1) ? leaves : mem;
-    size_t p;
-    for(p = 0; p < num; p += 2 * width)
-    {
-      size_t q = (p + width > num) ? num : (p + width);
-      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
-      size_t i = p, j = q, k;
-      for(k = p; k < r; k++)
-      {
-        if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++];
-        else b[k] = a[j++];
-      }
-    }
-    counter++;
-  }
-  if(counter & 1) memcpy(leaves, mem, sizeof(*leaves) * num);
-  lodepng_free(mem);
-}
-
-/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/
-static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num)
-{
-  unsigned lastindex = lists->chains1[c]->index;
-
-  if(c == 0)
-  {
-    if(lastindex >= numpresent) return;
-    lists->chains0[c] = lists->chains1[c];
-    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
-  }
-  else
-  {
-    /*sum of the weights of the head nodes of the previous lookahead chains.*/
-    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
-    lists->chains0[c] = lists->chains1[c];
-    if(lastindex < numpresent && sum > leaves[lastindex].weight)
-    {
-      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
-      return;
-    }
-    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
-    /*in the end we are only interested in the chain of the last list, so no
-    need to recurse if we're at the last one (this gives measurable speedup)*/
-    if(num + 1 < (int)(2 * numpresent - 2))
-    {
-      boundaryPM(lists, leaves, numpresent, c - 1, num);
-      boundaryPM(lists, leaves, numpresent, c - 1, num);
-    }
-  }
-}
-
-unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
-                                      size_t numcodes, unsigned maxbitlen)
-{
-  unsigned error = 0;
-  unsigned i;
-  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
-  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
-
-  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
-  if((1u << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/
-
-  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
-  if(!leaves) return 83; /*alloc fail*/
-
-  for(i = 0; i != numcodes; ++i)
-  {
-    if(frequencies[i] > 0)
-    {
-      leaves[numpresent].weight = (int)frequencies[i];
-      leaves[numpresent].index = i;
-      ++numpresent;
-    }
-  }
-
-  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
-
-  /*ensure at least two present symbols. There should be at least one symbol
-  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
-  make these work as well ensure there are at least two symbols. The
-  Package-Merge code below also doesn't work correctly if there's only one
-  symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/
-  if(numpresent == 0)
-  {
-    lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
-  }
-  else if(numpresent == 1)
-  {
-    lengths[leaves[0].index] = 1;
-    lengths[leaves[0].index == 0 ? 1 : 0] = 1;
-  }
-  else
-  {
-    BPMLists lists;
-    BPMNode* node;
-
-    bpmnode_sort(leaves, numpresent);
-
-    lists.listsize = maxbitlen;
-    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
-    lists.nextfree = 0;
-    lists.numfree = lists.memsize;
-    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
-    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
-    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
-    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
-    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
-
-    if(!error)
-    {
-      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
-
-      bpmnode_create(&lists, leaves[0].weight, 1, 0);
-      bpmnode_create(&lists, leaves[1].weight, 2, 0);
-
-      for(i = 0; i != lists.listsize; ++i)
-      {
-        lists.chains0[i] = &lists.memory[0];
-        lists.chains1[i] = &lists.memory[1];
-      }
-
-      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
-      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
-
-      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail)
-      {
-        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
-      }
-    }
-
-    lodepng_free(lists.memory);
-    lodepng_free(lists.freelist);
-    lodepng_free(lists.chains0);
-    lodepng_free(lists.chains1);
-  }
-
-  lodepng_free(leaves);
-  return error;
-}
-
-/*Create the Huffman tree given the symbol frequencies*/
-static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
-                                                size_t mincodes, size_t numcodes, unsigned maxbitlen)
-{
-  unsigned error = 0;
-  while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/
-  tree->maxbitlen = maxbitlen;
-  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
-  tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
-  if(!tree->lengths) return 83; /*alloc fail*/
-  /*initialize all lengths to 0*/
-  memset(tree->lengths, 0, numcodes * sizeof(unsigned));
-
-  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
-  if(!error) error = HuffmanTree_makeFromLengths2(tree);
-  return error;
-}
-
-static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index)
-{
-  return tree->tree1d[index];
-}
-
-static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index)
-{
-  return tree->lengths[index];
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/
-static unsigned generateFixedLitLenTree(HuffmanTree* tree)
-{
-  unsigned i, error = 0;
-  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
-  if(!bitlen) return 83; /*alloc fail*/
-
-  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
-  for(i =   0; i <= 143; ++i) bitlen[i] = 8;
-  for(i = 144; i <= 255; ++i) bitlen[i] = 9;
-  for(i = 256; i <= 279; ++i) bitlen[i] = 7;
-  for(i = 280; i <= 287; ++i) bitlen[i] = 8;
-
-  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15);
-
-  lodepng_free(bitlen);
-  return error;
-}
-
-/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/
-static unsigned generateFixedDistanceTree(HuffmanTree* tree)
-{
-  unsigned i, error = 0;
-  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
-  if(!bitlen) return 83; /*alloc fail*/
-
-  /*there are 32 distance codes, but 30-31 are unused*/
-  for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5;
-  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15);
-
-  lodepng_free(bitlen);
-  return error;
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/*
-returns the code, or (unsigned)(-1) if error happened
-inbitlength is the length of the complete buffer, in bits (so its byte length times 8)
-*/
-static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp,
-                                    const HuffmanTree* codetree, size_t inbitlength)
-{
-  unsigned treepos = 0, ct;
-  for(;;)
-  {
-    if(*bp >= inbitlength) return (unsigned)(-1); /*error: end of input memory reached without endcode*/
-    /*
-    decode the symbol from the tree. The "readBitFromStream" code is inlined in
-    the expression below because this is the biggest bottleneck while decoding
-    */
-    ct = codetree->tree2d[(treepos << 1) + READBIT(*bp, in)];
-    ++(*bp);
-    if(ct < codetree->numcodes) return ct; /*the symbol is decoded, return it*/
-    else treepos = ct - codetree->numcodes; /*symbol not yet decoded, instead move tree position*/
-
-    if(treepos >= codetree->numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/
-  }
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Inflator (Decompressor)                                                / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/
-static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d)
-{
-  /*TODO: check for out of memory errors*/
-  generateFixedLitLenTree(tree_ll);
-  generateFixedDistanceTree(tree_d);
-}
-
-/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/
-static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
-                                      const unsigned char* in, size_t* bp, size_t inlength)
-{
-  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
-  unsigned error = 0;
-  unsigned n, HLIT, HDIST, HCLEN, i;
-  size_t inbitlength = inlength * 8;
-
-  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
-  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
-  unsigned* bitlen_d = 0; /*dist code lengths*/
-  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
-  unsigned* bitlen_cl = 0;
-  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
-
-  if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/
-
-  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
-  HLIT =  readBitsFromStream(bp, in, 5) + 257;
-  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
-  HDIST = readBitsFromStream(bp, in, 5) + 1;
-  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
-  HCLEN = readBitsFromStream(bp, in, 4) + 4;
-
-  if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/
-
-  HuffmanTree_init(&tree_cl);
-
-  while(!error)
-  {
-    /*read the code length codes out of 3 * (amount of code length codes) bits*/
-
-    bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
-    if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/);
-
-    for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i)
-    {
-      if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3);
-      else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/
-    }
-
-    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
-    if(error) break;
-
-    /*now we can use this tree to read the lengths for the tree that this function will return*/
-    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
-    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
-    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
-    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
-
-    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
-    i = 0;
-    while(i < HLIT + HDIST)
-    {
-      unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength);
-      if(code <= 15) /*a length code*/
-      {
-        if(i < HLIT) bitlen_ll[i] = code;
-        else bitlen_d[i - HLIT] = code;
-        ++i;
-      }
-      else if(code == 16) /*repeat previous*/
-      {
-        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
-        unsigned value; /*set value to the previous code*/
-
-        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
-
-        if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
-        replength += readBitsFromStream(bp, in, 2);
-
-        if(i < HLIT + 1) value = bitlen_ll[i - 1];
-        else value = bitlen_d[i - HLIT - 1];
-        /*repeat this value in the next lengths*/
-        for(n = 0; n < replength; ++n)
-        {
-          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
-          if(i < HLIT) bitlen_ll[i] = value;
-          else bitlen_d[i - HLIT] = value;
-          ++i;
-        }
-      }
-      else if(code == 17) /*repeat "0" 3-10 times*/
-      {
-        unsigned replength = 3; /*read in the bits that indicate repeat length*/
-        if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
-        replength += readBitsFromStream(bp, in, 3);
-
-        /*repeat this value in the next lengths*/
-        for(n = 0; n < replength; ++n)
-        {
-          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
-
-          if(i < HLIT) bitlen_ll[i] = 0;
-          else bitlen_d[i - HLIT] = 0;
-          ++i;
-        }
-      }
-      else if(code == 18) /*repeat "0" 11-138 times*/
-      {
-        unsigned replength = 11; /*read in the bits that indicate repeat length*/
-        if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
-        replength += readBitsFromStream(bp, in, 7);
-
-        /*repeat this value in the next lengths*/
-        for(n = 0; n < replength; ++n)
-        {
-          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
-
-          if(i < HLIT) bitlen_ll[i] = 0;
-          else bitlen_d[i - HLIT] = 0;
-          ++i;
-        }
-      }
-      else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
-      {
-        if(code == (unsigned)(-1))
-        {
-          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
-          (10=no endcode, 11=wrong jump outside of tree)*/
-          error = (*bp) > inbitlength ? 10 : 11;
-        }
-        else error = 16; /*unexisting code, this can never happen*/
-        break;
-      }
-    }
-    if(error) break;
-
-    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
-
-    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
-    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
-    if(error) break;
-    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
-
-    break; /*end of error-while*/
-  }
-
-  lodepng_free(bitlen_cl);
-  lodepng_free(bitlen_ll);
-  lodepng_free(bitlen_d);
-  HuffmanTree_cleanup(&tree_cl);
-
-  return error;
-}
-
-/*inflate a block with dynamic of fixed Huffman tree*/
-static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp,
-                                    size_t* pos, size_t inlength, unsigned btype)
-{
-  unsigned error = 0;
-  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
-  HuffmanTree tree_d; /*the huffman tree for distance codes*/
-  size_t inbitlength = inlength * 8;
-
-  HuffmanTree_init(&tree_ll);
-  HuffmanTree_init(&tree_d);
-
-  if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d);
-  else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength);
-
-  while(!error) /*decode all symbols until end reached, breaks at end code*/
-  {
-    /*code_ll is literal, length or end code*/
-    unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength);
-    if(code_ll <= 255) /*literal symbol*/
-    {
-      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
-      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
-      out->data[*pos] = (unsigned char)code_ll;
-      ++(*pos);
-    }
-    else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/
-    {
-      unsigned code_d, distance;
-      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
-      size_t start, forward, backward, length;
-
-      /*part 1: get length base*/
-      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
-
-      /*part 2: get extra bits and add the value of that to length*/
-      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
-      if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
-      length += readBitsFromStream(bp, in, numextrabits_l);
-
-      /*part 3: get distance code*/
-      code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength);
-      if(code_d > 29)
-      {
-        if(code_ll == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
-        {
-          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
-          (10=no endcode, 11=wrong jump outside of tree)*/
-          error = (*bp) > inlength * 8 ? 10 : 11;
-        }
-        else error = 18; /*error: invalid distance code (30-31 are never used)*/
-        break;
-      }
-      distance = DISTANCEBASE[code_d];
-
-      /*part 4: get extra bits from distance*/
-      numextrabits_d = DISTANCEEXTRA[code_d];
-      if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
-      distance += readBitsFromStream(bp, in, numextrabits_d);
-
-      /*part 5: fill in all the out[n] values based on the length and dist*/
-      start = (*pos);
-      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
-      backward = start - distance;
-
-      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
-      if (distance < length) {
-        for(forward = 0; forward < length; ++forward)
-        {
-          out->data[(*pos)++] = out->data[backward++];
-        }
-      } else {
-        memcpy(out->data + *pos, out->data + backward, length);
-        *pos += length;
-      }
-    }
-    else if(code_ll == 256)
-    {
-      break; /*end code, break the loop*/
-    }
-    else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
-    {
-      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
-      (10=no endcode, 11=wrong jump outside of tree)*/
-      error = ((*bp) > inlength * 8) ? 10 : 11;
-      break;
-    }
-  }
-
-  HuffmanTree_cleanup(&tree_ll);
-  HuffmanTree_cleanup(&tree_d);
-
-  return error;
-}
-
-static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength)
-{
-  size_t p;
-  unsigned LEN, NLEN, n, error = 0;
-
-  /*go to first boundary of byte*/
-  while(((*bp) & 0x7) != 0) ++(*bp);
-  p = (*bp) / 8; /*byte position*/
-
-  /*read LEN (2 bytes) and NLEN (2 bytes)*/
-  if(p + 4 >= inlength) return 52; /*error, bit pointer will jump past memory*/
-  LEN = in[p] + 256u * in[p + 1]; p += 2;
-  NLEN = in[p] + 256u * in[p + 1]; p += 2;
-
-  /*check if 16-bit NLEN is really the one's complement of LEN*/
-  if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/
-
-  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
-
-  /*read the literal data: LEN bytes are now stored in the out buffer*/
-  if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/
-  for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++];
-
-  (*bp) = p * 8;
-
-  return error;
-}
-
-static unsigned lodepng_inflatev(ucvector* out,
-                                 const unsigned char* in, size_t insize,
-                                 const LodePNGDecompressSettings* settings)
-{
-  /*bit pointer in the "in" data, current byte is bp >> 3, current bit is bp & 0x7 (from lsb to msb of the byte)*/
-  size_t bp = 0;
-  unsigned BFINAL = 0;
-  size_t pos = 0; /*byte position in the out buffer*/
-  unsigned error = 0;
-
-  (void)settings;
-
-  while(!BFINAL)
-  {
-    unsigned BTYPE;
-    if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/
-    BFINAL = readBitFromStream(&bp, in);
-    BTYPE = 1u * readBitFromStream(&bp, in);
-    BTYPE += 2u * readBitFromStream(&bp, in);
-
-    if(BTYPE == 3) return 20; /*error: invalid BTYPE*/
-    else if(BTYPE == 0) error = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/
-    else error = inflateHuffmanBlock(out, in, &bp, &pos, insize, BTYPE); /*compression, BTYPE 01 or 10*/
-
-    if(error) return error;
-  }
-
-  return error;
-}
-
-unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGDecompressSettings* settings)
-{
-  unsigned error;
-  ucvector v;
-  ucvector_init_buffer(&v, *out, *outsize);
-  error = lodepng_inflatev(&v, in, insize, settings);
-  *out = v.data;
-  *outsize = v.size;
-  return error;
-}
-
-static unsigned inflate(unsigned char** out, size_t* outsize,
-                        const unsigned char* in, size_t insize,
-                        const LodePNGDecompressSettings* settings)
-{
-  if(settings->custom_inflate)
-  {
-    return settings->custom_inflate(out, outsize, in, insize, settings);
-  }
-  else
-  {
-    return lodepng_inflate(out, outsize, in, insize, settings);
-  }
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Deflator (Compressor)                                                  / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
-
-/*bitlen is the size in bits of the code*/
-static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen)
-{
-  addBitsToStreamReversed(bp, compressed, code, bitlen);
-}
-
-/*search the index in the array, that has the largest value smaller than or equal to the given value,
-given array must be sorted (if no value is smaller, it returns the size of the given array)*/
-static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value)
-{
-  /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/
-  size_t left = 1;
-  size_t right = array_size - 1;
-
-  while(left <= right) {
-    size_t mid = (left + right) >> 1;
-    if (array[mid] >= value) right = mid - 1;
-    else left = mid + 1;
-  }
-  if(left >= array_size || array[left] > value) left--;
-  return left;
-}
-
-static void addLengthDistance(uivector* values, size_t length, size_t distance)
-{
-  /*values in encoded vector are those used by deflate:
-  0-255: literal bytes
-  256: end
-  257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
-  286-287: invalid*/
-
-  unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
-  unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]);
-  unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
-  unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]);
-
-  uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX);
-  uivector_push_back(values, extra_length);
-  uivector_push_back(values, dist_code);
-  uivector_push_back(values, extra_distance);
-}
-
-/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
-bytes as input because 3 is the minimum match length for deflate*/
-static const unsigned HASH_NUM_VALUES = 65536;
-static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/
-
-typedef struct Hash
-{
-  int* head; /*hash value to head circular pos - can be outdated if went around window*/
-  /*circular pos to prev circular pos*/
-  unsigned short* chain;
-  int* val; /*circular pos to hash value*/
-
-  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
-  it's always going to be the zeros that dominate, so not important for PNG*/
-  int* headz; /*similar to head, but for chainz*/
-  unsigned short* chainz; /*those with same amount of zeros*/
-  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
-} Hash;
-
-static unsigned hash_init(Hash* hash, unsigned windowsize)
-{
-  unsigned i;
-  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
-  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
-  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
-
-  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
-  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
-  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
-
-  if(!hash->head || !hash->chain || !hash->val  || !hash->headz|| !hash->chainz || !hash->zeros)
-  {
-    return 83; /*alloc fail*/
-  }
-
-  /*initialize hash table*/
-  for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1;
-  for(i = 0; i != windowsize; ++i) hash->val[i] = -1;
-  for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/
-
-  for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1;
-  for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/
-
-  return 0;
-}
-
-static void hash_cleanup(Hash* hash)
-{
-  lodepng_free(hash->head);
-  lodepng_free(hash->val);
-  lodepng_free(hash->chain);
-
-  lodepng_free(hash->zeros);
-  lodepng_free(hash->headz);
-  lodepng_free(hash->chainz);
-}
-
-
-
-static unsigned getHash(const unsigned char* data, size_t size, size_t pos)
-{
-  unsigned result = 0;
-  if(pos + 2 < size)
-  {
-    /*A simple shift and xor hash is used. Since the data of PNGs is dominated
-    by zeroes due to the filters, a better hash does not have a significant
-    effect on speed in traversing the chain, and causes more time spend on
-    calculating the hash.*/
-    result ^= (unsigned)(data[pos + 0] << 0u);
-    result ^= (unsigned)(data[pos + 1] << 4u);
-    result ^= (unsigned)(data[pos + 2] << 8u);
-  } else {
-    size_t amount, i;
-    if(pos >= size) return 0;
-    amount = size - pos;
-    for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u));
-  }
-  return result & HASH_BIT_MASK;
-}
-
-static unsigned countZeros(const unsigned char* data, size_t size, size_t pos)
-{
-  const unsigned char* start = data + pos;
-  const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH;
-  if(end > data + size) end = data + size;
-  data = start;
-  while(data != end && *data == 0) ++data;
-  /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/
-  return (unsigned)(data - start);
-}
-
-/*wpos = pos & (windowsize - 1)*/
-static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros)
-{
-  hash->val[wpos] = (int)hashval;
-  if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval];
-  hash->head[hashval] = wpos;
-
-  hash->zeros[wpos] = numzeros;
-  if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros];
-  hash->headz[numzeros] = wpos;
-}
-
-/*
-LZ77-encode the data. Return value is error code. The input are raw bytes, the output
-is in the form of unsigned integers with codes representing for example literal bytes, or
-length/distance pairs.
-It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
-sliding window (of windowsize) is used, and all past bytes in that window can be used as
-the "dictionary". A brute force search through all possible distances would be slow, and
-this hash technique is one out of several ways to speed this up.
-*/
-static unsigned encodeLZ77(uivector* out, Hash* hash,
-                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
-                           unsigned minmatch, unsigned nicematch, unsigned lazymatching)
-{
-  size_t pos;
-  unsigned i, error = 0;
-  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
-  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8;
-  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
-
-  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
-  unsigned numzeros = 0;
-
-  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
-  unsigned length;
-  unsigned lazy = 0;
-  unsigned lazylength = 0, lazyoffset = 0;
-  unsigned hashval;
-  unsigned current_offset, current_length;
-  unsigned prev_offset;
-  const unsigned char *lastptr, *foreptr, *backptr;
-  unsigned hashpos;
-
-  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
-  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
-
-  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
-
-  for(pos = inpos; pos < insize; ++pos)
-  {
-    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
-    unsigned chainlength = 0;
-
-    hashval = getHash(in, insize, pos);
-
-    if(usezeros && hashval == 0)
-    {
-      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
-      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
-    }
-    else
-    {
-      numzeros = 0;
-    }
-
-    updateHashChain(hash, wpos, hashval, numzeros);
-
-    /*the length and offset found for the current position*/
-    length = 0;
-    offset = 0;
-
-    hashpos = hash->chain[wpos];
-
-    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
-
-    /*search for the longest string*/
-    prev_offset = 0;
-    for(;;)
-    {
-      if(chainlength++ >= maxchainlength) break;
-      current_offset = hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize;
-
-      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
-      prev_offset = current_offset;
-      if(current_offset > 0)
-      {
-        /*test the next characters*/
-        foreptr = &in[pos];
-        backptr = &in[pos - current_offset];
-
-        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
-        if(numzeros >= 3)
-        {
-          unsigned skip = hash->zeros[hashpos];
-          if(skip > numzeros) skip = numzeros;
-          backptr += skip;
-          foreptr += skip;
-        }
-
-        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/
-        {
-          ++backptr;
-          ++foreptr;
-        }
-        current_length = (unsigned)(foreptr - &in[pos]);
-
-        if(current_length > length)
-        {
-          length = current_length; /*the longest length*/
-          offset = current_offset; /*the offset that is related to this longest length*/
-          /*jump out once a length of max length is found (speed gain). This also jumps
-          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
-          if(current_length >= nicematch) break;
-        }
-      }
-
-      if(hashpos == hash->chain[hashpos]) break;
-
-      if(numzeros >= 3 && length > numzeros)
-      {
-        hashpos = hash->chainz[hashpos];
-        if(hash->zeros[hashpos] != numzeros) break;
-      }
-      else
-      {
-        hashpos = hash->chain[hashpos];
-        /*outdated hash value, happens if particular value was not encountered in whole last window*/
-        if(hash->val[hashpos] != (int)hashval) break;
-      }
-    }
-
-    if(lazymatching)
-    {
-      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH)
-      {
-        lazy = 1;
-        lazylength = length;
-        lazyoffset = offset;
-        continue; /*try the next byte*/
-      }
-      if(lazy)
-      {
-        lazy = 0;
-        if(pos == 0) ERROR_BREAK(81);
-        if(length > lazylength + 1)
-        {
-          /*push the previous character as literal*/
-          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
-        }
-        else
-        {
-          length = lazylength;
-          offset = lazyoffset;
-          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
-          hash->headz[numzeros] = -1; /*idem*/
-          --pos;
-        }
-      }
-    }
-    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
-
-    /*encode it as length/distance pair or literal value*/
-    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/
-    {
-      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    else if(length < minmatch || (length == 3 && offset > 4096))
-    {
-      /*compensate for the fact that longer offsets have more extra bits, a
-      length of only 3 may be not worth it then*/
-      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    else
-    {
-      addLengthDistance(out, length, offset);
-      for(i = 1; i < length; ++i)
-      {
-        ++pos;
-        wpos = pos & (windowsize - 1);
-        hashval = getHash(in, insize, pos);
-        if(usezeros && hashval == 0)
-        {
-          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
-          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
-        }
-        else
-        {
-          numzeros = 0;
-        }
-        updateHashChain(hash, wpos, hashval, numzeros);
-      }
-    }
-  } /*end of the loop through each character of input*/
-
-  return error;
-}
-
-/* /////////////////////////////////////////////////////////////////////////// */
-
-static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize)
-{
-  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
-  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
-
-  size_t i, j, numdeflateblocks = (datasize + 65534) / 65535;
-  unsigned datapos = 0;
-  for(i = 0; i != numdeflateblocks; ++i)
-  {
-    unsigned BFINAL, BTYPE, LEN, NLEN;
-    unsigned char firstbyte;
-
-    BFINAL = (i == numdeflateblocks - 1);
-    BTYPE = 0;
-
-    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1));
-    ucvector_push_back(out, firstbyte);
-
-    LEN = 65535;
-    if(datasize - datapos < 65535) LEN = (unsigned)datasize - datapos;
-    NLEN = 65535 - LEN;
-
-    ucvector_push_back(out, (unsigned char)(LEN & 255));
-    ucvector_push_back(out, (unsigned char)(LEN >> 8));
-    ucvector_push_back(out, (unsigned char)(NLEN & 255));
-    ucvector_push_back(out, (unsigned char)(NLEN >> 8));
-
-    /*Decompressed data*/
-    for(j = 0; j < 65535 && datapos < datasize; ++j)
-    {
-      ucvector_push_back(out, data[datapos++]);
-    }
-  }
-
-  return 0;
-}
-
-/*
-write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman trees.
-tree_ll: the tree for lit and len codes.
-tree_d: the tree for distance codes.
-*/
-static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded,
-                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d)
-{
-  size_t i = 0;
-  for(i = 0; i != lz77_encoded->size; ++i)
-  {
-    unsigned val = lz77_encoded->data[i];
-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val));
-    if(val > 256) /*for a length code, 3 more things have to be added*/
-    {
-      unsigned length_index = val - FIRST_LENGTH_CODE_INDEX;
-      unsigned n_length_extra_bits = LENGTHEXTRA[length_index];
-      unsigned length_extra_bits = lz77_encoded->data[++i];
-
-      unsigned distance_code = lz77_encoded->data[++i];
-
-      unsigned distance_index = distance_code;
-      unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index];
-      unsigned distance_extra_bits = lz77_encoded->data[++i];
-
-      addBitsToStream(bp, out, length_extra_bits, n_length_extra_bits);
-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, distance_code),
-                       HuffmanTree_getLength(tree_d, distance_code));
-      addBitsToStream(bp, out, distance_extra_bits, n_distance_extra_bits);
-    }
-  }
-}
-
-/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/
-static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash,
-                               const unsigned char* data, size_t datapos, size_t dataend,
-                               const LodePNGCompressSettings* settings, unsigned final)
-{
-  unsigned error = 0;
-
-  /*
-  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
-  literal bytes and length/distance pairs. This is then huffman compressed with
-  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
-  another huffman tree is used for the dist values ("d"). These two trees are
-  stored using their code lengths, and to compress even more these code lengths
-  are also run-length encoded and huffman compressed. This gives a huffman tree
-  of code lengths "cl". The code lenghts used to describe this third tree are
-  the code length code lengths ("clcl").
-  */
-
-  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
-  uivector lz77_encoded;
-  HuffmanTree tree_ll; /*tree for lit,len values*/
-  HuffmanTree tree_d; /*tree for distance codes*/
-  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
-  uivector frequencies_ll; /*frequency of lit,len codes*/
-  uivector frequencies_d; /*frequency of dist codes*/
-  uivector frequencies_cl; /*frequency of code length codes*/
-  uivector bitlen_lld; /*lit,len,dist code lenghts (int bits), literally (without repeat codes).*/
-  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudemtary run length compression)*/
-  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
-  (these are written as is in the file, it would be crazy to compress these using yet another huffman
-  tree that needs to be represented by yet another set of code lengths)*/
-  uivector bitlen_cl;
-  size_t datasize = dataend - datapos;
-
-  /*
-  Due to the huffman compression of huffman tree representations ("two levels"), there are some anologies:
-  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
-  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
-  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
-  */
-
-  unsigned BFINAL = final;
-  size_t numcodes_ll, numcodes_d, i;
-  unsigned HLIT, HDIST, HCLEN;
-
-  uivector_init(&lz77_encoded);
-  HuffmanTree_init(&tree_ll);
-  HuffmanTree_init(&tree_d);
-  HuffmanTree_init(&tree_cl);
-  uivector_init(&frequencies_ll);
-  uivector_init(&frequencies_d);
-  uivector_init(&frequencies_cl);
-  uivector_init(&bitlen_lld);
-  uivector_init(&bitlen_lld_e);
-  uivector_init(&bitlen_cl);
-
-  /*This while loop never loops due to a break at the end, it is here to
-  allow breaking out of it to the cleanup phase on error conditions.*/
-  while(!error)
-  {
-    if(settings->use_lz77)
-    {
-      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
-                         settings->minmatch, settings->nicematch, settings->lazymatching);
-      if(error) break;
-    }
-    else
-    {
-      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
-      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
-    }
-
-    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
-    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
-
-    /*Count the frequencies of lit, len and dist codes*/
-    for(i = 0; i != lz77_encoded.size; ++i)
-    {
-      unsigned symbol = lz77_encoded.data[i];
-      ++frequencies_ll.data[symbol];
-      if(symbol > 256)
-      {
-        unsigned dist = lz77_encoded.data[i + 2];
-        ++frequencies_d.data[dist];
-        i += 3;
-      }
-    }
-    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
-
-    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
-    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
-    if(error) break;
-    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
-    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
-    if(error) break;
-
-    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
-    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
-    /*store the code lengths of both generated trees in bitlen_lld*/
-    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
-    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
-
-    /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
-    17 (3-10 zeroes), 18 (11-138 zeroes)*/
-    for(i = 0; i != (unsigned)bitlen_lld.size; ++i)
-    {
-      unsigned j = 0; /*amount of repititions*/
-      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
-
-      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/
-      {
-        ++j; /*include the first zero*/
-        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/
-        {
-          uivector_push_back(&bitlen_lld_e, 17);
-          uivector_push_back(&bitlen_lld_e, j - 3);
-        }
-        else /*repeat code 18 supports max 138 zeroes*/
-        {
-          if(j > 138) j = 138;
-          uivector_push_back(&bitlen_lld_e, 18);
-          uivector_push_back(&bitlen_lld_e, j - 11);
-        }
-        i += (j - 1);
-      }
-      else if(j >= 3) /*repeat code for value other than zero*/
-      {
-        size_t k;
-        unsigned num = j / 6, rest = j % 6;
-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
-        for(k = 0; k < num; ++k)
-        {
-          uivector_push_back(&bitlen_lld_e, 16);
-          uivector_push_back(&bitlen_lld_e, 6 - 3);
-        }
-        if(rest >= 3)
-        {
-          uivector_push_back(&bitlen_lld_e, 16);
-          uivector_push_back(&bitlen_lld_e, rest - 3);
-        }
-        else j -= rest;
-        i += j;
-      }
-      else /*too short to benefit from repeat code*/
-      {
-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
-      }
-    }
-
-    /*generate tree_cl, the huffmantree of huffmantrees*/
-
-    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != bitlen_lld_e.size; ++i)
-    {
-      ++frequencies_cl.data[bitlen_lld_e.data[i]];
-      /*after a repeat code come the bits that specify the number of repetitions,
-      those don't need to be in the frequencies_cl calculation*/
-      if(bitlen_lld_e.data[i] >= 16) ++i;
-    }
-
-    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
-                                            frequencies_cl.size, frequencies_cl.size, 7);
-    if(error) break;
-
-    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != tree_cl.numcodes; ++i)
-    {
-      /*lenghts of code length tree is in the order as specified by deflate*/
-      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
-    }
-    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4)
-    {
-      /*remove zeros at the end, but minimum size must be 4*/
-      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    if(error) break;
-
-    /*
-    Write everything into the output
-
-    After the BFINAL and BTYPE, the dynamic block consists out of the following:
-    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
-    - (HCLEN+4)*3 bits code lengths of code length alphabet
-    - HLIT + 257 code lenghts of lit/length alphabet (encoded using the code length
-      alphabet, + possible repetition codes 16, 17, 18)
-    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
-      alphabet, + possible repetition codes 16, 17, 18)
-    - compressed data
-    - 256 (end code)
-    */
-
-    /*Write block type*/
-    addBitToStream(bp, out, BFINAL);
-    addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/
-    addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/
-
-    /*write the HLIT, HDIST and HCLEN values*/
-    HLIT = (unsigned)(numcodes_ll - 257);
-    HDIST = (unsigned)(numcodes_d - 1);
-    HCLEN = (unsigned)bitlen_cl.size - 4;
-    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
-    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
-    addBitsToStream(bp, out, HLIT, 5);
-    addBitsToStream(bp, out, HDIST, 5);
-    addBitsToStream(bp, out, HCLEN, 4);
-
-    /*write the code lenghts of the code length alphabet*/
-    for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3);
-
-    /*write the lenghts of the lit/len AND the dist alphabet*/
-    for(i = 0; i != bitlen_lld_e.size; ++i)
-    {
-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
-                       HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
-      /*extra bits of repeat codes*/
-      if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2);
-      else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3);
-      else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7);
-    }
-
-    /*write the compressed data symbols*/
-    writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
-    /*error: the length of the end code 256 must be larger than 0*/
-    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
-
-    /*write the end code*/
-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
-
-    break; /*end of error-while*/
-  }
-
-  /*cleanup*/
-  uivector_cleanup(&lz77_encoded);
-  HuffmanTree_cleanup(&tree_ll);
-  HuffmanTree_cleanup(&tree_d);
-  HuffmanTree_cleanup(&tree_cl);
-  uivector_cleanup(&frequencies_ll);
-  uivector_cleanup(&frequencies_d);
-  uivector_cleanup(&frequencies_cl);
-  uivector_cleanup(&bitlen_lld_e);
-  uivector_cleanup(&bitlen_lld);
-  uivector_cleanup(&bitlen_cl);
-
-  return error;
-}
-
-static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash,
-                             const unsigned char* data,
-                             size_t datapos, size_t dataend,
-                             const LodePNGCompressSettings* settings, unsigned final)
-{
-  HuffmanTree tree_ll; /*tree for literal values and length codes*/
-  HuffmanTree tree_d; /*tree for distance codes*/
-
-  unsigned BFINAL = final;
-  unsigned error = 0;
-  size_t i;
-
-  HuffmanTree_init(&tree_ll);
-  HuffmanTree_init(&tree_d);
-
-  generateFixedLitLenTree(&tree_ll);
-  generateFixedDistanceTree(&tree_d);
-
-  addBitToStream(bp, out, BFINAL);
-  addBitToStream(bp, out, 1); /*first bit of BTYPE*/
-  addBitToStream(bp, out, 0); /*second bit of BTYPE*/
-
-  if(settings->use_lz77) /*LZ77 encoded*/
-  {
-    uivector lz77_encoded;
-    uivector_init(&lz77_encoded);
-    error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
-                       settings->minmatch, settings->nicematch, settings->lazymatching);
-    if(!error) writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
-    uivector_cleanup(&lz77_encoded);
-  }
-  else /*no LZ77, but still will be Huffman compressed*/
-  {
-    for(i = datapos; i < dataend; ++i)
-    {
-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i]));
-    }
-  }
-  /*add END code*/
-  if(!error) addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
-
-  /*cleanup*/
-  HuffmanTree_cleanup(&tree_ll);
-  HuffmanTree_cleanup(&tree_d);
-
-  return error;
-}
-
-static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
-                                 const LodePNGCompressSettings* settings)
-{
-  unsigned error = 0;
-  size_t i, blocksize, numdeflateblocks;
-  size_t bp = 0; /*the bit pointer*/
-  Hash hash;
-
-  if(settings->btype > 2) return 61;
-  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
-  else if(settings->btype == 1) blocksize = insize;
-  else /*if(settings->btype == 2)*/
-  {
-    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
-    blocksize = insize / 8 + 8;
-    if(blocksize < 65536) blocksize = 65536;
-    if(blocksize > 262144) blocksize = 262144;
-  }
-
-  numdeflateblocks = (insize + blocksize - 1) / blocksize;
-  if(numdeflateblocks == 0) numdeflateblocks = 1;
-
-  error = hash_init(&hash, settings->windowsize);
-  if(error) return error;
-
-  for(i = 0; i != numdeflateblocks && !error; ++i)
-  {
-    unsigned final = (i == numdeflateblocks - 1);
-    size_t start = i * blocksize;
-    size_t end = start + blocksize;
-    if(end > insize) end = insize;
-
-    if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final);
-    else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final);
-  }
-
-  hash_cleanup(&hash);
-
-  return error;
-}
-
-unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGCompressSettings* settings)
-{
-  unsigned error;
-  ucvector v;
-  ucvector_init_buffer(&v, *out, *outsize);
-  error = lodepng_deflatev(&v, in, insize, settings);
-  *out = v.data;
-  *outsize = v.size;
-  return error;
-}
-
-static unsigned deflate(unsigned char** out, size_t* outsize,
-                        const unsigned char* in, size_t insize,
-                        const LodePNGCompressSettings* settings)
-{
-  if(settings->custom_deflate)
-  {
-    return settings->custom_deflate(out, outsize, in, insize, settings);
-  }
-  else
-  {
-    return lodepng_deflate(out, outsize, in, insize, settings);
-  }
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Adler32                                                                  */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len)
-{
-   unsigned s1 = adler & 0xffff;
-   unsigned s2 = (adler >> 16) & 0xffff;
-
-  while(len > 0)
-  {
-    /*at least 5550 sums can be done before the sums overflow, saving a lot of module divisions*/
-    unsigned amount = len > 5550 ? 5550 : len;
-    len -= amount;
-    while(amount > 0)
-    {
-      s1 += (*data++);
-      s2 += s1;
-      --amount;
-    }
-    s1 %= 65521;
-    s2 %= 65521;
-  }
-
-  return (s2 << 16) | s1;
-}
-
-/*Return the adler32 of the bytes data[0..len-1]*/
-static unsigned adler32(const unsigned char* data, unsigned len)
-{
-  return update_adler32(1L, data, len);
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Zlib                                                                   / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                 size_t insize, const LodePNGDecompressSettings* settings)
-{
-  unsigned error = 0;
-  unsigned CM, CINFO, FDICT;
-
-  if(insize < 2) return 53; /*error, size of zlib data too small*/
-  /*read information from zlib header*/
-  if((in[0] * 256 + in[1]) % 31 != 0)
-  {
-    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
-    return 24;
-  }
-
-  CM = in[0] & 15;
-  CINFO = (in[0] >> 4) & 15;
-  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
-  FDICT = (in[1] >> 5) & 1;
-  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
-
-  if(CM != 8 || CINFO > 7)
-  {
-    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
-    return 25;
-  }
-  if(FDICT != 0)
-  {
-    /*error: the specification of PNG says about the zlib stream:
-      "The additional flags shall not specify a preset dictionary."*/
-    return 26;
-  }
-
-  error = inflate(out, outsize, in + 2, insize - 2, settings);
-  if(error) return error;
-
-  if(!settings->ignore_adler32)
-  {
-    unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
-    unsigned checksum = adler32(*out, (unsigned)(*outsize));
-    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
-  }
-
-  return 0; /*no error*/
-}
-
-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                size_t insize, const LodePNGDecompressSettings* settings)
-{
-  if(settings->custom_zlib)
-  {
-    return settings->custom_zlib(out, outsize, in, insize, settings);
-  }
-  else
-  {
-    return lodepng_zlib_decompress(out, outsize, in, insize, settings);
-  }
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                               size_t insize, const LodePNGCompressSettings* settings)
-{
-  /*initially, *out must be NULL and outsize 0, if you just give some random *out
-  that's pointing to a non allocated buffer, this'll crash*/
-  ucvector outv;
-  size_t i;
-  unsigned error;
-  unsigned char* deflatedata = 0;
-  size_t deflatesize = 0;
-
-  /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
-  unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
-  unsigned FLEVEL = 0;
-  unsigned FDICT = 0;
-  unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
-  unsigned FCHECK = 31 - CMFFLG % 31;
-  CMFFLG += FCHECK;
-
-  /*ucvector-controlled version of the output buffer, for dynamic array*/
-  ucvector_init_buffer(&outv, *out, *outsize);
-
-  ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8));
-  ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255));
-
-  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
-
-  if(!error)
-  {
-    unsigned ADLER32 = adler32(in, (unsigned)insize);
-    for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]);
-    lodepng_free(deflatedata);
-    lodepng_add32bitInt(&outv, ADLER32);
-  }
-
-  *out = outv.data;
-  *outsize = outv.size;
-
-  return error;
-}
-
-/* compress using the default or custom zlib function */
-static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                              size_t insize, const LodePNGCompressSettings* settings)
-{
-  if(settings->custom_zlib)
-  {
-    return settings->custom_zlib(out, outsize, in, insize, settings);
-  }
-  else
-  {
-    return lodepng_zlib_compress(out, outsize, in, insize, settings);
-  }
-}
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#else /*no LODEPNG_COMPILE_ZLIB*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                size_t insize, const LodePNGDecompressSettings* settings)
-{
-  if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
-  return settings->custom_zlib(out, outsize, in, insize, settings);
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-#ifdef LODEPNG_COMPILE_ENCODER
-static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                              size_t insize, const LodePNGCompressSettings* settings)
-{
-  if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
-  return settings->custom_zlib(out, outsize, in, insize, settings);
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/*this is a good tradeoff between speed and compression ratio*/
-#define DEFAULT_WINDOWSIZE 2048
-
-void lodepng_compress_settings_init(LodePNGCompressSettings* settings)
-{
-  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
-  settings->btype = 2;
-  settings->use_lz77 = 1;
-  settings->windowsize = DEFAULT_WINDOWSIZE;
-  settings->minmatch = 3;
-  settings->nicematch = 128;
-  settings->lazymatching = 1;
-
-  settings->custom_zlib = 0;
-  settings->custom_deflate = 0;
-  settings->custom_context = 0;
-}
-
-const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
-
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings)
-{
-  settings->ignore_adler32 = 0;
-
-  settings->custom_zlib = 0;
-  settings->custom_inflate = 0;
-  settings->custom_context = 0;
-}
-
-const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0};
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // End of Zlib related code. Begin of PNG related code.                 // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_PNG
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / CRC32                                                                  / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-
-#ifndef LODEPNG_NO_COMPILE_CRC
-/* CRC polynomial: 0xedb88320 */
-static unsigned lodepng_crc32_table[256] = {
-           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
-   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
-   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
-   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
-   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
-   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
-   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
-   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
-  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
-  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
-  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
-  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
-  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
-  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
-  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
-  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
-  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
-  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
-  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
-  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
-  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
-  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
-  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
-  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
-  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
-  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
-  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
-  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
-  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
-  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
-  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
-  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
-};
-
-/*Return the CRC of the bytes buf[0..len-1].*/
-unsigned lodepng_crc32(const unsigned char* data, size_t length)
-{
-  unsigned r = 0xffffffffu;
-  size_t i;
-  for(i = 0; i < length; ++i)
-  {
-    r = lodepng_crc32_table[(r ^ data[i]) & 0xff] ^ (r >> 8);
-  }
-  return r ^ 0xffffffffu;
-}
-#else /* !LODEPNG_NO_COMPILE_CRC */
-unsigned lodepng_crc32(const unsigned char* data, size_t length);
-#endif /* !LODEPNG_NO_COMPILE_CRC */
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Reading and writing single bits and bytes from/to stream for LodePNG   / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream)
-{
-  unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1);
-  ++(*bitpointer);
-  return result;
-}
-
-static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
-{
-  unsigned result = 0;
-  size_t i;
-  for(i = 0 ; i < nbits; ++i)
-  {
-    result <<= 1;
-    result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream);
-  }
-  return result;
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
-{
-  /*the current bit in bitstream must be 0 for this to work*/
-  if(bit)
-  {
-    /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/
-    bitstream[(*bitpointer) >> 3] |= (bit << (7 - ((*bitpointer) & 0x7)));
-  }
-  ++(*bitpointer);
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
-{
-  /*the current bit in bitstream may be 0 or 1 for this to work*/
-  if(bit == 0) bitstream[(*bitpointer) >> 3] &=  (unsigned char)(~(1 << (7 - ((*bitpointer) & 0x7))));
-  else         bitstream[(*bitpointer) >> 3] |=  (1 << (7 - ((*bitpointer) & 0x7)));
-  ++(*bitpointer);
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / PNG chunks                                                             / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-unsigned lodepng_chunk_length(const unsigned char* chunk)
-{
-  return lodepng_read32bitInt(&chunk[0]);
-}
-
-void lodepng_chunk_type(char type[5], const unsigned char* chunk)
-{
-  unsigned i;
-  for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i];
-  type[4] = 0; /*null termination char*/
-}
-
-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type)
-{
-  if(strlen(type) != 4) return 0;
-  return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]);
-}
-
-unsigned char lodepng_chunk_ancillary(const unsigned char* chunk)
-{
-  return((chunk[4] & 32) != 0);
-}
-
-unsigned char lodepng_chunk_private(const unsigned char* chunk)
-{
-  return((chunk[6] & 32) != 0);
-}
-
-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk)
-{
-  return((chunk[7] & 32) != 0);
-}
-
-unsigned char* lodepng_chunk_data(unsigned char* chunk)
-{
-  return &chunk[8];
-}
-
-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk)
-{
-  return &chunk[8];
-}
-
-unsigned lodepng_chunk_check_crc(const unsigned char* chunk)
-{
-  unsigned length = lodepng_chunk_length(chunk);
-  unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]);
-  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
-  unsigned checksum = lodepng_crc32(&chunk[4], length + 4);
-  if(CRC != checksum) return 1;
-  else return 0;
-}
-
-void lodepng_chunk_generate_crc(unsigned char* chunk)
-{
-  unsigned length = lodepng_chunk_length(chunk);
-  unsigned CRC = lodepng_crc32(&chunk[4], length + 4);
-  lodepng_set32bitInt(chunk + 8 + length, CRC);
-}
-
-unsigned char* lodepng_chunk_next(unsigned char* chunk)
-{
-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
-  return &chunk[total_chunk_length];
-}
-
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk)
-{
-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
-  return &chunk[total_chunk_length];
-}
-
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk)
-{
-  unsigned i;
-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
-  unsigned char *chunk_start, *new_buffer;
-  size_t new_length = (*outlength) + total_chunk_length;
-  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
-
-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
-  if(!new_buffer) return 83; /*alloc fail*/
-  (*out) = new_buffer;
-  (*outlength) = new_length;
-  chunk_start = &(*out)[new_length - total_chunk_length];
-
-  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
-
-  return 0;
-}
-
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data)
-{
-  unsigned i;
-  unsigned char *chunk, *new_buffer;
-  size_t new_length = (*outlength) + length + 12;
-  if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/
-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
-  if(!new_buffer) return 83; /*alloc fail*/
-  (*out) = new_buffer;
-  (*outlength) = new_length;
-  chunk = &(*out)[(*outlength) - length - 12];
-
-  /*1: length*/
-  lodepng_set32bitInt(chunk, (unsigned)length);
-
-  /*2: chunk name (4 letters)*/
-  chunk[4] = (unsigned char)type[0];
-  chunk[5] = (unsigned char)type[1];
-  chunk[6] = (unsigned char)type[2];
-  chunk[7] = (unsigned char)type[3];
-
-  /*3: the data*/
-  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
-
-  /*4: CRC (of the chunkname characters and the data)*/
-  lodepng_chunk_generate_crc(chunk);
-
-  return 0;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Color types and such                                                   / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*return type is a LodePNG error code*/
-static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/
-{
-  switch(colortype)
-  {
-    case 0: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break; /*grey*/
-    case 2: if(!(                                 bd == 8 || bd == 16)) return 37; break; /*RGB*/
-    case 3: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8            )) return 37; break; /*palette*/
-    case 4: if(!(                                 bd == 8 || bd == 16)) return 37; break; /*grey + alpha*/
-    case 6: if(!(                                 bd == 8 || bd == 16)) return 37; break; /*RGBA*/
-    default: return 31;
-  }
-  return 0; /*allowed color type / bits combination*/
-}
-
-static unsigned getNumColorChannels(LodePNGColorType colortype)
-{
-  switch(colortype)
-  {
-    case 0: return 1; /*grey*/
-    case 2: return 3; /*RGB*/
-    case 3: return 1; /*palette*/
-    case 4: return 2; /*grey + alpha*/
-    case 6: return 4; /*RGBA*/
-  }
-  return 0; /*unexisting color type*/
-}
-
-static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth)
-{
-  /*bits per pixel is amount of channels * bits per channel*/
-  return getNumColorChannels(colortype) * bitdepth;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-void lodepng_color_mode_init(LodePNGColorMode* info)
-{
-  info->key_defined = 0;
-  info->key_r = info->key_g = info->key_b = 0;
-  info->colortype = LCT_RGBA;
-  info->bitdepth = 8;
-  info->palette = 0;
-  info->palettesize = 0;
-}
-
-void lodepng_color_mode_cleanup(LodePNGColorMode* info)
-{
-  lodepng_palette_clear(info);
-}
-
-unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source)
-{
-  size_t i;
-  lodepng_color_mode_cleanup(dest);
-  *dest = *source;
-  if(source->palette)
-  {
-    dest->palette = (unsigned char*)lodepng_malloc(1024);
-    if(!dest->palette && source->palettesize) return 83; /*alloc fail*/
-    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
-  }
-  return 0;
-}
-
-static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b)
-{
-  size_t i;
-  if(a->colortype != b->colortype) return 0;
-  if(a->bitdepth != b->bitdepth) return 0;
-  if(a->key_defined != b->key_defined) return 0;
-  if(a->key_defined)
-  {
-    if(a->key_r != b->key_r) return 0;
-    if(a->key_g != b->key_g) return 0;
-    if(a->key_b != b->key_b) return 0;
-  }
-  /*if one of the palette sizes is 0, then we consider it to be the same as the
-  other: it means that e.g. the palette was not given by the user and should be
-  considered the same as the palette inside the PNG.*/
-  if(1/*a->palettesize != 0 && b->palettesize != 0*/) {
-    if(a->palettesize != b->palettesize) return 0;
-    for(i = 0; i != a->palettesize * 4; ++i)
-    {
-      if(a->palette[i] != b->palette[i]) return 0;
-    }
-  }
-  return 1;
-}
-
-void lodepng_palette_clear(LodePNGColorMode* info)
-{
-  if(info->palette) lodepng_free(info->palette);
-  info->palette = 0;
-  info->palettesize = 0;
-}
-
-unsigned lodepng_palette_add(LodePNGColorMode* info,
-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  unsigned char* data;
-  /*the same resize technique as C++ std::vectors is used, and here it's made so that for a palette with
-  the max of 256 colors, it'll have the exact alloc size*/
-  if(!info->palette) /*allocate palette if empty*/
-  {
-    /*room for 256 colors with 4 bytes each*/
-    data = (unsigned char*)lodepng_realloc(info->palette, 1024);
-    if(!data) return 83; /*alloc fail*/
-    else info->palette = data;
-  }
-  info->palette[4 * info->palettesize + 0] = r;
-  info->palette[4 * info->palettesize + 1] = g;
-  info->palette[4 * info->palettesize + 2] = b;
-  info->palette[4 * info->palettesize + 3] = a;
-  ++info->palettesize;
-  return 0;
-}
-
-unsigned lodepng_get_bpp(const LodePNGColorMode* info)
-{
-  /*calculate bits per pixel out of colortype and bitdepth*/
-  return lodepng_get_bpp_lct(info->colortype, info->bitdepth);
-}
-
-unsigned lodepng_get_channels(const LodePNGColorMode* info)
-{
-  return getNumColorChannels(info->colortype);
-}
-
-unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info)
-{
-  return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA;
-}
-
-unsigned lodepng_is_alpha_type(const LodePNGColorMode* info)
-{
-  return (info->colortype & 4) != 0; /*4 or 6*/
-}
-
-unsigned lodepng_is_palette_type(const LodePNGColorMode* info)
-{
-  return info->colortype == LCT_PALETTE;
-}
-
-unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info)
-{
-  size_t i;
-  for(i = 0; i != info->palettesize; ++i)
-  {
-    if(info->palette[i * 4 + 3] < 255) return 1;
-  }
-  return 0;
-}
-
-unsigned lodepng_can_have_alpha(const LodePNGColorMode* info)
-{
-  return info->key_defined
-      || lodepng_is_alpha_type(info)
-      || lodepng_has_palette_alpha(info);
-}
-
-size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color)
-{
-  /*will not overflow for any color type if roughly w * h < 268435455*/
-  size_t bpp = lodepng_get_bpp(color);
-  size_t n = w * h;
-  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
-}
-
-size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
-{
-  /*will not overflow for any color type if roughly w * h < 268435455*/
-  size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
-  size_t n = w * h;
-  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
-}
-
-
-#ifdef LODEPNG_COMPILE_PNG
-#ifdef LODEPNG_COMPILE_DECODER
-/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer*/
-static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color)
-{
-  /*will not overflow for any color type if roughly w * h < 268435455*/
-  size_t bpp = lodepng_get_bpp(color);
-  size_t line = ((w / 8) * bpp) + ((w & 7) * bpp + 7) / 8;
-  return h * line;
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-
-static void LodePNGUnknownChunks_init(LodePNGInfo* info)
-{
-  unsigned i;
-  for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0;
-  for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0;
-}
-
-static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info)
-{
-  unsigned i;
-  for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]);
-}
-
-static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src)
-{
-  unsigned i;
-
-  LodePNGUnknownChunks_cleanup(dest);
-
-  for(i = 0; i != 3; ++i)
-  {
-    size_t j;
-    dest->unknown_chunks_size[i] = src->unknown_chunks_size[i];
-    dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]);
-    if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/
-    for(j = 0; j < src->unknown_chunks_size[i]; ++j)
-    {
-      dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j];
-    }
-  }
-
-  return 0;
-}
-
-/******************************************************************************/
-
-static void LodePNGText_init(LodePNGInfo* info)
-{
-  info->text_num = 0;
-  info->text_keys = NULL;
-  info->text_strings = NULL;
-}
-
-static void LodePNGText_cleanup(LodePNGInfo* info)
-{
-  size_t i;
-  for(i = 0; i != info->text_num; ++i)
-  {
-    string_cleanup(&info->text_keys[i]);
-    string_cleanup(&info->text_strings[i]);
-  }
-  lodepng_free(info->text_keys);
-  lodepng_free(info->text_strings);
-}
-
-static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
-{
-  size_t i = 0;
-  dest->text_keys = 0;
-  dest->text_strings = 0;
-  dest->text_num = 0;
-  for(i = 0; i != source->text_num; ++i)
-  {
-    CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i]));
-  }
-  return 0;
-}
-
-void lodepng_clear_text(LodePNGInfo* info)
-{
-  LodePNGText_cleanup(info);
-}
-
-unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str)
-{
-  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
-  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
-  if(!new_keys || !new_strings)
-  {
-    lodepng_free(new_keys);
-    lodepng_free(new_strings);
-    return 83; /*alloc fail*/
-  }
-
-  ++info->text_num;
-  info->text_keys = new_keys;
-  info->text_strings = new_strings;
-
-  string_init(&info->text_keys[info->text_num - 1]);
-  string_set(&info->text_keys[info->text_num - 1], key);
-
-  string_init(&info->text_strings[info->text_num - 1]);
-  string_set(&info->text_strings[info->text_num - 1], str);
-
-  return 0;
-}
-
-/******************************************************************************/
-
-static void LodePNGIText_init(LodePNGInfo* info)
-{
-  info->itext_num = 0;
-  info->itext_keys = NULL;
-  info->itext_langtags = NULL;
-  info->itext_transkeys = NULL;
-  info->itext_strings = NULL;
-}
-
-static void LodePNGIText_cleanup(LodePNGInfo* info)
-{
-  size_t i;
-  for(i = 0; i != info->itext_num; ++i)
-  {
-    string_cleanup(&info->itext_keys[i]);
-    string_cleanup(&info->itext_langtags[i]);
-    string_cleanup(&info->itext_transkeys[i]);
-    string_cleanup(&info->itext_strings[i]);
-  }
-  lodepng_free(info->itext_keys);
-  lodepng_free(info->itext_langtags);
-  lodepng_free(info->itext_transkeys);
-  lodepng_free(info->itext_strings);
-}
-
-static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
-{
-  size_t i = 0;
-  dest->itext_keys = 0;
-  dest->itext_langtags = 0;
-  dest->itext_transkeys = 0;
-  dest->itext_strings = 0;
-  dest->itext_num = 0;
-  for(i = 0; i != source->itext_num; ++i)
-  {
-    CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i],
-                                        source->itext_transkeys[i], source->itext_strings[i]));
-  }
-  return 0;
-}
-
-void lodepng_clear_itext(LodePNGInfo* info)
-{
-  LodePNGIText_cleanup(info);
-}
-
-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
-                           const char* transkey, const char* str)
-{
-  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
-  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
-  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
-  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
-  if(!new_keys || !new_langtags || !new_transkeys || !new_strings)
-  {
-    lodepng_free(new_keys);
-    lodepng_free(new_langtags);
-    lodepng_free(new_transkeys);
-    lodepng_free(new_strings);
-    return 83; /*alloc fail*/
-  }
-
-  ++info->itext_num;
-  info->itext_keys = new_keys;
-  info->itext_langtags = new_langtags;
-  info->itext_transkeys = new_transkeys;
-  info->itext_strings = new_strings;
-
-  string_init(&info->itext_keys[info->itext_num - 1]);
-  string_set(&info->itext_keys[info->itext_num - 1], key);
-
-  string_init(&info->itext_langtags[info->itext_num - 1]);
-  string_set(&info->itext_langtags[info->itext_num - 1], langtag);
-
-  string_init(&info->itext_transkeys[info->itext_num - 1]);
-  string_set(&info->itext_transkeys[info->itext_num - 1], transkey);
-
-  string_init(&info->itext_strings[info->itext_num - 1]);
-  string_set(&info->itext_strings[info->itext_num - 1], str);
-
-  return 0;
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-void lodepng_info_init(LodePNGInfo* info)
-{
-  lodepng_color_mode_init(&info->color);
-  info->interlace_method = 0;
-  info->compression_method = 0;
-  info->filter_method = 0;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  info->background_defined = 0;
-  info->background_r = info->background_g = info->background_b = 0;
-
-  LodePNGText_init(info);
-  LodePNGIText_init(info);
-
-  info->time_defined = 0;
-  info->phys_defined = 0;
-
-  LodePNGUnknownChunks_init(info);
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-}
-
-void lodepng_info_cleanup(LodePNGInfo* info)
-{
-  lodepng_color_mode_cleanup(&info->color);
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  LodePNGText_cleanup(info);
-  LodePNGIText_cleanup(info);
-
-  LodePNGUnknownChunks_cleanup(info);
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-}
-
-unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source)
-{
-  lodepng_info_cleanup(dest);
-  *dest = *source;
-  lodepng_color_mode_init(&dest->color);
-  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
-  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
-
-  LodePNGUnknownChunks_init(dest);
-  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-  return 0;
-}
-
-void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b)
-{
-  LodePNGInfo temp = *a;
-  *a = *b;
-  *b = temp;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/
-static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in)
-{
-  unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/
-  /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/
-  unsigned p = index & m;
-  in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/
-  in = in << (bits * (m - p));
-  if(p == 0) out[index * bits / 8] = in;
-  else out[index * bits / 8] |= in;
-}
-
-typedef struct ColorTree ColorTree;
-
-/*
-One node of a color tree
-This is the data structure used to count the number of unique colors and to get a palette
-index for a color. It's like an octree, but because the alpha channel is used too, each
-node has 16 instead of 8 children.
-*/
-struct ColorTree
-{
-  ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/
-  int index; /*the payload. Only has a meaningful value if this is in the last level*/
-};
-
-static void color_tree_init(ColorTree* tree)
-{
-  int i;
-  for(i = 0; i != 16; ++i) tree->children[i] = 0;
-  tree->index = -1;
-}
-
-static void color_tree_cleanup(ColorTree* tree)
-{
-  int i;
-  for(i = 0; i != 16; ++i)
-  {
-    if(tree->children[i])
-    {
-      color_tree_cleanup(tree->children[i]);
-      lodepng_free(tree->children[i]);
-    }
-  }
-}
-
-/*returns -1 if color not present, its index otherwise*/
-static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  int bit = 0;
-  for(bit = 0; bit < 8; ++bit)
-  {
-    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
-    if(!tree->children[i]) return -1;
-    else tree = tree->children[i];
-  }
-  return tree ? tree->index : -1;
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  return color_tree_get(tree, r, g, b, a) >= 0;
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/*color is not allowed to already exist.
-Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/
-static void color_tree_add(ColorTree* tree,
-                           unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index)
-{
-  int bit;
-  for(bit = 0; bit < 8; ++bit)
-  {
-    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
-    if(!tree->children[i])
-    {
-      tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
-      color_tree_init(tree->children[i]);
-    }
-    tree = tree->children[i];
-  }
-  tree->index = (int)index;
-}
-
-/*put a pixel, given its RGBA color, into image of any color type*/
-static unsigned rgba8ToPixel(unsigned char* out, size_t i,
-                             const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  if(mode->colortype == LCT_GREY)
-  {
-    unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/;
-    if(mode->bitdepth == 8) out[i] = grey;
-    else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = grey;
-    else
-    {
-      /*take the most significant bits of grey*/
-      grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1);
-      addColorBits(out, i, mode->bitdepth, grey);
-    }
-  }
-  else if(mode->colortype == LCT_RGB)
-  {
-    if(mode->bitdepth == 8)
-    {
-      out[i * 3 + 0] = r;
-      out[i * 3 + 1] = g;
-      out[i * 3 + 2] = b;
-    }
-    else
-    {
-      out[i * 6 + 0] = out[i * 6 + 1] = r;
-      out[i * 6 + 2] = out[i * 6 + 3] = g;
-      out[i * 6 + 4] = out[i * 6 + 5] = b;
-    }
-  }
-  else if(mode->colortype == LCT_PALETTE)
-  {
-    int index = color_tree_get(tree, r, g, b, a);
-    if(index < 0) return 82; /*color not in palette*/
-    if(mode->bitdepth == 8) out[i] = index;
-    else addColorBits(out, i, mode->bitdepth, (unsigned)index);
-  }
-  else if(mode->colortype == LCT_GREY_ALPHA)
-  {
-    unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/;
-    if(mode->bitdepth == 8)
-    {
-      out[i * 2 + 0] = grey;
-      out[i * 2 + 1] = a;
-    }
-    else if(mode->bitdepth == 16)
-    {
-      out[i * 4 + 0] = out[i * 4 + 1] = grey;
-      out[i * 4 + 2] = out[i * 4 + 3] = a;
-    }
-  }
-  else if(mode->colortype == LCT_RGBA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      out[i * 4 + 0] = r;
-      out[i * 4 + 1] = g;
-      out[i * 4 + 2] = b;
-      out[i * 4 + 3] = a;
-    }
-    else
-    {
-      out[i * 8 + 0] = out[i * 8 + 1] = r;
-      out[i * 8 + 2] = out[i * 8 + 3] = g;
-      out[i * 8 + 4] = out[i * 8 + 5] = b;
-      out[i * 8 + 6] = out[i * 8 + 7] = a;
-    }
-  }
-
-  return 0; /*no error*/
-}
-
-/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/
-static void rgba16ToPixel(unsigned char* out, size_t i,
-                         const LodePNGColorMode* mode,
-                         unsigned short r, unsigned short g, unsigned short b, unsigned short a)
-{
-  if(mode->colortype == LCT_GREY)
-  {
-    unsigned short grey = r; /*((unsigned)r + g + b) / 3*/;
-    out[i * 2 + 0] = (grey >> 8) & 255;
-    out[i * 2 + 1] = grey & 255;
-  }
-  else if(mode->colortype == LCT_RGB)
-  {
-    out[i * 6 + 0] = (r >> 8) & 255;
-    out[i * 6 + 1] = r & 255;
-    out[i * 6 + 2] = (g >> 8) & 255;
-    out[i * 6 + 3] = g & 255;
-    out[i * 6 + 4] = (b >> 8) & 255;
-    out[i * 6 + 5] = b & 255;
-  }
-  else if(mode->colortype == LCT_GREY_ALPHA)
-  {
-    unsigned short grey = r; /*((unsigned)r + g + b) / 3*/;
-    out[i * 4 + 0] = (grey >> 8) & 255;
-    out[i * 4 + 1] = grey & 255;
-    out[i * 4 + 2] = (a >> 8) & 255;
-    out[i * 4 + 3] = a & 255;
-  }
-  else if(mode->colortype == LCT_RGBA)
-  {
-    out[i * 8 + 0] = (r >> 8) & 255;
-    out[i * 8 + 1] = r & 255;
-    out[i * 8 + 2] = (g >> 8) & 255;
-    out[i * 8 + 3] = g & 255;
-    out[i * 8 + 4] = (b >> 8) & 255;
-    out[i * 8 + 5] = b & 255;
-    out[i * 8 + 6] = (a >> 8) & 255;
-    out[i * 8 + 7] = a & 255;
-  }
-}
-
-/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/
-static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
-                               unsigned char* b, unsigned char* a,
-                               const unsigned char* in, size_t i,
-                               const LodePNGColorMode* mode)
-{
-  if(mode->colortype == LCT_GREY)
-  {
-    if(mode->bitdepth == 8)
-    {
-      *r = *g = *b = in[i];
-      if(mode->key_defined && *r == mode->key_r) *a = 0;
-      else *a = 255;
-    }
-    else if(mode->bitdepth == 16)
-    {
-      *r = *g = *b = in[i * 2 + 0];
-      if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
-      else *a = 255;
-    }
-    else
-    {
-      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
-      size_t j = i * mode->bitdepth;
-      unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
-      *r = *g = *b = (value * 255) / highest;
-      if(mode->key_defined && value == mode->key_r) *a = 0;
-      else *a = 255;
-    }
-  }
-  else if(mode->colortype == LCT_RGB)
-  {
-    if(mode->bitdepth == 8)
-    {
-      *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2];
-      if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0;
-      else *a = 255;
-    }
-    else
-    {
-      *r = in[i * 6 + 0];
-      *g = in[i * 6 + 2];
-      *b = in[i * 6 + 4];
-      if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
-         && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
-         && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
-      else *a = 255;
-    }
-  }
-  else if(mode->colortype == LCT_PALETTE)
-  {
-    unsigned index;
-    if(mode->bitdepth == 8) index = in[i];
-    else
-    {
-      size_t j = i * mode->bitdepth;
-      index = readBitsFromReversedStream(&j, in, mode->bitdepth);
-    }
-
-    if(index >= mode->palettesize)
-    {
-      /*This is an error according to the PNG spec, but common PNG decoders make it black instead.
-      Done here too, slightly faster due to no error handling needed.*/
-      *r = *g = *b = 0;
-      *a = 255;
-    }
-    else
-    {
-      *r = mode->palette[index * 4 + 0];
-      *g = mode->palette[index * 4 + 1];
-      *b = mode->palette[index * 4 + 2];
-      *a = mode->palette[index * 4 + 3];
-    }
-  }
-  else if(mode->colortype == LCT_GREY_ALPHA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      *r = *g = *b = in[i * 2 + 0];
-      *a = in[i * 2 + 1];
-    }
-    else
-    {
-      *r = *g = *b = in[i * 4 + 0];
-      *a = in[i * 4 + 2];
-    }
-  }
-  else if(mode->colortype == LCT_RGBA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      *r = in[i * 4 + 0];
-      *g = in[i * 4 + 1];
-      *b = in[i * 4 + 2];
-      *a = in[i * 4 + 3];
-    }
-    else
-    {
-      *r = in[i * 8 + 0];
-      *g = in[i * 8 + 2];
-      *b = in[i * 8 + 4];
-      *a = in[i * 8 + 6];
-    }
-  }
-}
-
-/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
-mode test cases, optimized to convert the colors much faster, when converting
-to RGBA or RGB with 8 bit per cannel. buffer must be RGBA or RGB output with
-enough memory, if has_alpha is true the output is RGBA. mode has the color mode
-of the input buffer.*/
-static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels,
-                                unsigned has_alpha, const unsigned char* in,
-                                const LodePNGColorMode* mode)
-{
-  unsigned num_channels = has_alpha ? 4 : 3;
-  size_t i;
-  if(mode->colortype == LCT_GREY)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i];
-        if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255;
-      }
-    }
-    else if(mode->bitdepth == 16)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i * 2];
-        if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
-      }
-    }
-    else
-    {
-      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
-      size_t j = 0;
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
-        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
-        if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255;
-      }
-    }
-  }
-  else if(mode->colortype == LCT_RGB)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 3 + 0];
-        buffer[1] = in[i * 3 + 1];
-        buffer[2] = in[i * 3 + 2];
-        if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r
-           && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255;
-      }
-    }
-    else
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 6 + 0];
-        buffer[1] = in[i * 6 + 2];
-        buffer[2] = in[i * 6 + 4];
-        if(has_alpha) buffer[3] = mode->key_defined
-           && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
-           && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
-           && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
-      }
-    }
-  }
-  else if(mode->colortype == LCT_PALETTE)
-  {
-    unsigned index;
-    size_t j = 0;
-    for(i = 0; i != numpixels; ++i, buffer += num_channels)
-    {
-      if(mode->bitdepth == 8) index = in[i];
-      else index = readBitsFromReversedStream(&j, in, mode->bitdepth);
-
-      if(index >= mode->palettesize)
-      {
-        /*This is an error according to the PNG spec, but most PNG decoders make it black instead.
-        Done here too, slightly faster due to no error handling needed.*/
-        buffer[0] = buffer[1] = buffer[2] = 0;
-        if(has_alpha) buffer[3] = 255;
-      }
-      else
-      {
-        buffer[0] = mode->palette[index * 4 + 0];
-        buffer[1] = mode->palette[index * 4 + 1];
-        buffer[2] = mode->palette[index * 4 + 2];
-        if(has_alpha) buffer[3] = mode->palette[index * 4 + 3];
-      }
-    }
-  }
-  else if(mode->colortype == LCT_GREY_ALPHA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
-        if(has_alpha) buffer[3] = in[i * 2 + 1];
-      }
-    }
-    else
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
-        if(has_alpha) buffer[3] = in[i * 4 + 2];
-      }
-    }
-  }
-  else if(mode->colortype == LCT_RGBA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 4 + 0];
-        buffer[1] = in[i * 4 + 1];
-        buffer[2] = in[i * 4 + 2];
-        if(has_alpha) buffer[3] = in[i * 4 + 3];
-      }
-    }
-    else
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 8 + 0];
-        buffer[1] = in[i * 8 + 2];
-        buffer[2] = in[i * 8 + 4];
-        if(has_alpha) buffer[3] = in[i * 8 + 6];
-      }
-    }
-  }
-}
-
-/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with
-given color type, but the given color type must be 16-bit itself.*/
-static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
-                                const unsigned char* in, size_t i, const LodePNGColorMode* mode)
-{
-  if(mode->colortype == LCT_GREY)
-  {
-    *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1];
-    if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
-    else *a = 65535;
-  }
-  else if(mode->colortype == LCT_RGB)
-  {
-    *r = 256u * in[i * 6 + 0] + in[i * 6 + 1];
-    *g = 256u * in[i * 6 + 2] + in[i * 6 + 3];
-    *b = 256u * in[i * 6 + 4] + in[i * 6 + 5];
-    if(mode->key_defined
-       && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
-       && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
-       && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
-    else *a = 65535;
-  }
-  else if(mode->colortype == LCT_GREY_ALPHA)
-  {
-    *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1];
-    *a = 256u * in[i * 4 + 2] + in[i * 4 + 3];
-  }
-  else if(mode->colortype == LCT_RGBA)
-  {
-    *r = 256u * in[i * 8 + 0] + in[i * 8 + 1];
-    *g = 256u * in[i * 8 + 2] + in[i * 8 + 3];
-    *b = 256u * in[i * 8 + 4] + in[i * 8 + 5];
-    *a = 256u * in[i * 8 + 6] + in[i * 8 + 7];
-  }
-}
-
-unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
-                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
-                         unsigned w, unsigned h)
-{
-  size_t i;
-  ColorTree tree;
-  size_t numpixels = w * h;
-
-  if(lodepng_color_mode_equal(mode_out, mode_in))
-  {
-    size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
-    for(i = 0; i != numbytes; ++i) out[i] = in[i];
-    return 0;
-  }
-
-  if(mode_out->colortype == LCT_PALETTE)
-  {
-    size_t palettesize = mode_out->palettesize;
-    const unsigned char* palette = mode_out->palette;
-    size_t palsize = size_t(1) << mode_out->bitdepth;
-    /*if the user specified output palette but did not give the values, assume
-    they want the values of the input color type (assuming that one is palette).
-    Note that we never create a new palette ourselves.*/
-    if(palettesize == 0)
-    {
-      palettesize = mode_in->palettesize;
-      palette = mode_in->palette;
-    }
-    if(palettesize < palsize) palsize = palettesize;
-    color_tree_init(&tree);
-    for(i = 0; i != palsize; ++i)
-    {
-      const unsigned char* p = &palette[i * 4];
-      color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned int)(i));
-    }
-  }
-
-  if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16)
-  {
-    for(i = 0; i != numpixels; ++i)
-    {
-      unsigned short r = 0, g = 0, b = 0, a = 0;
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
-      rgba16ToPixel(out, i, mode_out, r, g, b, a);
-    }
-  }
-  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA)
-  {
-    getPixelColorsRGBA8(out, numpixels, 1, in, mode_in);
-  }
-  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB)
-  {
-    getPixelColorsRGBA8(out, numpixels, 0, in, mode_in);
-  }
-  else
-  {
-    unsigned char r = 0, g = 0, b = 0, a = 0;
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
-      CERROR_TRY_RETURN(rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a));
-    }
-  }
-
-  if(mode_out->colortype == LCT_PALETTE)
-  {
-    color_tree_cleanup(&tree);
-  }
-
-  return 0; /*no error*/
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-void lodepng_color_profile_init(LodePNGColorProfile* profile)
-{
-  profile->colored = 0;
-  profile->key = 0;
-  profile->alpha = 0;
-  profile->key_r = profile->key_g = profile->key_b = 0;
-  profile->numcolors = 0;
-  profile->bits = 1;
-}
-
-/*function used for debug purposes with C++*/
-/*void printColorProfile(LodePNGColorProfile* p)
-{
-  std::cout << "colored: " << (int)p->colored << ", ";
-  std::cout << "key: " << (int)p->key << ", ";
-  std::cout << "key_r: " << (int)p->key_r << ", ";
-  std::cout << "key_g: " << (int)p->key_g << ", ";
-  std::cout << "key_b: " << (int)p->key_b << ", ";
-  std::cout << "alpha: " << (int)p->alpha << ", ";
-  std::cout << "numcolors: " << (int)p->numcolors << ", ";
-  std::cout << "bits: " << (int)p->bits << std::endl;
-}*/
-
-/*Returns how many bits needed to represent given value (max 8 bit)*/
-static unsigned getValueRequiredBits(unsigned char value)
-{
-  if(value == 0 || value == 255) return 1;
-  /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/
-  if(value % 17 == 0) return value % 85 == 0 ? 2 : 4;
-  return 8;
-}
-
-/*profile must already have been inited with mode.
-It's ok to set some parameters of profile to done already.*/
-unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
-                                   const unsigned char* in, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode)
-{
-  unsigned error = 0;
-  size_t i;
-  ColorTree tree;
-  size_t numpixels = w * h;
-
-  unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0;
-  unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1;
-  unsigned numcolors_done = 0;
-  unsigned bpp = lodepng_get_bpp(mode);
-  unsigned bits_done = bpp == 1 ? 1 : 0;
-  unsigned maxnumcolors = 257;
-  unsigned sixteen = 0;
-  if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256));
-
-  color_tree_init(&tree);
-
-  /*Check if the 16-bit input is truly 16-bit*/
-  if(mode->bitdepth == 16)
-  {
-    unsigned short r, g, b, a;
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
-      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
-         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/
-      {
-        sixteen = 1;
-        break;
-      }
-    }
-  }
-
-  if(sixteen)
-  {
-    unsigned short r = 0, g = 0, b = 0, a = 0;
-    profile->bits = 16;
-    bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
-
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
-
-      if(!colored_done && (r != g || r != b))
-      {
-        profile->colored = 1;
-        colored_done = 1;
-      }
-
-      if(!alpha_done)
-      {
-        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
-        if(a != 65535 && (a != 0 || (profile->key && !matchkey)))
-        {
-          profile->alpha = 1;
-          alpha_done = 1;
-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-        }
-        else if(a == 0 && !profile->alpha && !profile->key)
-        {
-          profile->key = 1;
-          profile->key_r = r;
-          profile->key_g = g;
-          profile->key_b = b;
-        }
-        else if(a == 65535 && profile->key && matchkey)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-        }
-      }
-      if(alpha_done && numcolors_done && colored_done && bits_done) break;
-    }
-
-    if(profile->key && !profile->alpha)
-    {
-      for(i = 0; i != numpixels; ++i)
-      {
-        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
-        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-        }
-      }
-    }
-  }
-  else /* < 16-bit */
-  {
-    unsigned char r = 0, g = 0, b = 0, a = 0;
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
-
-      if(!bits_done && profile->bits < 8)
-      {
-        /*only r is checked, < 8 bits is only relevant for greyscale*/
-        unsigned bits = getValueRequiredBits(r);
-        if(bits > profile->bits) profile->bits = bits;
-      }
-      bits_done = (profile->bits >= bpp);
-
-      if(!colored_done && (r != g || r != b))
-      {
-        profile->colored = 1;
-        colored_done = 1;
-        if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
-      }
-
-      if(!alpha_done)
-      {
-        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
-        if(a != 255 && (a != 0 || (profile->key && !matchkey)))
-        {
-          profile->alpha = 1;
-          alpha_done = 1;
-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-        }
-        else if(a == 0 && !profile->alpha && !profile->key)
-        {
-          profile->key = 1;
-          profile->key_r = r;
-          profile->key_g = g;
-          profile->key_b = b;
-        }
-        else if(a == 255 && profile->key && matchkey)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-        }
-      }
-
-      if(!numcolors_done)
-      {
-        if(!color_tree_has(&tree, r, g, b, a))
-        {
-          color_tree_add(&tree, r, g, b, a, profile->numcolors);
-          if(profile->numcolors < 256)
-          {
-            unsigned char* p = profile->palette;
-            unsigned n = profile->numcolors;
-            p[n * 4 + 0] = r;
-            p[n * 4 + 1] = g;
-            p[n * 4 + 2] = b;
-            p[n * 4 + 3] = a;
-          }
-          ++profile->numcolors;
-          numcolors_done = profile->numcolors >= maxnumcolors;
-        }
-      }
-
-      if(alpha_done && numcolors_done && colored_done && bits_done) break;
-    }
-
-    if(profile->key && !profile->alpha)
-    {
-      for(i = 0; i != numpixels; ++i)
-      {
-        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
-        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-        }
-      }
-    }
-
-    /*make the profile's key always 16-bit for consistency - repeat each byte twice*/
-    profile->key_r += (profile->key_r << 8);
-    profile->key_g += (profile->key_g << 8);
-    profile->key_b += (profile->key_b << 8);
-  }
-
-  color_tree_cleanup(&tree);
-  return error;
-}
-
-/*Automatically chooses color type that gives smallest amount of bits in the
-output image, e.g. grey if there are only greyscale pixels, palette if there
-are less than 256 colors, ...
-Updates values of mode with a potentially smaller color model. mode_out should
-contain the user chosen color model, but will be overwritten with the new chosen one.*/
-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
-                                   const unsigned char* image, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode_in)
-{
-  LodePNGColorProfile prof;
-  unsigned error = 0;
-  unsigned i, n, palettebits, grey_ok, palette_ok;
-
-  lodepng_color_profile_init(&prof);
-  error = lodepng_get_color_profile(&prof, image, w, h, mode_in);
-  if(error) return error;
-  mode_out->key_defined = 0;
-
-  if(prof.key && w * h <= 16)
-  {
-    prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
-    if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-  }
-  grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/
-  n = prof.numcolors;
-  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8));
-  palette_ok = n <= 256 && (n * 2 < w * h) && prof.bits <= 8;
-  if(w * h < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/
-  if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/
-
-  if(palette_ok)
-  {
-    unsigned char* p = prof.palette;
-    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
-    for(i = 0; i != prof.numcolors; ++i)
-    {
-      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
-      if(error) break;
-    }
-
-    mode_out->colortype = LCT_PALETTE;
-    mode_out->bitdepth = palettebits;
-
-    if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
-        && mode_in->bitdepth == mode_out->bitdepth)
-    {
-      /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/
-      lodepng_color_mode_cleanup(mode_out);
-      lodepng_color_mode_copy(mode_out, mode_in);
-    }
-  }
-  else /*8-bit or 16-bit per channel*/
-  {
-    mode_out->bitdepth = prof.bits;
-    mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA)
-                                     : (prof.colored ? LCT_RGB : LCT_GREY);
-
-    if(prof.key && !prof.alpha)
-    {
-      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/
-      mode_out->key_r = prof.key_r & mask;
-      mode_out->key_g = prof.key_g & mask;
-      mode_out->key_b = prof.key_b & mask;
-      mode_out->key_defined = 1;
-    }
-  }
-
-  return error;
-}
-
-#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
-
-/*
-Paeth predicter, used by PNG filter type 4
-The parameters are of type short, but should come from unsigned chars, the shorts
-are only needed to make the paeth calculation correct.
-*/
-static unsigned char paethPredictor(short a, short b, short c)
-{
-  short pa = abs(b - c);
-  short pb = abs(a - c);
-  short pc = abs(a + b - c - c);
-
-  if(pc < pa && pc < pb) return (unsigned char)c;
-  else if(pb < pa) return (unsigned char)b;
-  else return (unsigned char)a;
-}
-
-/*shared values used by multiple Adam7 related functions*/
-
-static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/
-static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/
-static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/
-static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/
-
-/*
-Outputs various dimensions and positions in the image related to the Adam7 reduced images.
-passw: output containing the width of the 7 passes
-passh: output containing the height of the 7 passes
-filter_passstart: output containing the index of the start and end of each
- reduced image with filter bytes
-padded_passstart output containing the index of the start and end of each
- reduced image when without filter bytes but with padded scanlines
-passstart: output containing the index of the start and end of each reduced
- image without padding between scanlines, but still padding between the images
-w, h: width and height of non-interlaced image
-bpp: bits per pixel
-"padded" is only relevant if bpp is less than 8 and a scanline or image does not
- end at a full byte
-*/
-static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
-                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp)
-{
-  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
-  unsigned i;
-
-  /*calculate width and height in pixels of each pass*/
-  for(i = 0; i != 7; ++i)
-  {
-    passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i];
-    passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i];
-    if(passw[i] == 0) passh[i] = 0;
-    if(passh[i] == 0) passw[i] = 0;
-  }
-
-  filter_passstart[0] = padded_passstart[0] = passstart[0] = 0;
-  for(i = 0; i != 7; ++i)
-  {
-    /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/
-    filter_passstart[i + 1] = filter_passstart[i]
-                            + ((passw[i] && passh[i]) ? passh[i] * (1 + (passw[i] * bpp + 7) / 8) : 0);
-    /*bits padded if needed to fill full byte at end of each scanline*/
-    padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7) / 8);
-    /*only padded at end of reduced image*/
-    passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7) / 8;
-  }
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / PNG Decoder                                                            / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*read the information from the header and store it in the LodePNGInfo. return value is error*/
-unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
-                         const unsigned char* in, size_t insize)
-{
-  LodePNGInfo* info = &state->info_png;
-  if(insize == 0 || in == 0)
-  {
-    CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
-  }
-  if(insize < 33)
-  {
-    CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
-  }
-
-  /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
-  lodepng_info_cleanup(info);
-  lodepng_info_init(info);
-
-  if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
-     || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10)
-  {
-    CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
-  }
-  if(lodepng_chunk_length(in + 8) != 13)
-  {
-    CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
-  }
-  if(!lodepng_chunk_type_equals(in + 8, "IHDR"))
-  {
-    CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
-  }
-
-  /*read the values given in the header*/
-  *w = lodepng_read32bitInt(&in[16]);
-  *h = lodepng_read32bitInt(&in[20]);
-  info->color.bitdepth = in[24];
-  info->color.colortype = (LodePNGColorType)in[25];
-  info->compression_method = in[26];
-  info->filter_method = in[27];
-  info->interlace_method = in[28];
-
-  if(*w == 0 || *h == 0)
-  {
-    CERROR_RETURN_ERROR(state->error, 93);
-  }
-
-  if(!state->decoder.ignore_crc)
-  {
-    unsigned CRC = lodepng_read32bitInt(&in[29]);
-    unsigned checksum = lodepng_crc32(&in[12], 17);
-    if(CRC != checksum)
-    {
-      CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
-    }
-  }
-
-  /*error: only compression method 0 is allowed in the specification*/
-  if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
-  /*error: only filter method 0 is allowed in the specification*/
-  if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
-  /*error: only interlace methods 0 and 1 exist in the specification*/
-  if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
-
-  state->error = checkColorValidity(info->color.colortype, info->color.bitdepth);
-  return state->error;
-}
-
-static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
-                                 size_t bytewidth, unsigned char filterType, size_t length)
-{
-  /*
-  For PNG filter method 0
-  unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte,
-  the filter works byte per byte (bytewidth = 1)
-  precon is the previous unfiltered scanline, recon the result, scanline the current one
-  the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead
-  recon and scanline MAY be the same memory address! precon must be disjoint.
-  */
-
-  size_t i;
-  switch(filterType)
-  {
-    case 0:
-      for(i = 0; i != length; ++i) recon[i] = scanline[i];
-      break;
-    case 1:
-      for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
-      for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
-      break;
-    case 2:
-      if(precon)
-      {
-        for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
-      }
-      else
-      {
-        for(i = 0; i != length; ++i) recon[i] = scanline[i];
-      }
-      break;
-    case 3:
-      if(precon)
-      {
-        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1);
-        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1);
-      }
-      else
-      {
-        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
-        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1);
-      }
-      break;
-    case 4:
-      if(precon)
-      {
-        for(i = 0; i != bytewidth; ++i)
-        {
-          recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
-        }
-        for(i = bytewidth; i < length; ++i)
-        {
-          recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
-        }
-      }
-      else
-      {
-        for(i = 0; i != bytewidth; ++i)
-        {
-          recon[i] = scanline[i];
-        }
-        for(i = bytewidth; i < length; ++i)
-        {
-          /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
-          recon[i] = (scanline[i] + recon[i - bytewidth]);
-        }
-      }
-      break;
-    default: return 36; /*error: unexisting filter type given*/
-  }
-  return 0;
-}
-
-static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
-{
-  /*
-  For PNG filter method 0
-  this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times)
-  out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline
-  w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel
-  in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes)
-  */
-
-  unsigned y;
-  unsigned char* prevline = 0;
-
-  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
-  size_t bytewidth = (bpp + 7) / 8;
-  size_t linebytes = (w * bpp + 7) / 8;
-
-  for(y = 0; y < h; ++y)
-  {
-    size_t outindex = linebytes * y;
-    size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
-    unsigned char filterType = in[inindex];
-
-    CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes));
-
-    prevline = &out[outindex];
-  }
-
-  return 0;
-}
-
-/*
-in: Adam7 interlaced image, with no padding bits between scanlines, but between
- reduced images so that each reduced image starts at a byte.
-out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h
-bpp: bits per pixel
-out has the following size in bits: w * h * bpp.
-in is possibly bigger due to padding bits between reduced images.
-out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
-(because that's likely a little bit faster)
-NOTE: comments about padding bits are only relevant if bpp < 8
-*/
-static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
-{
-  unsigned passw[7], passh[7];
-  size_t filter_passstart[8], padded_passstart[8], passstart[8];
-  unsigned i;
-
-  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-  if(bpp >= 8)
-  {
-    for(i = 0; i != 7; ++i)
-    {
-      unsigned x, y, b;
-      size_t bytewidth = bpp / 8;
-      for(y = 0; y < passh[i]; ++y)
-      for(x = 0; x < passw[i]; ++x)
-      {
-        size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth;
-        size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth;
-        for(b = 0; b < bytewidth; ++b)
-        {
-          out[pixeloutstart + b] = in[pixelinstart + b];
-        }
-      }
-    }
-  }
-  else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
-  {
-    for(i = 0; i != 7; ++i)
-    {
-      unsigned x, y, b;
-      unsigned ilinebits = bpp * passw[i];
-      unsigned olinebits = bpp * w;
-      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
-      for(y = 0; y < passh[i]; ++y)
-      for(x = 0; x < passw[i]; ++x)
-      {
-        ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp);
-        obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
-        for(b = 0; b < bpp; ++b)
-        {
-          unsigned char bit = readBitFromReversedStream(&ibp, in);
-          /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/
-          setBitOfReversedStream0(&obp, out, bit);
-        }
-      }
-    }
-  }
-}
-
-static void removePaddingBits(unsigned char* out, const unsigned char* in,
-                              size_t olinebits, size_t ilinebits, unsigned h)
-{
-  /*
-  After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. They need
-  to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers
-  for the Adam7 code, the color convert code and the output to the user.
-  in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must
-  have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits
-  also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7
-  only useful if (ilinebits - olinebits) is a value in the range 1..7
-  */
-  unsigned y;
-  size_t diff = ilinebits - olinebits;
-  size_t ibp = 0, obp = 0; /*input and output bit pointers*/
-  for(y = 0; y < h; ++y)
-  {
-    size_t x;
-    for(x = 0; x < olinebits; ++x)
-    {
-      unsigned char bit = readBitFromReversedStream(&ibp, in);
-      setBitOfReversedStream(&obp, out, bit);
-    }
-    ibp += diff;
-  }
-}
-
-/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from
-the IDAT chunks (with filter index bytes and possible padding bits)
-return value is error*/
-static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
-                                     unsigned w, unsigned h, const LodePNGInfo* info_png)
-{
-  /*
-  This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype.
-  Steps:
-  *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8)
-  *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace
-  NOTE: the in buffer will be overwritten with intermediate data!
-  */
-  unsigned bpp = lodepng_get_bpp(&info_png->color);
-  if(bpp == 0) return 31; /*error: invalid colortype*/
-
-  if(info_png->interlace_method == 0)
-  {
-    if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8)
-    {
-      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp));
-      removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h);
-    }
-    /*we can immediately filter into the out buffer, no other steps needed*/
-    else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
-  }
-  else /*interlace_method is 1 (Adam7)*/
-  {
-    unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8];
-    unsigned i;
-
-    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-    for(i = 0; i != 7; ++i)
-    {
-      CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp));
-      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
-      move bytes instead of bits or move not at all*/
-      if(bpp < 8)
-      {
-        /*remove padding bits in scanlines; after this there still may be padding
-        bits between the different reduced images: each reduced image still starts nicely at a byte*/
-        removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp,
-                          ((passw[i] * bpp + 7) / 8) * 8, passh[i]);
-      }
-    }
-
-    Adam7_deinterlace(out, in, w, h, bpp);
-  }
-
-  return 0;
-}
-
-static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
-{
-  unsigned pos = 0, i;
-  if(color->palette) lodepng_free(color->palette);
-  color->palettesize = chunkLength / 3;
-  color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize);
-  if(!color->palette && color->palettesize)
-  {
-    color->palettesize = 0;
-    return 83; /*alloc fail*/
-  }
-  if(color->palettesize > 256) return 38; /*error: palette too big*/
-
-  for(i = 0; i != color->palettesize; ++i)
-  {
-    color->palette[4 * i + 0] = data[pos++]; /*R*/
-    color->palette[4 * i + 1] = data[pos++]; /*G*/
-    color->palette[4 * i + 2] = data[pos++]; /*B*/
-    color->palette[4 * i + 3] = 255; /*alpha*/
-  }
-
-  return 0; /* OK */
-}
-
-static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
-{
-  unsigned i;
-  if(color->colortype == LCT_PALETTE)
-  {
-    /*error: more alpha values given than there are palette entries*/
-    if(chunkLength > color->palettesize) return 38;
-
-    for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i];
-  }
-  else if(color->colortype == LCT_GREY)
-  {
-    /*error: this chunk must be 2 bytes for greyscale image*/
-    if(chunkLength != 2) return 30;
-
-    color->key_defined = 1;
-    color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
-  }
-  else if(color->colortype == LCT_RGB)
-  {
-    /*error: this chunk must be 6 bytes for RGB image*/
-    if(chunkLength != 6) return 41;
-
-    color->key_defined = 1;
-    color->key_r = 256u * data[0] + data[1];
-    color->key_g = 256u * data[2] + data[3];
-    color->key_b = 256u * data[4] + data[5];
-  }
-  else return 42; /*error: tRNS chunk not allowed for other color models*/
-
-  return 0; /* OK */
-}
-
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*background color chunk (bKGD)*/
-static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  if(info->color.colortype == LCT_PALETTE)
-  {
-    /*error: this chunk must be 1 byte for indexed color image*/
-    if(chunkLength != 1) return 43;
-
-    info->background_defined = 1;
-    info->background_r = info->background_g = info->background_b = data[0];
-  }
-  else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA)
-  {
-    /*error: this chunk must be 2 bytes for greyscale image*/
-    if(chunkLength != 2) return 44;
-
-    info->background_defined = 1;
-    info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1];
-  }
-  else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA)
-  {
-    /*error: this chunk must be 6 bytes for greyscale image*/
-    if(chunkLength != 6) return 45;
-
-    info->background_defined = 1;
-    info->background_r = 256u * data[0] + data[1];
-    info->background_g = 256u * data[2] + data[3];
-    info->background_b = 256u * data[4] + data[5];
-  }
-
-  return 0; /* OK */
-}
-
-/*text chunk (tEXt)*/
-static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  unsigned error = 0;
-  char *key = 0, *str = 0;
-  unsigned i;
-
-  while(!error) /*not really a while loop, only used to break on error*/
-  {
-    unsigned length, string2_begin;
-
-    length = 0;
-    while(length < chunkLength && data[length] != 0) ++length;
-    /*even though it's not allowed by the standard, no error is thrown if
-    there's no null termination char, if the text is empty*/
-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
-
-    key = (char*)lodepng_malloc(length + 1);
-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
-
-    string2_begin = length + 1; /*skip keyword null terminator*/
-
-    length = chunkLength < string2_begin ? 0 : chunkLength - string2_begin;
-    str = (char*)lodepng_malloc(length + 1);
-    if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    str[length] = 0;
-    for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i];
-
-    error = lodepng_add_text(info, key, str);
-
-    break;
-  }
-
-  lodepng_free(key);
-  lodepng_free(str);
-
-  return error;
-}
-
-/*compressed text chunk (zTXt)*/
-static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
-                               const unsigned char* data, size_t chunkLength)
-{
-  unsigned error = 0;
-  unsigned i;
-
-  unsigned length, string2_begin;
-  char *key = 0;
-  ucvector decoded;
-
-  ucvector_init(&decoded);
-
-  while(!error) /*not really a while loop, only used to break on error*/
-  {
-    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
-    if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
-
-    key = (char*)lodepng_malloc(length + 1);
-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
-
-    if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
-
-    string2_begin = length + 2;
-    if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
-
-    length = chunkLength - string2_begin;
-    /*will fail if zlib error, e.g. if length is too small*/
-    error = zlib_decompress(&decoded.data, &decoded.size,
-                            (unsigned char*)(&data[string2_begin]),
-                            length, zlibsettings);
-    if(error) break;
-    ucvector_push_back(&decoded, 0);
-
-    error = lodepng_add_text(info, key, (char*)decoded.data);
-
-    break;
-  }
-
-  lodepng_free(key);
-  ucvector_cleanup(&decoded);
-
-  return error;
-}
-
-/*international text chunk (iTXt)*/
-static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
-                               const unsigned char* data, size_t chunkLength)
-{
-  unsigned error = 0;
-  unsigned i;
-
-  unsigned length, begin, compressed;
-  char *key = 0, *langtag = 0, *transkey = 0;
-  ucvector decoded;
-  ucvector_init(&decoded);
-
-  while(!error) /*not really a while loop, only used to break on error*/
-  {
-    /*Quick check if the chunk length isn't too small. Even without check
-    it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
-    if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
-
-    /*read the key*/
-    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
-    if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
-
-    key = (char*)lodepng_malloc(length + 1);
-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
-
-    /*read the compression method*/
-    compressed = data[length + 1];
-    if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
-
-    /*even though it's not allowed by the standard, no error is thrown if
-    there's no null termination char, if the text is empty for the next 3 texts*/
-
-    /*read the langtag*/
-    begin = length + 3;
-    length = 0;
-    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
-
-    langtag = (char*)lodepng_malloc(length + 1);
-    if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    langtag[length] = 0;
-    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
-
-    /*read the transkey*/
-    begin += length + 1;
-    length = 0;
-    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
-
-    transkey = (char*)lodepng_malloc(length + 1);
-    if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    transkey[length] = 0;
-    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
-
-    /*read the actual text*/
-    begin += length + 1;
-
-    length = chunkLength < begin ? 0 : chunkLength - begin;
-
-    if(compressed)
-    {
-      /*will fail if zlib error, e.g. if length is too small*/
-      error = zlib_decompress(&decoded.data, &decoded.size,
-                              (unsigned char*)(&data[begin]),
-                              length, zlibsettings);
-      if(error) break;
-      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
-      ucvector_push_back(&decoded, 0);
-    }
-    else
-    {
-      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
-
-      decoded.data[length] = 0;
-      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
-    }
-
-    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data);
-
-    break;
-  }
-
-  lodepng_free(key);
-  lodepng_free(langtag);
-  lodepng_free(transkey);
-  ucvector_cleanup(&decoded);
-
-  return error;
-}
-
-static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  if(chunkLength != 7) return 73; /*invalid tIME chunk size*/
-
-  info->time_defined = 1;
-  info->time.year = 256u * data[0] + data[1];
-  info->time.month = data[2];
-  info->time.day = data[3];
-  info->time.hour = data[4];
-  info->time.minute = data[5];
-  info->time.second = data[6];
-
-  return 0; /* OK */
-}
-
-static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/
-
-  info->phys_defined = 1;
-  info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3];
-  info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7];
-  info->phys_unit = data[8];
-
-  return 0; /* OK */
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/
-static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
-                          LodePNGState* state,
-                          const unsigned char* in, size_t insize)
-{
-  unsigned char IEND = 0;
-  const unsigned char* chunk;
-  size_t i;
-  ucvector idat; /*the data from idat chunks*/
-  ucvector scanlines;
-  size_t predict;
-  size_t numpixels;
-  size_t outsize = 0;
-
-  /*for unknown chunk order*/
-  unsigned unknown = 0;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-  /*provide some proper output values if error will happen*/
-  *out = 0;
-
-  state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
-  if(state->error) return;
-
-  numpixels = *w * *h;
-
-  /*multiplication overflow*/
-  if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92);
-  /*multiplication overflow possible further below. Allows up to 2^31-1 pixel
-  bytes with 16-bit RGBA, the rest is room for filter bytes.*/
-  if(numpixels > 268435455) CERROR_RETURN(state->error, 92);
-
-  ucvector_init(&idat);
-  chunk = &in[33]; /*first byte of the first chunk after the header*/
-
-  /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
-  IDAT data is put at the start of the in buffer*/
-  while(!IEND && !state->error)
-  {
-    unsigned chunkLength;
-    const unsigned char* data; /*the data in the chunk*/
-
-    /*error: size of the in buffer too small to contain next chunk*/
-    if((size_t)((chunk - in) + 12) > insize || chunk < in) CERROR_BREAK(state->error, 30);
-
-    /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
-    chunkLength = lodepng_chunk_length(chunk);
-    /*error: chunk length larger than the max PNG chunk size*/
-    if(chunkLength > 2147483647) CERROR_BREAK(state->error, 63);
-
-    if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in)
-    {
-      CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
-    }
-
-    data = lodepng_chunk_data_const(chunk);
-
-    /*IDAT chunk, containing compressed image data*/
-    if(lodepng_chunk_type_equals(chunk, "IDAT"))
-    {
-      size_t oldsize = idat.size;
-      if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
-      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i];
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-      critical_pos = 3;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    }
-    /*IEND chunk*/
-    else if(lodepng_chunk_type_equals(chunk, "IEND"))
-    {
-      IEND = 1;
-    }
-    /*palette chunk (PLTE)*/
-    else if(lodepng_chunk_type_equals(chunk, "PLTE"))
-    {
-      state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
-      if(state->error) break;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-      critical_pos = 2;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    }
-    /*palette transparency chunk (tRNS)*/
-    else if(lodepng_chunk_type_equals(chunk, "tRNS"))
-    {
-      state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
-      if(state->error) break;
-    }
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*background color chunk (bKGD)*/
-    else if(lodepng_chunk_type_equals(chunk, "bKGD"))
-    {
-      state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
-      if(state->error) break;
-    }
-    /*text chunk (tEXt)*/
-    else if(lodepng_chunk_type_equals(chunk, "tEXt"))
-    {
-      if(state->decoder.read_text_chunks)
-      {
-        state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
-        if(state->error) break;
-      }
-    }
-    /*compressed text chunk (zTXt)*/
-    else if(lodepng_chunk_type_equals(chunk, "zTXt"))
-    {
-      if(state->decoder.read_text_chunks)
-      {
-        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
-        if(state->error) break;
-      }
-    }
-    /*international text chunk (iTXt)*/
-    else if(lodepng_chunk_type_equals(chunk, "iTXt"))
-    {
-      if(state->decoder.read_text_chunks)
-      {
-        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
-        if(state->error) break;
-      }
-    }
-    else if(lodepng_chunk_type_equals(chunk, "tIME"))
-    {
-      state->error = readChunk_tIME(&state->info_png, data, chunkLength);
-      if(state->error) break;
-    }
-    else if(lodepng_chunk_type_equals(chunk, "pHYs"))
-    {
-      state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    else /*it's not an implemented chunk type, so ignore it: skip over the data*/
-    {
-      /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
-      if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69);
-
-      unknown = 1;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-      if(state->decoder.remember_unknown_chunks)
-      {
-        state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
-                                            &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
-        if(state->error) break;
-      }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    }
-
-    if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/
-    {
-      if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
-    }
-
-    if(!IEND) chunk = lodepng_chunk_next_const(chunk);
-  }
-
-  ucvector_init(&scanlines);
-  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
-  If the decompressed size does not match the prediction, the image must be corrupt.*/
-  if(state->info_png.interlace_method == 0)
-  {
-    /*The extra *h is added because this are the filter bytes every scanline starts with*/
-    predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h;
-  }
-  else
-  {
-    /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/
-    const LodePNGColorMode* color = &state->info_png.color;
-    predict = 0;
-    predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
-    if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
-    predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3);
-    if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2);
-    predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2);
-    if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1);
-    predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1);
-  }
-  if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/
-  if(!state->error)
-  {
-    state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data,
-                                   idat.size, &state->decoder.zlibsettings);
-    if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/
-  }
-  ucvector_cleanup(&idat);
-
-  if(!state->error)
-  {
-    outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
-    *out = (unsigned char*)lodepng_malloc(outsize);
-    if(!*out) state->error = 83; /*alloc fail*/
-  }
-  if(!state->error)
-  {
-    for(i = 0; i < outsize; i++) (*out)[i] = 0;
-    state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png);
-  }
-  ucvector_cleanup(&scanlines);
-}
-
-unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
-                        LodePNGState* state,
-                        const unsigned char* in, size_t insize)
-{
-  *out = 0;
-  decodeGeneric(out, w, h, state, in, insize);
-  if(state->error) return state->error;
-  if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color))
-  {
-    /*same color type, no copying or converting of data needed*/
-    /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype
-    the raw image has to the end user*/
-    if(!state->decoder.color_convert)
-    {
-      state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
-      if(state->error) return state->error;
-    }
-  }
-  else
-  {
-    /*color conversion needed; sort of copy of the data*/
-    unsigned char* data = *out;
-    size_t outsize;
-
-    /*TODO: check if this works according to the statement in the documentation: "The converter can convert
-    from greyscale input color type, to 8-bit greyscale or greyscale with alpha"*/
-    if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA)
-       && !(state->info_raw.bitdepth == 8))
-    {
-      return 56; /*unsupported color mode conversion*/
-    }
-
-    outsize = lodepng_get_raw_size(*w, *h, &state->info_raw);
-    *out = (unsigned char*)lodepng_malloc(outsize);
-    if(!(*out))
-    {
-      state->error = 83; /*alloc fail*/
-    }
-    else state->error = lodepng_convert(*out, data, &state->info_raw,
-                                        &state->info_png.color, *w, *h);
-    lodepng_free(data);
-  }
-  return state->error;
-}
-
-unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
-                               size_t insize, LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned error;
-  LodePNGState state;
-  lodepng_state_init(&state);
-  state.info_raw.colortype = colortype;
-  state.info_raw.bitdepth = bitdepth;
-  error = lodepng_decode(out, w, h, &state, in, insize);
-  lodepng_state_cleanup(&state);
-  return error;
-}
-
-unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
-{
-  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8);
-}
-
-unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
-{
-  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
-                             LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned char* buffer = 0;
-  size_t buffersize;
-  unsigned error;
-  error = lodepng_load_file(&buffer, &buffersize, filename);
-  if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth);
-  lodepng_free(buffer);
-  return error;
-}
-
-unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
-{
-  return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8);
-}
-
-unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
-{
-  return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8);
-}
-#endif /*LODEPNG_COMPILE_DISK*/
-
-void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings)
-{
-  settings->color_convert = 1;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  settings->read_text_chunks = 1;
-  settings->remember_unknown_chunks = 0;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-  settings->ignore_crc = 0;
-  lodepng_decompress_settings_init(&settings->zlibsettings);
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
-
-void lodepng_state_init(LodePNGState* state)
-{
-#ifdef LODEPNG_COMPILE_DECODER
-  lodepng_decoder_settings_init(&state->decoder);
-#endif /*LODEPNG_COMPILE_DECODER*/
-#ifdef LODEPNG_COMPILE_ENCODER
-  lodepng_encoder_settings_init(&state->encoder);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-  lodepng_color_mode_init(&state->info_raw);
-  lodepng_info_init(&state->info_png);
-  state->error = 1;
-}
-
-void lodepng_state_cleanup(LodePNGState* state)
-{
-  lodepng_color_mode_cleanup(&state->info_raw);
-  lodepng_info_cleanup(&state->info_png);
-}
-
-void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source)
-{
-  lodepng_state_cleanup(dest);
-  *dest = *source;
-  lodepng_color_mode_init(&dest->info_raw);
-  lodepng_info_init(&dest->info_png);
-  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return;
-  dest->error = lodepng_info_copy(&dest->info_png, &source->info_png); if(dest->error) return;
-}
-
-#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / PNG Encoder                                                            / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*chunkName must be string of 4 characters*/
-static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length)
-{
-  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data));
-  out->allocsize = out->size; /*fix the allocsize again*/
-  return 0;
-}
-
-static void writeSignature(ucvector* out)
-{
-  /*8 bytes PNG signature, aka the magic bytes*/
-  ucvector_push_back(out, 137);
-  ucvector_push_back(out, 80);
-  ucvector_push_back(out, 78);
-  ucvector_push_back(out, 71);
-  ucvector_push_back(out, 13);
-  ucvector_push_back(out, 10);
-  ucvector_push_back(out, 26);
-  ucvector_push_back(out, 10);
-}
-
-static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
-                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method)
-{
-  unsigned error = 0;
-  ucvector header;
-  ucvector_init(&header);
-
-  lodepng_add32bitInt(&header, w); /*width*/
-  lodepng_add32bitInt(&header, h); /*height*/
-  ucvector_push_back(&header, (unsigned char)bitdepth); /*bit depth*/
-  ucvector_push_back(&header, (unsigned char)colortype); /*color type*/
-  ucvector_push_back(&header, 0); /*compression method*/
-  ucvector_push_back(&header, 0); /*filter method*/
-  ucvector_push_back(&header, interlace_method); /*interlace method*/
-
-  error = addChunk(out, "IHDR", header.data, header.size);
-  ucvector_cleanup(&header);
-
-  return error;
-}
-
-static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info)
-{
-  unsigned error = 0;
-  size_t i;
-  ucvector PLTE;
-  ucvector_init(&PLTE);
-  for(i = 0; i != info->palettesize * 4; ++i)
-  {
-    /*add all channels except alpha channel*/
-    if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]);
-  }
-  error = addChunk(out, "PLTE", PLTE.data, PLTE.size);
-  ucvector_cleanup(&PLTE);
-
-  return error;
-}
-
-static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info)
-{
-  unsigned error = 0;
-  size_t i;
-  ucvector tRNS;
-  ucvector_init(&tRNS);
-  if(info->colortype == LCT_PALETTE)
-  {
-    size_t amount = info->palettesize;
-    /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
-    for(i = info->palettesize; i != 0; --i)
-    {
-      if(info->palette[4 * (i - 1) + 3] == 255) --amount;
-      else break;
-    }
-    /*add only alpha channel*/
-    for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]);
-  }
-  else if(info->colortype == LCT_GREY)
-  {
-    if(info->key_defined)
-    {
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255));
-    }
-  }
-  else if(info->colortype == LCT_RGB)
-  {
-    if(info->key_defined)
-    {
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255));
-    }
-  }
-
-  error = addChunk(out, "tRNS", tRNS.data, tRNS.size);
-  ucvector_cleanup(&tRNS);
-
-  return error;
-}
-
-static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
-                              LodePNGCompressSettings* zlibsettings)
-{
-  ucvector zlibdata;
-  unsigned error = 0;
-
-  /*compress with the Zlib compressor*/
-  ucvector_init(&zlibdata);
-  error = zlib_compress(&zlibdata.data, &zlibdata.size, data, datasize, zlibsettings);
-  if(!error) error = addChunk(out, "IDAT", zlibdata.data, zlibdata.size);
-  ucvector_cleanup(&zlibdata);
-
-  return error;
-}
-
-static unsigned addChunk_IEND(ucvector* out)
-{
-  unsigned error = 0;
-  error = addChunk(out, "IEND", 0, 0);
-  return error;
-}
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-
-static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring)
-{
-  unsigned error = 0;
-  size_t i;
-  ucvector text;
-  ucvector_init(&text);
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&text, 0); /*0 termination char*/
-  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
-  error = addChunk(out, "tEXt", text.data, text.size);
-  ucvector_cleanup(&text);
-
-  return error;
-}
-
-static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
-                              LodePNGCompressSettings* zlibsettings)
-{
-  unsigned error = 0;
-  ucvector data, compressed;
-  size_t i, textsize = strlen(textstring);
-
-  ucvector_init(&data);
-  ucvector_init(&compressed);
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&data, 0); /*0 termination char*/
-  ucvector_push_back(&data, 0); /*compression method: 0*/
-
-  error = zlib_compress(&compressed.data, &compressed.size,
-                        (unsigned char*)textstring, textsize, zlibsettings);
-  if(!error)
-  {
-    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
-    error = addChunk(out, "zTXt", data.data, data.size);
-  }
-
-  ucvector_cleanup(&compressed);
-  ucvector_cleanup(&data);
-  return error;
-}
-
-static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
-                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings)
-{
-  unsigned error = 0;
-  ucvector data;
-  size_t i, textsize = strlen(textstring);
-
-  ucvector_init(&data);
-
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&data, 0); /*null termination char*/
-  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
-  ucvector_push_back(&data, 0); /*compression method*/
-  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
-  ucvector_push_back(&data, 0); /*null termination char*/
-  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
-  ucvector_push_back(&data, 0); /*null termination char*/
-
-  if(compressed)
-  {
-    ucvector compressed_data;
-    ucvector_init(&compressed_data);
-    error = zlib_compress(&compressed_data.data, &compressed_data.size,
-                          (unsigned char*)textstring, textsize, zlibsettings);
-    if(!error)
-    {
-      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
-    }
-    ucvector_cleanup(&compressed_data);
-  }
-  else /*not compressed*/
-  {
-    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
-  }
-
-  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
-  ucvector_cleanup(&data);
-  return error;
-}
-
-static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info)
-{
-  unsigned error = 0;
-  ucvector bKGD;
-  ucvector_init(&bKGD);
-  if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA)
-  {
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8));
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255));
-  }
-  else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA)
-  {
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8));
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255));
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_g >> 8));
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_g & 255));
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_b >> 8));
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_b & 255));
-  }
-  else if(info->color.colortype == LCT_PALETTE)
-  {
-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); /*palette index*/
-  }
-
-  error = addChunk(out, "bKGD", bKGD.data, bKGD.size);
-  ucvector_cleanup(&bKGD);
-
-  return error;
-}
-
-static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time)
-{
-  unsigned error = 0;
-  unsigned char* data = (unsigned char*)lodepng_malloc(7);
-  if(!data) return 83; /*alloc fail*/
-  data[0] = (unsigned char)(time->year >> 8);
-  data[1] = (unsigned char)(time->year & 255);
-  data[2] = (unsigned char)time->month;
-  data[3] = (unsigned char)time->day;
-  data[4] = (unsigned char)time->hour;
-  data[5] = (unsigned char)time->minute;
-  data[6] = (unsigned char)time->second;
-  error = addChunk(out, "tIME", data, 7);
-  lodepng_free(data);
-  return error;
-}
-
-static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info)
-{
-  unsigned error = 0;
-  ucvector data;
-  ucvector_init(&data);
-
-  lodepng_add32bitInt(&data, info->phys_x);
-  lodepng_add32bitInt(&data, info->phys_y);
-  ucvector_push_back(&data, info->phys_unit);
-
-  error = addChunk(out, "pHYs", data.data, data.size);
-  ucvector_cleanup(&data);
-
-  return error;
-}
-
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
-                           size_t length, size_t bytewidth, unsigned char filterType)
-{
-  size_t i;
-  switch(filterType)
-  {
-    case 0: /*None*/
-      for(i = 0; i != length; ++i) out[i] = scanline[i];
-      break;
-    case 1: /*Sub*/
-      for(i = 0; i != bytewidth; ++i) out[i] = scanline[i];
-      for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth];
-      break;
-    case 2: /*Up*/
-      if(prevline)
-      {
-        for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i];
-      }
-      else
-      {
-        for(i = 0; i != length; ++i) out[i] = scanline[i];
-      }
-      break;
-    case 3: /*Average*/
-      if(prevline)
-      {
-        for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1);
-        for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1);
-      }
-      else
-      {
-        for(i = 0; i != bytewidth; ++i) out[i] = scanline[i];
-        for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1);
-      }
-      break;
-    case 4: /*Paeth*/
-      if(prevline)
-      {
-        /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/
-        for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]);
-        for(i = bytewidth; i < length; ++i)
-        {
-          out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth]));
-        }
-      }
-      else
-      {
-        for(i = 0; i != bytewidth; ++i) out[i] = scanline[i];
-        /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/
-        for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]);
-      }
-      break;
-    default: return; /*unexisting filter type given*/
-  }
-}
-
-/* log2 approximation. A slight bit faster than std::log. */
-static float flog2(float f)
-{
-  float result = 0;
-  while(f > 32) { result += 4; f /= 16; }
-  while(f > 2) { ++result; f /= 2; }
-  return result + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f);
-}
-
-static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
-                       const LodePNGColorMode* info, const LodePNGEncoderSettings* settings)
-{
-  /*
-  For PNG filter method 0
-  out must be a buffer with as size: h + (w * h * bpp + 7) / 8, because there are
-  the scanlines with 1 extra byte per scanline
-  */
-
-  unsigned bpp = lodepng_get_bpp(info);
-  /*the width of a scanline in bytes, not including the filter type*/
-  size_t linebytes = (w * bpp + 7) / 8;
-  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
-  size_t bytewidth = (bpp + 7) / 8;
-  const unsigned char* prevline = 0;
-  unsigned x, y;
-  unsigned error = 0;
-  LodePNGFilterStrategy strategy = settings->filter_strategy;
-
-  /*
-  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
-   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
-      use fixed filtering, with the filter None).
-   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
-     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
-     all five filters and select the filter that produces the smallest sum of absolute values per row.
-  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
-
-  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
-  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
-  heuristic is used.
-  */
-  if(settings->filter_palette_zero &&
-     (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO;
-
-  if(bpp == 0) return 31; /*error: invalid color type*/
-
-  if(strategy == LFS_ZERO)
-  {
-    for(y = 0; y != h; ++y)
-    {
-      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
-      size_t inindex = linebytes * y;
-      out[outindex] = 0; /*filter type byte*/
-      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0);
-      prevline = &in[inindex];
-    }
-  }
-  else if(strategy == LFS_MINSUM)
-  {
-    /*adaptive filtering*/
-    size_t sum[5];
-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
-    size_t smallest = 0;
-    unsigned char type, bestType = 0;
-
-    for(type = 0; type != 5; ++type)
-    {
-      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-      if(!attempt[type]) return 83; /*alloc fail*/
-    }
-
-    if(!error)
-    {
-      for(y = 0; y != h; ++y)
-      {
-        /*try the 5 filter types*/
-        for(type = 0; type != 5; ++type)
-        {
-          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-
-          /*calculate the sum of the result*/
-          sum[type] = 0;
-          if(type == 0)
-          {
-            for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]);
-          }
-          else
-          {
-            for(x = 0; x != linebytes; ++x)
-            {
-              /*For differences, each byte should be treated as signed, values above 127 are negative
-              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
-              This means filtertype 0 is almost never chosen, but that is justified.*/
-              unsigned char s = attempt[type][x];
-              sum[type] += s < 128 ? s : (255U - s);
-            }
-          }
-
-          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
-          if(type == 0 || sum[type] < smallest)
-          {
-            bestType = type;
-            smallest = sum[type];
-          }
-        }
-
-        prevline = &in[y * linebytes];
-
-        /*now fill the out values*/
-        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
-      }
-    }
-
-    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
-  }
-  else if(strategy == LFS_ENTROPY)
-  {
-    float sum[5];
-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
-    float smallest = 0;
-    unsigned type, bestType = 0;
-    unsigned count[256];
-
-    for(type = 0; type != 5; ++type)
-    {
-      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-      if(!attempt[type]) return 83; /*alloc fail*/
-    }
-
-    for(y = 0; y != h; ++y)
-    {
-      /*try the 5 filter types*/
-      for(type = 0; type != 5; ++type)
-      {
-        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-        for(x = 0; x != 256; ++x) count[x] = 0;
-        for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
-        ++count[type]; /*the filter type itself is part of the scanline*/
-        sum[type] = 0;
-        for(x = 0; x != 256; ++x)
-        {
-          float p = count[x] / (float)(linebytes + 1);
-          sum[type] += count[x] == 0 ? 0 : flog2(1 / p) * p;
-        }
-        /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
-        if(type == 0 || sum[type] < smallest)
-        {
-          bestType = type;
-          smallest = sum[type];
-        }
-      }
-
-      prevline = &in[y * linebytes];
-
-      /*now fill the out values*/
-      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
-    }
-
-    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
-  }
-  else if(strategy == LFS_PREDEFINED)
-  {
-    for(y = 0; y != h; ++y)
-    {
-      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
-      size_t inindex = linebytes * y;
-      unsigned char type = settings->predefined_filters[y];
-      out[outindex] = type; /*filter type byte*/
-      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
-      prevline = &in[inindex];
-    }
-  }
-  else if(strategy == LFS_BRUTE_FORCE)
-  {
-    /*brute force filter chooser.
-    deflate the scanline after every filter attempt to see which one deflates best.
-    This is very slow and gives only slightly smaller, sometimes even larger, result*/
-    size_t size[5];
-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
-    size_t smallest = 0;
-    unsigned type = 0, bestType = 0;
-    unsigned char* dummy;
-    LodePNGCompressSettings zlibsettings = settings->zlibsettings;
-    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
-    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
-    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
-    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
-    zlibsettings.btype = 1;
-    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
-    images only, so disable it*/
-    zlibsettings.custom_zlib = 0;
-    zlibsettings.custom_deflate = 0;
-    for(type = 0; type != 5; ++type)
-    {
-      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-      if(!attempt[type]) return 83; /*alloc fail*/
-    }
-    for(y = 0; y != h; ++y) /*try the 5 filter types*/
-    {
-      for(type = 0; type != 5; ++type)
-      {
-        unsigned testsize = linebytes;
-        /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
-
-        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-        size[type] = 0;
-        dummy = 0;
-        zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
-        lodepng_free(dummy);
-        /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
-        if(type == 0 || size[type] < smallest)
-        {
-          bestType = type;
-          smallest = size[type];
-        }
-      }
-      prevline = &in[y * linebytes];
-      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
-    }
-    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
-  }
-  else return 88; /* unknown filter strategy */
-
-  return error;
-}
-
-static void addPaddingBits(unsigned char* out, const unsigned char* in,
-                           size_t olinebits, size_t ilinebits, unsigned h)
-{
-  /*The opposite of the removePaddingBits function
-  olinebits must be >= ilinebits*/
-  unsigned y;
-  size_t diff = olinebits - ilinebits;
-  size_t obp = 0, ibp = 0; /*bit pointers*/
-  for(y = 0; y != h; ++y)
-  {
-    size_t x;
-    for(x = 0; x < ilinebits; ++x)
-    {
-      unsigned char bit = readBitFromReversedStream(&ibp, in);
-      setBitOfReversedStream(&obp, out, bit);
-    }
-    /*obp += diff; --> no, fill in some value in the padding bits too, to avoid
-    "Use of uninitialised value of size ###" warning from valgrind*/
-    for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0);
-  }
-}
-
-/*
-in: non-interlaced image with size w*h
-out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with
- no padding bits between scanlines, but between reduced images so that each
- reduced image starts at a byte.
-bpp: bits per pixel
-there are no padding bits, not between scanlines, not between reduced images
-in has the following size in bits: w * h * bpp.
-out is possibly bigger due to padding bits between reduced images
-NOTE: comments about padding bits are only relevant if bpp < 8
-*/
-static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
-{
-  unsigned passw[7], passh[7];
-  size_t filter_passstart[8], padded_passstart[8], passstart[8];
-  unsigned i;
-
-  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-  if(bpp >= 8)
-  {
-    for(i = 0; i != 7; ++i)
-    {
-      unsigned x, y, b;
-      size_t bytewidth = bpp / 8;
-      for(y = 0; y < passh[i]; ++y)
-      for(x = 0; x < passw[i]; ++x)
-      {
-        size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth;
-        size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth;
-        for(b = 0; b < bytewidth; ++b)
-        {
-          out[pixeloutstart + b] = in[pixelinstart + b];
-        }
-      }
-    }
-  }
-  else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
-  {
-    for(i = 0; i != 7; ++i)
-    {
-      unsigned x, y, b;
-      unsigned ilinebits = bpp * passw[i];
-      unsigned olinebits = bpp * w;
-      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
-      for(y = 0; y < passh[i]; ++y)
-      for(x = 0; x < passw[i]; ++x)
-      {
-        ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
-        obp = (8 * passstart[i]) + (y * ilinebits + x * bpp);
-        for(b = 0; b < bpp; ++b)
-        {
-          unsigned char bit = readBitFromReversedStream(&ibp, in);
-          setBitOfReversedStream(&obp, out, bit);
-        }
-      }
-    }
-  }
-}
-
-/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image.
-return value is error**/
-static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                    unsigned w, unsigned h,
-                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings)
-{
-  /*
-  This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. Steps:
-  *) if no Adam7: 1) add padding bits (= posible extra bits per scanline if bpp < 8) 2) filter
-  *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
-  */
-  unsigned bpp = lodepng_get_bpp(&info_png->color);
-  unsigned error = 0;
-
-  if(info_png->interlace_method == 0)
-  {
-    *outsize = h + (h * ((w * bpp + 7) / 8)); /*image size plus an extra byte per scanline + possible padding bits*/
-    *out = (unsigned char*)lodepng_malloc(*outsize);
-    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
-
-    if(!error)
-    {
-      /*non multiple of 8 bits per scanline, padding bits needed per scanline*/
-      if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8)
-      {
-        unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7) / 8));
-        if(!padded) error = 83; /*alloc fail*/
-        if(!error)
-        {
-          addPaddingBits(padded, in, ((w * bpp + 7) / 8) * 8, w * bpp, h);
-          error = filter(*out, padded, w, h, &info_png->color, settings);
-        }
-        lodepng_free(padded);
-      }
-      else
-      {
-        /*we can immediately filter into the out buffer, no other steps needed*/
-        error = filter(*out, in, w, h, &info_png->color, settings);
-      }
-    }
-  }
-  else /*interlace_method is 1 (Adam7)*/
-  {
-    unsigned passw[7], passh[7];
-    size_t filter_passstart[8], padded_passstart[8], passstart[8];
-    unsigned char* adam7;
-
-    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
-    *out = (unsigned char*)lodepng_malloc(*outsize);
-    if(!(*out)) error = 83; /*alloc fail*/
-
-    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
-    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
-
-    if(!error)
-    {
-      unsigned i;
-
-      Adam7_interlace(adam7, in, w, h, bpp);
-      for(i = 0; i != 7; ++i)
-      {
-        if(bpp < 8)
-        {
-          unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]);
-          if(!padded) ERROR_BREAK(83); /*alloc fail*/
-          addPaddingBits(padded, &adam7[passstart[i]],
-                         ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]);
-          error = filter(&(*out)[filter_passstart[i]], padded,
-                         passw[i], passh[i], &info_png->color, settings);
-          lodepng_free(padded);
-        }
-        else
-        {
-          error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]],
-                         passw[i], passh[i], &info_png->color, settings);
-        }
-
-        if(error) break;
-      }
-    }
-
-    lodepng_free(adam7);
-  }
-
-  return error;
-}
-
-/*
-palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
-returns 0 if the palette is opaque,
-returns 1 if the palette has a single color with alpha 0 ==> color key
-returns 2 if the palette is semi-translucent.
-*/
-static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize)
-{
-  size_t i;
-  unsigned key = 0;
-  unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/
-  for(i = 0; i != palettesize; ++i)
-  {
-    if(!key && palette[4 * i + 3] == 0)
-    {
-      r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2];
-      key = 1;
-      i = (size_t)(-1); /*restart from beginning, to detect earlier opaque colors with key's value*/
-    }
-    else if(palette[4 * i + 3] != 255) return 2;
-    /*when key, no opaque RGB may have key's RGB*/
-    else if(key && r == palette[i * 4 + 0] && g == palette[i * 4 + 1] && b == palette[i * 4 + 2]) return 2;
-  }
-  return key;
-}
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize)
-{
-  unsigned char* inchunk = data;
-  while((size_t)(inchunk - data) < datasize)
-  {
-    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk));
-    out->allocsize = out->size; /*fix the allocsize again*/
-    inchunk = lodepng_chunk_next(inchunk);
-  }
-  return 0;
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-unsigned lodepng_encode(unsigned char** out, size_t* outsize,
-                        const unsigned char* image, unsigned w, unsigned h,
-                        LodePNGState* state)
-{
-  LodePNGInfo info;
-  ucvector outv;
-  unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
-  size_t datasize = 0;
-
-  /*provide some proper output values if error will happen*/
-  *out = 0;
-  *outsize = 0;
-  state->error = 0;
-
-  lodepng_info_init(&info);
-  lodepng_info_copy(&info, &state->info_png);
-
-  if((info.color.colortype == LCT_PALETTE || state->encoder.force_palette)
-      && (info.color.palettesize == 0 || info.color.palettesize > 256))
-  {
-    state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/
-    return state->error;
-  }
-
-  if(state->encoder.auto_convert)
-  {
-    state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw);
-  }
-  if(state->error) return state->error;
-
-  if(state->encoder.zlibsettings.btype > 2)
-  {
-    CERROR_RETURN_ERROR(state->error, 61); /*error: unexisting btype*/
-  }
-  if(state->info_png.interlace_method > 1)
-  {
-    CERROR_RETURN_ERROR(state->error, 71); /*error: unexisting interlace mode*/
-  }
-
-  state->error = checkColorValidity(info.color.colortype, info.color.bitdepth);
-  if(state->error) return state->error; /*error: unexisting color type given*/
-  state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
-  if(state->error) return state->error; /*error: unexisting color type given*/
-
-  if(!lodepng_color_mode_equal(&state->info_raw, &info.color))
-  {
-    unsigned char* converted;
-    size_t size = (w * h * (size_t)lodepng_get_bpp(&info.color) + 7) / 8;
-
-    converted = (unsigned char*)lodepng_malloc(size);
-    if(!converted && size) state->error = 83; /*alloc fail*/
-    if(!state->error)
-    {
-      state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
-    }
-    if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
-    lodepng_free(converted);
-  }
-  else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
-
-  ucvector_init(&outv);
-  while(!state->error) /*while only executed once, to break on error*/
-  {
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    size_t i;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    /*write signature and chunks*/
-    writeSignature(&outv);
-    /*IHDR*/
-    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*unknown chunks between IHDR and PLTE*/
-    if(info.unknown_chunks_data[0])
-    {
-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    /*PLTE*/
-    if(info.color.colortype == LCT_PALETTE)
-    {
-      addChunk_PLTE(&outv, &info.color);
-    }
-    if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA))
-    {
-      addChunk_PLTE(&outv, &info.color);
-    }
-    /*tRNS*/
-    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0)
-    {
-      addChunk_tRNS(&outv, &info.color);
-    }
-    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined)
-    {
-      addChunk_tRNS(&outv, &info.color);
-    }
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*bKGD (must come between PLTE and the IDAt chunks*/
-    if(info.background_defined) addChunk_bKGD(&outv, &info);
-    /*pHYs (must come before the IDAT chunks)*/
-    if(info.phys_defined) addChunk_pHYs(&outv, &info);
-
-    /*unknown chunks between PLTE and IDAT*/
-    if(info.unknown_chunks_data[1])
-    {
-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    /*IDAT (multiple IDAT chunks must be consecutive)*/
-    state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
-    if(state->error) break;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*tIME*/
-    if(info.time_defined) addChunk_tIME(&outv, &info.time);
-    /*tEXt and/or zTXt*/
-    for(i = 0; i != info.text_num; ++i)
-    {
-      if(strlen(info.text_keys[i]) > 79)
-      {
-        state->error = 66; /*text chunk too large*/
-        break;
-      }
-      if(strlen(info.text_keys[i]) < 1)
-      {
-        state->error = 67; /*text chunk too small*/
-        break;
-      }
-      if(state->encoder.text_compression)
-      {
-        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
-      }
-      else
-      {
-        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
-      }
-    }
-    /*LodePNG version id in text chunk*/
-    if(state->encoder.add_id)
-    {
-      unsigned alread_added_id_text = 0;
-      for(i = 0; i != info.text_num; ++i)
-      {
-        if(!strcmp(info.text_keys[i], "LodePNG"))
-        {
-          alread_added_id_text = 1;
-          break;
-        }
-      }
-      if(alread_added_id_text == 0)
-      {
-        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
-      }
-    }
-    /*iTXt*/
-    for(i = 0; i != info.itext_num; ++i)
-    {
-      if(strlen(info.itext_keys[i]) > 79)
-      {
-        state->error = 66; /*text chunk too large*/
-        break;
-      }
-      if(strlen(info.itext_keys[i]) < 1)
-      {
-        state->error = 67; /*text chunk too small*/
-        break;
-      }
-      addChunk_iTXt(&outv, state->encoder.text_compression,
-                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
-                    &state->encoder.zlibsettings);
-    }
-
-    /*unknown chunks between IDAT and IEND*/
-    if(info.unknown_chunks_data[2])
-    {
-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    addChunk_IEND(&outv);
-
-    break; /*this isn't really a while loop; no error happened so break out now!*/
-  }
-
-  lodepng_info_cleanup(&info);
-  lodepng_free(data);
-  /*instead of cleaning the vector up, give it to the output*/
-  *out = outv.data;
-  *outsize = outv.size;
-
-  return state->error;
-}
-
-unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
-                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned error;
-  LodePNGState state;
-  lodepng_state_init(&state);
-  state.info_raw.colortype = colortype;
-  state.info_raw.bitdepth = bitdepth;
-  state.info_png.color.colortype = colortype;
-  state.info_png.color.bitdepth = bitdepth;
-  lodepng_encode(out, outsize, image, w, h, &state);
-  error = state.error;
-  lodepng_state_cleanup(&state);
-  return error;
-}
-
-unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
-{
-  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8);
-}
-
-unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
-{
-  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
-                             LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned char* buffer;
-  size_t buffersize;
-  unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth);
-  if(!error) error = lodepng_save_file(buffer, buffersize, filename);
-  lodepng_free(buffer);
-  return error;
-}
-
-unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
-{
-  return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8);
-}
-
-unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
-{
-  return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8);
-}
-#endif /*LODEPNG_COMPILE_DISK*/
-
-void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings)
-{
-  lodepng_compress_settings_init(&settings->zlibsettings);
-  settings->filter_palette_zero = 1;
-  settings->filter_strategy = LFS_MINSUM;
-  settings->auto_convert = 1;
-  settings->force_palette = 0;
-  settings->predefined_filters = 0;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  settings->add_id = 0;
-  settings->text_compression = 1;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-}
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ERROR_TEXT
-/*
-This returns the description of a numerical error code in English. This is also
-the documentation of all the error codes.
-*/
-const char* lodepng_error_text(unsigned code)
-{
-  switch(code)
-  {
-    case 0: return "no error, everything went ok";
-    case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
-    case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
-    case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
-    case 13: return "problem while processing dynamic deflate block";
-    case 14: return "problem while processing dynamic deflate block";
-    case 15: return "problem while processing dynamic deflate block";
-    case 16: return "unexisting code while processing dynamic deflate block";
-    case 17: return "end of out buffer memory reached while inflating";
-    case 18: return "invalid distance code while inflating";
-    case 19: return "end of out buffer memory reached while inflating";
-    case 20: return "invalid deflate block BTYPE encountered while decoding";
-    case 21: return "NLEN is not ones complement of LEN in a deflate block";
-     /*end of out buffer memory reached while inflating:
-     This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
-     all the pixels of the image, given the color depth and image dimensions. Something that doesn't
-     happen in a normal, well encoded, PNG image.*/
-    case 22: return "end of out buffer memory reached while inflating";
-    case 23: return "end of in buffer memory reached while inflating";
-    case 24: return "invalid FCHECK in zlib header";
-    case 25: return "invalid compression method in zlib header";
-    case 26: return "FDICT encountered in zlib header while it's not used for PNG";
-    case 27: return "PNG file is smaller than a PNG header";
-    /*Checks the magic file header, the first 8 bytes of the PNG file*/
-    case 28: return "incorrect PNG signature, it's no PNG or corrupted";
-    case 29: return "first chunk is not the header chunk";
-    case 30: return "chunk length too large, chunk broken off at end of file";
-    case 31: return "illegal PNG color type or bpp";
-    case 32: return "illegal PNG compression method";
-    case 33: return "illegal PNG filter method";
-    case 34: return "illegal PNG interlace method";
-    case 35: return "chunk length of a chunk is too large or the chunk too small";
-    case 36: return "illegal PNG filter type encountered";
-    case 37: return "illegal bit depth for this color type given";
-    case 38: return "the palette is too big"; /*more than 256 colors*/
-    case 39: return "more palette alpha values given in tRNS chunk than there are colors in the palette";
-    case 40: return "tRNS chunk has wrong size for greyscale image";
-    case 41: return "tRNS chunk has wrong size for RGB image";
-    case 42: return "tRNS chunk appeared while it was not allowed for this color type";
-    case 43: return "bKGD chunk has wrong size for palette image";
-    case 44: return "bKGD chunk has wrong size for greyscale image";
-    case 45: return "bKGD chunk has wrong size for RGB image";
-    case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
-    case 49: return "jumped past memory while generating dynamic huffman tree";
-    case 50: return "jumped past memory while generating dynamic huffman tree";
-    case 51: return "jumped past memory while inflating huffman block";
-    case 52: return "jumped past memory while inflating";
-    case 53: return "size of zlib data too small";
-    case 54: return "repeat symbol in tree while there was no value symbol yet";
-    /*jumped past tree while generating huffman tree, this could be when the
-    tree will have more leaves than symbols after generating it out of the
-    given lenghts. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
-    case 55: return "jumped past tree while generating huffman tree";
-    case 56: return "given output image colortype or bitdepth not supported for color conversion";
-    case 57: return "invalid CRC encountered (checking CRC can be disabled)";
-    case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
-    case 59: return "requested color conversion not supported";
-    case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
-    case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
-    /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/
-    case 62: return "conversion from color to greyscale not supported";
-    case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; /*(2^31-1)*/
-    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
-    case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
-    case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
-    case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
-    case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
-    case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
-    case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)";
-    case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)";
-    case 73: return "invalid tIME chunk size";
-    case 74: return "invalid pHYs chunk size";
-    /*length could be wrong, or data chopped off*/
-    case 75: return "no null termination char found while decoding text chunk";
-    case 76: return "iTXt chunk too short to contain required bytes";
-    case 77: return "integer overflow in buffer size";
-    case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
-    case 79: return "failed to open file for writing";
-    case 80: return "tried creating a tree of 0 symbols";
-    case 81: return "lazy matching at pos 0 is impossible";
-    case 82: return "color conversion to palette requested while a color isn't in palette";
-    case 83: return "memory allocation failed";
-    case 84: return "given image too small to contain all pixels to be encoded";
-    case 86: return "impossible offset in lz77 encoding (internal bug)";
-    case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
-    case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
-    case 89: return "text chunk keyword too short or long: must have size 1-79";
-    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
-    case 90: return "windowsize must be a power of two";
-    case 91: return "invalid decompressed idat size";
-    case 92: return "too many pixels, not supported";
-    case 93: return "zero width or height is invalid";
-    case 94: return "header chunk must have a size of 13 bytes";
-  }
-  return "unknown error code";
-}
-#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // C++ Wrapper                                                          // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_CPP
-namespace lodepng
-{
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename)
-{
-  long size = lodepng_filesize(filename.c_str());
-  if(size < 0) return 78;
-  buffer.resize((size_t)size);
-  return size == 0 ? 0 : lodepng_buffer_file(&buffer[0], (size_t)size, filename.c_str());
-}
-
-/*write given buffer to the file, overwriting the file, it doesn't append to it.*/
-unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename)
-{
-  return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str());
-}
-#endif /* LODEPNG_COMPILE_DISK */
-
-#ifdef LODEPNG_COMPILE_ZLIB
-#ifdef LODEPNG_COMPILE_DECODER
-unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                    const LodePNGDecompressSettings& settings)
-{
-  unsigned char* buffer = 0;
-  size_t buffersize = 0;
-  unsigned error = zlib_decompress(&buffer, &buffersize, in, insize, &settings);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                    const LodePNGDecompressSettings& settings)
-{
-  return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings);
-}
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                  const LodePNGCompressSettings& settings)
-{
-  unsigned char* buffer = 0;
-  size_t buffersize = 0;
-  unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                  const LodePNGCompressSettings& settings)
-{
-  return compress(out, in.empty() ? 0 : &in[0], in.size(), settings);
-}
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_ZLIB */
-
-
-#ifdef LODEPNG_COMPILE_PNG
-
-State::State()
-{
-  lodepng_state_init(this);
-}
-
-State::State(const State& other)
-{
-  lodepng_state_init(this);
-  lodepng_state_copy(this, &other);
-}
-
-State::~State()
-{
-  lodepng_state_cleanup(this);
-}
-
-State& State::operator=(const State& other)
-{
-  lodepng_state_copy(this, &other);
-  return *this;
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
-                size_t insize, LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned char* buffer;
-  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
-  if(buffer && !error)
-  {
-    State state;
-    state.info_raw.colortype = colortype;
-    state.info_raw.bitdepth = bitdepth;
-    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth)
-{
-  return decode(out, w, h, in.empty() ? 0 : &in[0], (unsigned)in.size(), colortype, bitdepth);
-}
-
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const unsigned char* in, size_t insize)
-{
-  unsigned char* buffer = NULL;
-  unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize);
-  if(buffer && !error)
-  {
-    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-  }
-  lodepng_free(buffer);
-  return error;
-}
-
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const std::vector<unsigned char>& in)
-{
-  return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size());
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  std::vector<unsigned char> buffer;
-  unsigned error = load_file(buffer, filename);
-  if(error) return error;
-  return decode(out, w, h, buffer, colortype, bitdepth);
-}
-#endif /* LODEPNG_COMPILE_DECODER */
-#endif /* LODEPNG_COMPILE_DISK */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned char* buffer;
-  size_t buffersize;
-  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
-  return encode(out, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                State& state)
-{
-  unsigned char* buffer;
-  size_t buffersize;
-  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                State& state)
-{
-  if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84;
-  return encode(out, in.empty() ? 0 : &in[0], w, h, state);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned encode(const std::string& filename,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  std::vector<unsigned char> buffer;
-  unsigned error = encode(buffer, in, w, h, colortype, bitdepth);
-  if(!error) error = save_file(buffer, filename);
-  return error;
-}
-
-unsigned encode(const std::string& filename,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
-  return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
-}
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_PNG */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
diff --git a/3rdparty/lodepng/lodepng.h b/3rdparty/lodepng/lodepng.h
deleted file mode 100644
index 94e81955b..000000000
--- a/3rdparty/lodepng/lodepng.h
+++ /dev/null
@@ -1,1759 +0,0 @@
-/*
-LodePNG version 20160501
-
-Copyright (c) 2005-2016 Lode Vandevenne
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-    1. The origin of this software must not be misrepresented; you must not
-    claim that you wrote the original software. If you use this software
-    in a product, an acknowledgment in the product documentation would be
-    appreciated but is not required.
-
-    2. Altered source versions must be plainly marked as such, and must not be
-    misrepresented as being the original software.
-
-    3. This notice may not be removed or altered from any source
-    distribution.
-*/
-
-#ifndef LODEPNG_H
-#define LODEPNG_H
-
-#include <string.h> /*for size_t*/
-
-extern const char* LODEPNG_VERSION_STRING;
-
-/*
-The following #defines are used to create code sections. They can be disabled
-to disable code sections, which can give faster compile time and smaller binary.
-The "NO_COMPILE" defines are designed to be used to pass as defines to the
-compiler command to disable them without modifying this header, e.g.
--DLODEPNG_NO_COMPILE_ZLIB for gcc.
-In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
-allow implementing a custom lodepng_crc32.
-*/
-/*deflate & zlib. If disabled, you must specify alternative zlib functions in
-the custom_zlib field of the compress and decompress settings*/
-#ifndef LODEPNG_NO_COMPILE_ZLIB
-#define LODEPNG_COMPILE_ZLIB
-#endif
-/*png encoder and png decoder*/
-#ifndef LODEPNG_NO_COMPILE_PNG
-#define LODEPNG_COMPILE_PNG
-#endif
-/*deflate&zlib decoder and png decoder*/
-#ifndef LODEPNG_NO_COMPILE_DECODER
-#define LODEPNG_COMPILE_DECODER
-#endif
-/*deflate&zlib encoder and png encoder*/
-#ifndef LODEPNG_NO_COMPILE_ENCODER
-#define LODEPNG_COMPILE_ENCODER
-#endif
-/*the optional built in harddisk file loading and saving functions*/
-#ifndef LODEPNG_NO_COMPILE_DISK
-#define LODEPNG_COMPILE_DISK
-#endif
-/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
-#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
-#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
-#endif
-/*ability to convert error numerical codes to English text string*/
-#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
-#define LODEPNG_COMPILE_ERROR_TEXT
-#endif
-/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
-you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
-source files with custom allocators.*/
-#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
-#define LODEPNG_COMPILE_ALLOCATORS
-#endif
-/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
-#ifdef __cplusplus
-#ifndef LODEPNG_NO_COMPILE_CPP
-#define LODEPNG_COMPILE_CPP
-#endif
-#endif
-
-#ifdef LODEPNG_COMPILE_CPP
-#include <vector>
-#include <string>
-#endif /*LODEPNG_COMPILE_CPP*/
-
-#ifdef LODEPNG_COMPILE_PNG
-/*The PNG color types (also used for raw).*/
-typedef enum LodePNGColorType
-{
-  LCT_GREY = 0, /*greyscale: 1,2,4,8,16 bit*/
-  LCT_RGB = 2, /*RGB: 8,16 bit*/
-  LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
-  LCT_GREY_ALPHA = 4, /*greyscale with alpha: 8,16 bit*/
-  LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/
-} LodePNGColorType;
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Converts PNG data in memory to raw pixel data.
-out: Output parameter. Pointer to buffer that will contain the raw pixel data.
-     After decoding, its size is w * h * (bytes per pixel) bytes larger than
-     initially. Bytes per pixel depends on colortype and bitdepth.
-     Must be freed after usage with free(*out).
-     Note: for 16-bit per channel colors, uses big endian format like PNG does.
-w: Output parameter. Pointer to width of pixel data.
-h: Output parameter. Pointer to height of pixel data.
-in: Memory buffer with the PNG file.
-insize: size of the in buffer.
-colortype: the desired color type for the raw output image. See explanation on PNG color types.
-bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
-Return value: LodePNG error code (0 means no error).
-*/
-unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
-                               const unsigned char* in, size_t insize,
-                               LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
-unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
-                          const unsigned char* in, size_t insize);
-
-/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
-unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
-                          const unsigned char* in, size_t insize);
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Load PNG from disk, from file with given name.
-Same as the other decode functions, but instead takes a filename as input.
-*/
-unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
-                             const char* filename,
-                             LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
-unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
-                               const char* filename);
-
-/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
-unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
-                               const char* filename);
-#endif /*LODEPNG_COMPILE_DISK*/
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
-  of the output PNG image cannot be chosen, they are automatically determined
-  by the colortype, bitdepth and content of the input pixel data.
-  Note: for 16-bit per channel colors, needs big endian format like PNG does.
-out: Output parameter. Pointer to buffer that will contain the PNG image data.
-     Must be freed after usage with free(*out).
-outsize: Output parameter. Pointer to the size in bytes of the out buffer.
-image: The raw pixel data to encode. The size of this buffer should be
-       w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
-w: width of the raw pixel data in pixels.
-h: height of the raw pixel data in pixels.
-colortype: the color type of the raw input image. See explanation on PNG color types.
-bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
-Return value: LodePNG error code (0 means no error).
-*/
-unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
-                               const unsigned char* image, unsigned w, unsigned h,
-                               LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
-unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
-                          const unsigned char* image, unsigned w, unsigned h);
-
-/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
-unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
-                          const unsigned char* image, unsigned w, unsigned h);
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Converts raw pixel data into a PNG file on disk.
-Same as the other encode functions, but instead takes a filename as output.
-NOTE: This overwrites existing files without warning!
-*/
-unsigned lodepng_encode_file(const char* filename,
-                             const unsigned char* image, unsigned w, unsigned h,
-                             LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
-unsigned lodepng_encode32_file(const char* filename,
-                               const unsigned char* image, unsigned w, unsigned h);
-
-/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
-unsigned lodepng_encode24_file(const char* filename,
-                               const unsigned char* image, unsigned w, unsigned h);
-#endif /*LODEPNG_COMPILE_DISK*/
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-
-#ifdef LODEPNG_COMPILE_CPP
-namespace lodepng
-{
-#ifdef LODEPNG_COMPILE_DECODER
-/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
-is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const unsigned char* in, size_t insize,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const std::vector<unsigned char>& in,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Converts PNG file from disk to raw pixel data in memory.
-Same as the other decode functions, but instead takes a filename as input.
-*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const std::string& filename,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
-is that of the raw input data. The output PNG color type will be auto chosen.*/
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Converts 32-bit RGBA raw pixel data into a PNG file on disk.
-Same as the other encode functions, but instead takes a filename as output.
-NOTE: This overwrites existing files without warning!
-*/
-unsigned encode(const std::string& filename,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-unsigned encode(const std::string& filename,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_ENCODER */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ERROR_TEXT
-/*Returns an English description of the numerical error code.*/
-const char* lodepng_error_text(unsigned code);
-#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*Settings for zlib decompression*/
-typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
-struct LodePNGDecompressSettings
-{
-  unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
-
-  /*use custom zlib decoder instead of built in one (default: null)*/
-  unsigned (*custom_zlib)(unsigned char**, size_t*,
-                          const unsigned char*, size_t,
-                          const LodePNGDecompressSettings*);
-  /*use custom deflate decoder instead of built in one (default: null)
-  if custom_zlib is used, custom_deflate is ignored since only the built in
-  zlib function will call custom_deflate*/
-  unsigned (*custom_inflate)(unsigned char**, size_t*,
-                             const unsigned char*, size_t,
-                             const LodePNGDecompressSettings*);
-
-  const void* custom_context; /*optional custom settings for custom functions*/
-};
-
-extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
-void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Settings for zlib compression. Tweaking these settings tweaks the balance
-between speed and compression ratio.
-*/
-typedef struct LodePNGCompressSettings LodePNGCompressSettings;
-struct LodePNGCompressSettings /*deflate = compress*/
-{
-  /*LZ77 related settings*/
-  unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/
-  unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
-  unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
-  unsigned minmatch; /*mininum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/
-  unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
-  unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
-
-  /*use custom zlib encoder instead of built in one (default: null)*/
-  unsigned (*custom_zlib)(unsigned char**, size_t*,
-                          const unsigned char*, size_t,
-                          const LodePNGCompressSettings*);
-  /*use custom deflate encoder instead of built in one (default: null)
-  if custom_zlib is used, custom_deflate is ignored since only the built in
-  zlib function will call custom_deflate*/
-  unsigned (*custom_deflate)(unsigned char**, size_t*,
-                             const unsigned char*, size_t,
-                             const LodePNGCompressSettings*);
-
-  const void* custom_context; /*optional custom settings for custom functions*/
-};
-
-extern const LodePNGCompressSettings lodepng_default_compress_settings;
-void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_PNG
-/*
-Color mode of an image. Contains all information required to decode the pixel
-bits to RGBA colors. This information is the same as used in the PNG file
-format, and is used both for PNG and raw image data in LodePNG.
-*/
-typedef struct LodePNGColorMode
-{
-  /*header (IHDR)*/
-  LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
-  unsigned bitdepth;  /*bits per sample, see PNG standard or documentation further in this header file*/
-
-  /*
-  palette (PLTE and tRNS)
-
-  Dynamically allocated with the colors of the palette, including alpha.
-  When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use
-  lodepng_palette_clear, then for each color use lodepng_palette_add.
-  If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette.
-
-  When decoding, by default you can ignore this palette, since LodePNG already
-  fills the palette colors in the pixels of the raw RGBA output.
-
-  The palette is only supported for color type 3.
-  */
-  unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/
-  size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/
-
-  /*
-  transparent color key (tRNS)
-
-  This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
-  For greyscale PNGs, r, g and b will all 3 be set to the same.
-
-  When decoding, by default you can ignore this information, since LodePNG sets
-  pixels with this key to transparent already in the raw RGBA output.
-
-  The color key is only supported for color types 0 and 2.
-  */
-  unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
-  unsigned key_r;       /*red/greyscale component of color key*/
-  unsigned key_g;       /*green component of color key*/
-  unsigned key_b;       /*blue component of color key*/
-} LodePNGColorMode;
-
-/*init, cleanup and copy functions to use with this struct*/
-void lodepng_color_mode_init(LodePNGColorMode* info);
-void lodepng_color_mode_cleanup(LodePNGColorMode* info);
-/*return value is error code (0 means no error)*/
-unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
-
-void lodepng_palette_clear(LodePNGColorMode* info);
-/*add 1 color to the palette*/
-unsigned lodepng_palette_add(LodePNGColorMode* info,
-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a);
-
-/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
-unsigned lodepng_get_bpp(const LodePNGColorMode* info);
-/*get the amount of color channels used, based on colortype in the struct.
-If a palette is used, it counts as 1 channel.*/
-unsigned lodepng_get_channels(const LodePNGColorMode* info);
-/*is it a greyscale type? (only colortype 0 or 4)*/
-unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
-/*has it got an alpha channel? (only colortype 2 or 6)*/
-unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
-/*has it got a palette? (only colortype 3)*/
-unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
-/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
-Loops through the palette to check this.*/
-unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
-/*
-Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
-Returns true if the image can have translucent or invisible pixels (it still be opaque if it doesn't use such pixels).
-Returns false if the image can only have opaque pixels.
-In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
-or if "key_defined" is true.
-*/
-unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
-/*Returns the byte size of a raw image buffer with given width, height and color mode*/
-size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*The information of a Time chunk in PNG.*/
-typedef struct LodePNGTime
-{
-  unsigned year;    /*2 bytes used (0-65535)*/
-  unsigned month;   /*1-12*/
-  unsigned day;     /*1-31*/
-  unsigned hour;    /*0-23*/
-  unsigned minute;  /*0-59*/
-  unsigned second;  /*0-60 (to allow for leap seconds)*/
-} LodePNGTime;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*Information about the PNG image, except pixels, width and height.*/
-typedef struct LodePNGInfo
-{
-  /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
-  unsigned compression_method;/*compression method of the original file. Always 0.*/
-  unsigned filter_method;     /*filter method of the original file*/
-  unsigned interlace_method;  /*interlace method of the original file*/
-  LodePNGColorMode color;     /*color type and bits, palette and transparency of the PNG file*/
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  /*
-  suggested background color chunk (bKGD)
-  This color uses the same color mode as the PNG (except alpha channel), which can be 1-bit to 16-bit.
-
-  For greyscale PNGs, r, g and b will all 3 be set to the same. When encoding
-  the encoder writes the red one. For palette PNGs: When decoding, the RGB value
-  will be stored, not a palette index. But when encoding, specify the index of
-  the palette in background_r, the other two are then ignored.
-
-  The decoder does not use this background color to edit the color of pixels.
-  */
-  unsigned background_defined; /*is a suggested background color given?*/
-  unsigned background_r;       /*red component of suggested background color*/
-  unsigned background_g;       /*green component of suggested background color*/
-  unsigned background_b;       /*blue component of suggested background color*/
-
-  /*
-  non-international text chunks (tEXt and zTXt)
-
-  The char** arrays each contain num strings. The actual messages are in
-  text_strings, while text_keys are keywords that give a short description what
-  the actual text represents, e.g. Title, Author, Description, or anything else.
-
-  A keyword is minimum 1 character and maximum 79 characters long. It's
-  discouraged to use a single line length longer than 79 characters for texts.
-
-  Don't allocate these text buffers yourself. Use the init/cleanup functions
-  correctly and use lodepng_add_text and lodepng_clear_text.
-  */
-  size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
-  char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
-  char** text_strings; /*the actual text*/
-
-  /*
-  international text chunks (iTXt)
-  Similar to the non-international text chunks, but with additional strings
-  "langtags" and "transkeys".
-  */
-  size_t itext_num; /*the amount of international texts in this PNG*/
-  char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
-  char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
-  char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
-  char** itext_strings; /*the actual international text - UTF-8 string*/
-
-  /*time chunk (tIME)*/
-  unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
-  LodePNGTime time;
-
-  /*phys chunk (pHYs)*/
-  unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/
-  unsigned phys_x; /*pixels per unit in x direction*/
-  unsigned phys_y; /*pixels per unit in y direction*/
-  unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
-
-  /*
-  unknown chunks
-  There are 3 buffers, one for each position in the PNG where unknown chunks can appear
-  each buffer contains all unknown chunks for that position consecutively
-  The 3 buffers are the unknown chunks between certain critical chunks:
-  0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND
-  Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
-  later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
-  */
-  unsigned char* unknown_chunks_data[3];
-  size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-} LodePNGInfo;
-
-/*init, cleanup and copy functions to use with this struct*/
-void lodepng_info_init(LodePNGInfo* info);
-void lodepng_info_cleanup(LodePNGInfo* info);
-/*return value is error code (0 means no error)*/
-unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
-unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/
-
-void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
-                           const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*
-Converts raw buffer from one color type to another color type, based on
-LodePNGColorMode structs to describe the input and output color type.
-See the reference manual at the end of this header file to see which color conversions are supported.
-return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported)
-The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
-of the output color type (lodepng_get_bpp).
-For < 8 bpp images, there should not be padding bits at the end of scanlines.
-For 16-bit per channel colors, uses big endian format like PNG does.
-Return value is LodePNG error code
-*/
-unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
-                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
-                         unsigned w, unsigned h);
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Settings for the decoder. This contains settings for the PNG and the Zlib
-decoder, but not the Info settings from the Info structs.
-*/
-typedef struct LodePNGDecoderSettings
-{
-  LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
-
-  unsigned ignore_crc; /*ignore CRC checksums*/
-
-  unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
-  /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
-  unsigned remember_unknown_chunks;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-} LodePNGDecoderSettings;
-
-void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/
-typedef enum LodePNGFilterStrategy
-{
-  /*every filter at zero*/
-  LFS_ZERO,
-  /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
-  LFS_MINSUM,
-  /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
-  on the image, this is better or worse than minsum.*/
-  LFS_ENTROPY,
-  /*
-  Brute-force-search PNG filters by compressing each filter for each scanline.
-  Experimental, very slow, and only rarely gives better compression than MINSUM.
-  */
-  LFS_BRUTE_FORCE,
-  /*use predefined_filters buffer: you specify the filter type for each scanline*/
-  LFS_PREDEFINED
-} LodePNGFilterStrategy;
-
-/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding.
-Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/
-typedef struct LodePNGColorProfile
-{
-  unsigned colored; /*not greyscale*/
-  unsigned key; /*if true, image is not opaque. Only if true and alpha is false, color key is possible.*/
-  unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/
-  unsigned short key_g;
-  unsigned short key_b;
-  unsigned alpha; /*alpha channel or alpha palette required*/
-  unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/
-  unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/
-  unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/
-} LodePNGColorProfile;
-
-void lodepng_color_profile_init(LodePNGColorProfile* profile);
-
-/*Get a LodePNGColorProfile of the image.*/
-unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
-                                   const unsigned char* image, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode_in);
-/*The function LodePNG uses internally to decide the PNG color with auto_convert.
-Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/
-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
-                                   const unsigned char* image, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode_in);
-
-/*Settings for the encoder.*/
-typedef struct LodePNGEncoderSettings
-{
-  LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
-
-  unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
-
-  /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
-  8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
-  completely follow the official PNG heuristic, filter_palette_zero must be true and
-  filter_strategy must be LFS_MINSUM*/
-  unsigned filter_palette_zero;
-  /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
-  Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
-  LodePNGFilterStrategy filter_strategy;
-  /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
-  the same length as the amount of scanlines in the image, and each value must <= 5. You
-  have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero
-  must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
-  const unsigned char* predefined_filters;
-
-  /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
-  If colortype is 3, PLTE is _always_ created.*/
-  unsigned force_palette;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  /*add LodePNG identifier and version as a text chunk, for debugging*/
-  unsigned add_id;
-  /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
-  unsigned text_compression;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-} LodePNGEncoderSettings;
-
-void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-
-#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
-/*The settings, state and information for extended encoding and decoding.*/
-typedef struct LodePNGState
-{
-#ifdef LODEPNG_COMPILE_DECODER
-  LodePNGDecoderSettings decoder; /*the decoding settings*/
-#endif /*LODEPNG_COMPILE_DECODER*/
-#ifdef LODEPNG_COMPILE_ENCODER
-  LodePNGEncoderSettings encoder; /*the encoding settings*/
-#endif /*LODEPNG_COMPILE_ENCODER*/
-  LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
-  LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
-  unsigned error;
-#ifdef LODEPNG_COMPILE_CPP
-  /* For the lodepng::State subclass. */
-  virtual ~LodePNGState(){}
-#endif
-} LodePNGState;
-
-/*init, cleanup and copy functions to use with this struct*/
-void lodepng_state_init(LodePNGState* state);
-void lodepng_state_cleanup(LodePNGState* state);
-void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
-#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
-getting much more information about the PNG image and color mode.
-*/
-unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
-                        LodePNGState* state,
-                        const unsigned char* in, size_t insize);
-
-/*
-Read the PNG header, but not the actual data. This returns only the information
-that is in the header chunk of the PNG, such as width, height and color type. The
-information is placed in the info_png field of the LodePNGState.
-*/
-unsigned lodepng_inspect(unsigned* w, unsigned* h,
-                         LodePNGState* state,
-                         const unsigned char* in, size_t insize);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/
-unsigned lodepng_encode(unsigned char** out, size_t* outsize,
-                        const unsigned char* image, unsigned w, unsigned h,
-                        LodePNGState* state);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/*
-The lodepng_chunk functions are normally not needed, except to traverse the
-unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
-It also allows traversing the chunks of an encoded PNG file yourself.
-
-PNG standard chunk naming conventions:
-First byte: uppercase = critical, lowercase = ancillary
-Second byte: uppercase = public, lowercase = private
-Third byte: must be uppercase
-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
-*/
-
-/*
-Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
-There must be at least 4 bytes to read from. If the result value is too large,
-it may be corrupt data.
-*/
-unsigned lodepng_chunk_length(const unsigned char* chunk);
-
-/*puts the 4-byte type in null terminated string*/
-void lodepng_chunk_type(char type[5], const unsigned char* chunk);
-
-/*check if the type is the given type*/
-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
-
-/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
-unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
-
-/*0: public, 1: private (see PNG standard)*/
-unsigned char lodepng_chunk_private(const unsigned char* chunk);
-
-/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
-
-/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
-unsigned char* lodepng_chunk_data(unsigned char* chunk);
-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
-
-/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
-unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
-
-/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
-void lodepng_chunk_generate_crc(unsigned char* chunk);
-
-/*iterate to next chunks. don't use on IEND chunk, as there is no next chunk then*/
-unsigned char* lodepng_chunk_next(unsigned char* chunk);
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk);
-
-/*
-Appends chunk to the data in out. The given chunk should already have its chunk header.
-The out variable and outlength are updated to reflect the new reallocated buffer.
-Returns error code (0 if it went ok)
-*/
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
-
-/*
-Appends new chunk to out. The chunk to append is given by giving its length, type
-and data separately. The type is a 4-letter string.
-The out variable and outlength are updated to reflect the new reallocated buffer.
-Returne error code (0 if it went ok)
-*/
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data);
-
-
-/*Calculate CRC32 of buffer*/
-unsigned lodepng_crc32(const unsigned char* buf, size_t len);
-#endif /*LODEPNG_COMPILE_PNG*/
-
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*
-This zlib part can be used independently to zlib compress and decompress a
-buffer. It cannot be used to create gzip files however, and it only supports the
-part of zlib that is required for PNG, it does not support dictionaries.
-*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
-unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGDecompressSettings* settings);
-
-/*
-Decompresses Zlib data. Reallocates the out buffer and appends the data. The
-data must be according to the zlib specification.
-Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
-buffer and *outsize its size in bytes. out must be freed by user after usage.
-*/
-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
-                                 const unsigned char* in, size_t insize,
-                                 const LodePNGDecompressSettings* settings);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Compresses data with Zlib. Reallocates the out buffer and appends the data.
-Zlib adds a small header and trailer around the deflate data.
-The data is output in the format of the zlib specification.
-Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
-buffer and *outsize its size in bytes. out must be freed by user after usage.
-*/
-unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
-                               const unsigned char* in, size_t insize,
-                               const LodePNGCompressSettings* settings);
-
-/*
-Find length-limited Huffman code for given frequencies. This function is in the
-public interface only for tests, it's used internally by lodepng_deflate.
-*/
-unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
-                                      size_t numcodes, unsigned maxbitlen);
-
-/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
-unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGCompressSettings* settings);
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Load a file from disk into buffer. The function allocates the out buffer, and
-after usage you should free it.
-out: output parameter, contains pointer to loaded buffer.
-outsize: output parameter, size of the allocated out buffer
-filename: the path to the file to load
-return value: error code (0 means ok)
-*/
-unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
-
-/*
-Save a file from buffer to disk. Warning, if it exists, this function overwrites
-the file without warning!
-buffer: the buffer to write
-buffersize: size of the buffer to write
-filename: the path to the file to save to
-return value: error code (0 means ok)
-*/
-unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
-#endif /*LODEPNG_COMPILE_DISK*/
-
-#ifdef LODEPNG_COMPILE_CPP
-/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
-namespace lodepng
-{
-#ifdef LODEPNG_COMPILE_PNG
-class State : public LodePNGState
-{
-  public:
-    State();
-    State(const State& other);
-    virtual ~State();
-    State& operator=(const State& other);
-};
-
-#ifdef LODEPNG_COMPILE_DECODER
-/* Same as other lodepng::decode, but using a State for more settings and information. */
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const unsigned char* in, size_t insize);
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const std::vector<unsigned char>& in);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/* Same as other lodepng::encode, but using a State for more settings and information. */
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                State& state);
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                State& state);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Load a file from disk into an std::vector.
-return value: error code (0 means ok)
-*/
-unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
-
-/*
-Save the binary data in an std::vector to a file on disk. The file is overwritten
-without warning.
-*/
-unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_PNG */
-
-#ifdef LODEPNG_COMPILE_ZLIB
-#ifdef LODEPNG_COMPILE_DECODER
-/* Zlib-decompress an unsigned char buffer */
-unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
-
-/* Zlib-decompress an std::vector */
-unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/* Zlib-compress an unsigned char buffer */
-unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
-
-/* Zlib-compress an std::vector */
-unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_ZLIB */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
-
-/*
-TODO:
-[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
-[.] check compatibility with various compilers  - done but needs to be redone for every newer version
-[X] converting color to 16-bit per channel types
-[ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values)
-[ ] make sure encoder generates no chunks with size > (2^31)-1
-[ ] partial decoding (stream processing)
-[X] let the "isFullyOpaque" function check color keys and transparent palettes too
-[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
-[ ] don't stop decoding on errors like 69, 57, 58 (make warnings)
-[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
-[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
-[ ] allow user to give data (void*) to custom allocator
-*/
-
-#endif /*LODEPNG_H inclusion guard*/
-
-/*
-LodePNG Documentation
----------------------
-
-0. table of contents
---------------------
-
-  1. about
-   1.1. supported features
-   1.2. features not supported
-  2. C and C++ version
-  3. security
-  4. decoding
-  5. encoding
-  6. color conversions
-    6.1. PNG color types
-    6.2. color conversions
-    6.3. padding bits
-    6.4. A note about 16-bits per channel and endianness
-  7. error values
-  8. chunks and PNG editing
-  9. compiler support
-  10. examples
-   10.1. decoder C++ example
-   10.2. decoder C example
-  11. state settings reference
-  12. changes
-  13. contact information
-
-
-1. about
---------
-
-PNG is a file format to store raster images losslessly with good compression,
-supporting different color types and alpha channel.
-
-LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
-Specification (Second Edition) - W3C Recommendation 10 November 2003.
-
-The specifications used are:
-
-*) Portable Network Graphics (PNG) Specification (Second Edition):
-     http://www.w3.org/TR/2003/REC-PNG-20031110
-*) RFC 1950 ZLIB Compressed Data Format version 3.3:
-     http://www.gzip.org/zlib/rfc-zlib.html
-*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
-     http://www.gzip.org/zlib/rfc-deflate.html
-
-The most recent version of LodePNG can currently be found at
-http://lodev.org/lodepng/
-
-LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
-extra functionality.
-
-LodePNG exists out of two files:
--lodepng.h: the header file for both C and C++
--lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
-
-If you want to start using LodePNG right away without reading this doc, get the
-examples from the LodePNG website to see how to use it in code, or check the
-smaller examples in chapter 13 here.
-
-LodePNG is simple but only supports the basic requirements. To achieve
-simplicity, the following design choices were made: There are no dependencies
-on any external library. There are functions to decode and encode a PNG with
-a single function call, and extended versions of these functions taking a
-LodePNGState struct allowing to specify or get more information. By default
-the colors of the raw image are always RGB or RGBA, no matter what color type
-the PNG file uses. To read and write files, there are simple functions to
-convert the files to/from buffers in memory.
-
-This all makes LodePNG suitable for loading textures in games, demos and small
-programs, ... It's less suitable for full fledged image editors, loading PNGs
-over network (it requires all the image data to be available before decoding can
-begin), life-critical systems, ...
-
-1.1. supported features
------------------------
-
-The following features are supported by the decoder:
-
-*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
-   or the same color type as the PNG
-*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
-*) Adam7 interlace and deinterlace for any color type
-*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
-*) support for alpha channels, including RGBA color model, translucent palettes and color keying
-*) zlib decompression (inflate)
-*) zlib compression (deflate)
-*) CRC32 and ADLER32 checksums
-*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
-*) the following chunks are supported (generated/interpreted) by both encoder and decoder:
-    IHDR: header information
-    PLTE: color palette
-    IDAT: pixel data
-    IEND: the final chunk
-    tRNS: transparency for palettized images
-    tEXt: textual information
-    zTXt: compressed textual information
-    iTXt: international textual information
-    bKGD: suggested background color
-    pHYs: physical dimensions
-    tIME: modification time
-
-1.2. features not supported
----------------------------
-
-The following features are _not_ supported:
-
-*) some features needed to make a conformant PNG-Editor might be still missing.
-*) partial loading/stream processing. All data must be available and is processed in one call.
-*) The following public chunks are not supported but treated as unknown chunks by LodePNG
-    cHRM, gAMA, iCCP, sRGB, sBIT, hIST, sPLT
-   Some of these are not supported on purpose: LodePNG wants to provide the RGB values
-   stored in the pixels, not values modified by system dependent gamma or color models.
-
-
-2. C and C++ version
---------------------
-
-The C version uses buffers allocated with alloc that you need to free()
-yourself. You need to use init and cleanup functions for each struct whenever
-using a struct from the C version to avoid exploits and memory leaks.
-
-The C++ version has extra functions with std::vectors in the interface and the
-lodepng::State class which is a LodePNGState with constructor and destructor.
-
-These files work without modification for both C and C++ compilers because all
-the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
-ignore it, and the C code is made to compile both with strict ISO C90 and C++.
-
-To use the C++ version, you need to rename the source file to lodepng.cpp
-(instead of lodepng.c), and compile it with a C++ compiler.
-
-To use the C version, you need to rename the source file to lodepng.c (instead
-of lodepng.cpp), and compile it with a C compiler.
-
-
-3. Security
------------
-
-Even if carefully designed, it's always possible that LodePNG contains possible
-exploits. If you discover one, please let me know, and it will be fixed.
-
-When using LodePNG, care has to be taken with the C version of LodePNG, as well
-as the C-style structs when working with C++. The following conventions are used
-for all C-style structs:
-
--if a struct has a corresponding init function, always call the init function when making a new one
--if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
--if a struct has a corresponding copy function, use the copy function instead of "=".
- The destination must also be inited already.
-
-
-4. Decoding
------------
-
-Decoding converts a PNG compressed image to a raw pixel buffer.
-
-Most documentation on using the decoder is at its declarations in the header
-above. For C, simple decoding can be done with functions such as
-lodepng_decode32, and more advanced decoding can be done with the struct
-LodePNGState and lodepng_decode. For C++, all decoding can be done with the
-various lodepng::decode functions, and lodepng::State can be used for advanced
-features.
-
-When using the LodePNGState, it uses the following fields for decoding:
-*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
-*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
-*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
-
-LodePNGInfo info_png
---------------------
-
-After decoding, this contains extra information of the PNG image, except the actual
-pixels, width and height because these are already gotten directly from the decoder
-functions.
-
-It contains for example the original color type of the PNG image, text comments,
-suggested background color, etc... More details about the LodePNGInfo struct are
-at its declaration documentation.
-
-LodePNGColorMode info_raw
--------------------------
-
-When decoding, here you can specify which color type you want
-the resulting raw image to be. If this is different from the colortype of the
-PNG, then the decoder will automatically convert the result. This conversion
-always works, except if you want it to convert a color PNG to greyscale or to
-a palette with missing colors.
-
-By default, 32-bit color is used for the result.
-
-LodePNGDecoderSettings decoder
-------------------------------
-
-The settings can be used to ignore the errors created by invalid CRC and Adler32
-chunks, and to disable the decoding of tEXt chunks.
-
-There's also a setting color_convert, true by default. If false, no conversion
-is done, the resulting data will be as it was in the PNG (after decompression)
-and you'll have to puzzle the colors of the pixels together yourself using the
-color type information in the LodePNGInfo.
-
-
-5. Encoding
------------
-
-Encoding converts a raw pixel buffer to a PNG compressed image.
-
-Most documentation on using the encoder is at its declarations in the header
-above. For C, simple encoding can be done with functions such as
-lodepng_encode32, and more advanced decoding can be done with the struct
-LodePNGState and lodepng_encode. For C++, all encoding can be done with the
-various lodepng::encode functions, and lodepng::State can be used for advanced
-features.
-
-Like the decoder, the encoder can also give errors. However it gives less errors
-since the encoder input is trusted, the decoder input (a PNG image that could
-be forged by anyone) is not trusted.
-
-When using the LodePNGState, it uses the following fields for encoding:
-*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
-*) LodePNGColorMode info_raw: here you say what color type of the raw image (the input) has
-*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
-
-LodePNGInfo info_png
---------------------
-
-When encoding, you use this the opposite way as when decoding: for encoding,
-you fill in the values you want the PNG to have before encoding. By default it's
-not needed to specify a color type for the PNG since it's automatically chosen,
-but it's possible to choose it yourself given the right settings.
-
-The encoder will not always exactly match the LodePNGInfo struct you give,
-it tries as close as possible. Some things are ignored by the encoder. The
-encoder uses, for example, the following settings from it when applicable:
-colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
-background color, the interlace method, unknown chunks, ...
-
-When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
-If the palette contains any colors for which the alpha channel is not 255 (so
-there are translucent colors in the palette), it'll add a tRNS chunk.
-
-LodePNGColorMode info_raw
--------------------------
-
-You specify the color type of the raw image that you give to the input here,
-including a possible transparent color key and palette you happen to be using in
-your raw image data.
-
-By default, 32-bit color is assumed, meaning your input has to be in RGBA
-format with 4 bytes (unsigned chars) per pixel.
-
-LodePNGEncoderSettings encoder
-------------------------------
-
-The following settings are supported (some are in sub-structs):
-*) auto_convert: when this option is enabled, the encoder will
-automatically choose the smallest possible color mode (including color key) that
-can encode the colors of all pixels without information loss.
-*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
-   2 = dynamic huffman tree (best compression). Should be 2 for proper
-   compression.
-*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
-   true for proper compression.
-*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
-   2048 by default, but can be set to 32768 for better, but slow, compression.
-*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
-   chunk if force_palette is true. This can used as suggested palette to convert
-   to by viewers that don't support more than 256 colors (if those still exist)
-*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
-*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
-  zTXt chunks use zlib compression on the text. This gives a smaller result on
-  large texts but a larger result on small texts (such as a single program name).
-  It's all tEXt or all zTXt though, there's no separate setting per text yet.
-
-
-6. color conversions
---------------------
-
-An important thing to note about LodePNG, is that the color type of the PNG, and
-the color type of the raw image, are completely independent. By default, when
-you decode a PNG, you get the result as a raw image in the color type you want,
-no matter whether the PNG was encoded with a palette, greyscale or RGBA color.
-And if you encode an image, by default LodePNG will automatically choose the PNG
-color type that gives good compression based on the values of colors and amount
-of colors in the image. It can be configured to let you control it instead as
-well, though.
-
-To be able to do this, LodePNG does conversions from one color mode to another.
-It can convert from almost any color type to any other color type, except the
-following conversions: RGB to greyscale is not supported, and converting to a
-palette when the palette doesn't have a required color is not supported. This is
-not supported on purpose: this is information loss which requires a color
-reduction algorithm that is beyong the scope of a PNG encoder (yes, RGB to grey
-is easy, but there are multiple ways if you want to give some channels more
-weight).
-
-By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
-color, no matter what color type the PNG has. And by default when encoding,
-LodePNG automatically picks the best color model for the output PNG, and expects
-the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
-the color format of the images yourself, you can skip this chapter.
-
-6.1. PNG color types
---------------------
-
-A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
-as well as palettized color modes. After the zlib decompression and unfiltering
-in the PNG image is done, the raw pixel data will have that color type and thus
-a certain amount of bits per pixel. If you want the output raw image after
-decoding to have another color type, a conversion is done by LodePNG.
-
-The PNG specification gives the following color types:
-
-0: greyscale, bit depths 1, 2, 4, 8, 16
-2: RGB, bit depths 8 and 16
-3: palette, bit depths 1, 2, 4 and 8
-4: greyscale with alpha, bit depths 8 and 16
-6: RGBA, bit depths 8 and 16
-
-Bit depth is the amount of bits per pixel per color channel. So the total amount
-of bits per pixel is: amount of channels * bitdepth.
-
-6.2. color conversions
-----------------------
-
-As explained in the sections about the encoder and decoder, you can specify
-color types and bit depths in info_png and info_raw to change the default
-behaviour.
-
-If, when decoding, you want the raw image to be something else than the default,
-you need to set the color type and bit depth you want in the LodePNGColorMode,
-or the parameters colortype and bitdepth of the simple decoding function.
-
-If, when encoding, you use another color type than the default in the raw input
-image, you need to specify its color type and bit depth in the LodePNGColorMode
-of the raw image, or use the parameters colortype and bitdepth of the simple
-encoding function.
-
-If, when encoding, you don't want LodePNG to choose the output PNG color type
-but control it yourself, you need to set auto_convert in the encoder settings
-to false, and specify the color type you want in the LodePNGInfo of the
-encoder (including palette: it can generate a palette if auto_convert is true,
-otherwise not).
-
-If the input and output color type differ (whether user chosen or auto chosen),
-LodePNG will do a color conversion, which follows the rules below, and may
-sometimes result in an error.
-
-To avoid some confusion:
--the decoder converts from PNG to raw image
--the encoder converts from raw image to PNG
--the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
--the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
--when encoding, the color type in LodePNGInfo is ignored if auto_convert
- is enabled, it is automatically generated instead
--when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
- PNG image, but it can be ignored since the raw image has the color type you requested instead
--if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
- between the color types is done if the color types are supported. If it is not
- supported, an error is returned. If the types are the same, no conversion is done.
--even though some conversions aren't supported, LodePNG supports loading PNGs from any
- colortype and saving PNGs to any colortype, sometimes it just requires preparing
- the raw image correctly before encoding.
--both encoder and decoder use the same color converter.
-
-Non supported color conversions:
--color to greyscale: no error is thrown, but the result will look ugly because
-only the red channel is taken
--anything to palette when that palette does not have that color in it: in this
-case an error is thrown
-
-Supported color conversions:
--anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
--any grey or grey+alpha, to grey or grey+alpha
--anything to a palette, as long as the palette has the requested colors in it
--removing alpha channel
--higher to smaller bitdepth, and vice versa
-
-If you want no color conversion to be done (e.g. for speed or control):
--In the encoder, you can make it save a PNG with any color type by giving the
-raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
-false.
--In the decoder, you can make it store the pixel data in the same color type
-as the PNG has, by setting the color_convert setting to false. Settings in
-info_raw are then ignored.
-
-The function lodepng_convert does the color conversion. It is available in the
-interface but normally isn't needed since the encoder and decoder already call
-it.
-
-6.3. padding bits
------------------
-
-In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
-have a bit amount that isn't a multiple of 8, then padding bits are used so that each
-scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
-The raw input image you give to the encoder, and the raw output image you get from the decoder
-will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
-of 7 pixels, the first pixel of the second scanline will the the 8th bit of the first byte,
-not the first bit of a new byte.
-
-6.4. A note about 16-bits per channel and endianness
-----------------------------------------------------
-
-LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
-for any other color format. The 16-bit values are stored in big endian (most
-significant byte first) in these arrays. This is the opposite order of the
-little endian used by x86 CPU's.
-
-LodePNG always uses big endian because the PNG file format does so internally.
-Conversions to other formats than PNG uses internally are not supported by
-LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
-colors, the order in which you store R, G, B and A, and so on. Supporting and
-converting to/from all that is outside the scope of LodePNG.
-
-This may mean that, depending on your use case, you may want to convert the big
-endian output of LodePNG to little endian with a for loop. This is certainly not
-always needed, many applications and libraries support big endian 16-bit colors
-anyway, but it means you cannot simply cast the unsigned char* buffer to an
-unsigned short* buffer on x86 CPUs.
-
-
-7. error values
----------------
-
-All functions in LodePNG that return an error code, return 0 if everything went
-OK, or a non-zero code if there was an error.
-
-The meaning of the LodePNG error values can be retrieved with the function
-lodepng_error_text: given the numerical error code, it returns a description
-of the error in English as a string.
-
-Check the implementation of lodepng_error_text to see the meaning of each code.
-
-
-8. chunks and PNG editing
--------------------------
-
-If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
-editor that should follow the rules about handling of unknown chunks, or if your
-program is able to read other types of chunks than the ones handled by LodePNG,
-then that's possible with the chunk functions of LodePNG.
-
-A PNG chunk has the following layout:
-
-4 bytes length
-4 bytes type name
-length bytes data
-4 bytes CRC
-
-8.1. iterating through chunks
------------------------------
-
-If you have a buffer containing the PNG image data, then the first chunk (the
-IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
-signature of the PNG and are not part of a chunk. But if you start at byte 8
-then you have a chunk, and can check the following things of it.
-
-NOTE: none of these functions check for memory buffer boundaries. To avoid
-exploits, always make sure the buffer contains all the data of the chunks.
-When using lodepng_chunk_next, make sure the returned value is within the
-allocated memory.
-
-unsigned lodepng_chunk_length(const unsigned char* chunk):
-
-Get the length of the chunk's data. The total chunk length is this length + 12.
-
-void lodepng_chunk_type(char type[5], const unsigned char* chunk):
-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
-
-Get the type of the chunk or compare if it's a certain type
-
-unsigned char lodepng_chunk_critical(const unsigned char* chunk):
-unsigned char lodepng_chunk_private(const unsigned char* chunk):
-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
-
-Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
-Check if the chunk is private (public chunks are part of the standard, private ones not).
-Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
-chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
-program doesn't handle that type of unknown chunk.
-
-unsigned char* lodepng_chunk_data(unsigned char* chunk):
-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
-
-Get a pointer to the start of the data of the chunk.
-
-unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
-void lodepng_chunk_generate_crc(unsigned char* chunk):
-
-Check if the crc is correct or generate a correct one.
-
-unsigned char* lodepng_chunk_next(unsigned char* chunk):
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk):
-
-Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these
-functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
-data available in the buffer to be able to go to the next chunk.
-
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data):
-
-These functions are used to create new chunks that are appended to the data in *out that has
-length *outlength. The append function appends an existing chunk to the new data. The create
-function creates a new chunk with the given parameters and appends it. Type is the 4-letter
-name of the chunk.
-
-8.2. chunks in info_png
------------------------
-
-The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3
-buffers (each with size) to contain 3 types of unknown chunks:
-the ones that come before the PLTE chunk, the ones that come between the PLTE
-and the IDAT chunks, and the ones that come after the IDAT chunks.
-It's necessary to make the distionction between these 3 cases because the PNG
-standard forces to keep the ordering of unknown chunks compared to the critical
-chunks, but does not force any other ordering rules.
-
-info_png.unknown_chunks_data[0] is the chunks before PLTE
-info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
-info_png.unknown_chunks_data[2] is the chunks after IDAT
-
-The chunks in these 3 buffers can be iterated through and read by using the same
-way described in the previous subchapter.
-
-When using the decoder to decode a PNG, you can make it store all unknown chunks
-if you set the option settings.remember_unknown_chunks to 1. By default, this
-option is off (0).
-
-The encoder will always encode unknown chunks that are stored in the info_png.
-If you need it to add a particular chunk that isn't known by LodePNG, you can
-use lodepng_chunk_append or lodepng_chunk_create to the chunk data in
-info_png.unknown_chunks_data[x].
-
-Chunks that are known by LodePNG should not be added in that way. E.g. to make
-LodePNG add a bKGD chunk, set background_defined to true and add the correct
-parameters there instead.
-
-
-9. compiler support
--------------------
-
-No libraries other than the current standard C library are needed to compile
-LodePNG. For the C++ version, only the standard C++ library is needed on top.
-Add the files lodepng.c(pp) and lodepng.h to your project, include
-lodepng.h where needed, and your program can read/write PNG files.
-
-It is compatible with C90 and up, and C++03 and up.
-
-If performance is important, use optimization when compiling! For both the
-encoder and decoder, this makes a large difference.
-
-Make sure that LodePNG is compiled with the same compiler of the same version
-and with the same settings as the rest of the program, or the interfaces with
-std::vectors and std::strings in C++ can be incompatible.
-
-CHAR_BITS must be 8 or higher, because LodePNG uses unsigned chars for octets.
-
-*) gcc and g++
-
-LodePNG is developed in gcc so this compiler is natively supported. It gives no
-warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
-version 4.7.1 on Linux, 32-bit and 64-bit.
-
-*) Clang
-
-Fully supported and warning-free.
-
-*) Mingw
-
-The Mingw compiler (a port of gcc for Windows) should be fully supported by
-LodePNG.
-
-*) Visual Studio and Visual C++ Express Edition
-
-LodePNG should be warning-free with warning level W4. Two warnings were disabled
-with pragmas though: warning 4244 about implicit conversions, and warning 4996
-where it wants to use a non-standard function fopen_s instead of the standard C
-fopen.
-
-Visual Studio may want "stdafx.h" files to be included in each source file and
-give an error "unexpected end of file while looking for precompiled header".
-This is not standard C++ and will not be added to the stock LodePNG. You can
-disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
-Precompiled Headers, and set it to Not Using Precompiled Headers there.
-
-NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
-VS6, are not guaranteed to work.
-
-*) Compilers on Macintosh
-
-LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
-C and C++.
-
-*) Other Compilers
-
-If you encounter problems on any compilers, feel free to let me know and I may
-try to fix it if the compiler is modern and standards complient.
-
-
-10. examples
-------------
-
-This decoder example shows the most basic usage of LodePNG. More complex
-examples can be found on the LodePNG website.
-
-10.1. decoder C++ example
--------------------------
-
-#include "lodepng.h"
-#include <iostream>
-
-int main(int argc, char *argv[])
-{
-  const char* filename = argc > 1 ? argv[1] : "test.png";
-
-  //load and decode
-  std::vector<unsigned char> image;
-  unsigned width, height;
-  unsigned error = lodepng::decode(image, width, height, filename);
-
-  //if there's an error, display it
-  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
-
-  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
-}
-
-10.2. decoder C example
------------------------
-
-#include "lodepng.h"
-
-int main(int argc, char *argv[])
-{
-  unsigned error;
-  unsigned char* image;
-  size_t width, height;
-  const char* filename = argc > 1 ? argv[1] : "test.png";
-
-  error = lodepng_decode32_file(&image, &width, &height, filename);
-
-  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
-
-  / * use image here * /
-
-  free(image);
-  return 0;
-}
-
-11. state settings reference
-----------------------------
-
-A quick reference of some settings to set on the LodePNGState
-
-For decoding:
-
-state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
-state.decoder.zlibsettings.custom_...: use custom inflate function
-state.decoder.ignore_crc: ignore CRC checksums
-state.decoder.color_convert: convert internal PNG color to chosen one
-state.decoder.read_text_chunks: whether to read in text metadata chunks
-state.decoder.remember_unknown_chunks: whether to read in unknown chunks
-state.info_raw.colortype: desired color type for decoded image
-state.info_raw.bitdepth: desired bit depth for decoded image
-state.info_raw....: more color settings, see struct LodePNGColorMode
-state.info_png....: no settings for decoder but ouput, see struct LodePNGInfo
-
-For encoding:
-
-state.encoder.zlibsettings.btype: disable compression by setting it to 0
-state.encoder.zlibsettings.use_lz77: use LZ77 in compression
-state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
-state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
-state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
-state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
-state.encoder.zlibsettings.custom_...: use custom deflate function
-state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
-state.encoder.filter_palette_zero: PNG filter strategy for palette
-state.encoder.filter_strategy: PNG filter strategy to encode with
-state.encoder.force_palette: add palette even if not encoding to one
-state.encoder.add_id: add LodePNG identifier and version as a text chunk
-state.encoder.text_compression: use compressed text chunks for metadata
-state.info_raw.colortype: color type of raw input image you provide
-state.info_raw.bitdepth: bit depth of raw input image you provide
-state.info_raw: more color settings, see struct LodePNGColorMode
-state.info_png.color.colortype: desired color type if auto_convert is false
-state.info_png.color.bitdepth: desired bit depth if auto_convert is false
-state.info_png.color....: more color settings, see struct LodePNGColorMode
-state.info_png....: more PNG related settings, see struct LodePNGInfo
-
-
-12. changes
------------
-
-The version number of LodePNG is the date of the change given in the format
-yyyymmdd.
-
-Some changes aren't backwards compatible. Those are indicated with a (!)
-symbol.
-
-*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort).
-*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within
-   the limits of pure C90).
-*) 08 dec 2015: Made load_file function return error if file can't be opened.
-*) 24 okt 2015: Bugfix with decoding to palette output.
-*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
-*) 23 aug 2014: Reduced needless memory usage of decoder.
-*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
-    simplicity. Made ColorProfile public.
-*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
-*) 22 dec 2013: Power of two windowsize required for optimization.
-*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
-*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
-*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_"
-    prefix for the custom allocators and made it possible with a new #define to
-    use custom ones in your project without needing to change lodepng's code.
-*) 28 jan 2013: Bugfix with color key.
-*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
-*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode.
-    (no palette). Better deflate tree encoding. New compression tweak settings.
-    Faster color conversions while decoding. Some internal cleanups.
-*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
-*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions
-    and made it work with function pointers instead.
-*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
-    and free functions and toggle #defines from compiler flags. Small fixes.
-*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible.
-*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed
-    redundant C++ codec classes. Reduced amount of structs. Everything changed,
-    but it is cleaner now imho and functionality remains the same. Also fixed
-    several bugs and shrunk the implementation code. Made new samples.
-*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best
-    PNG color model and bit depth, based on the amount and type of colors of the
-    raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
-*) 9 okt 2011: simpler hash chain implementation for the encoder.
-*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
-*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
-    A bug with the PNG filtertype heuristic was fixed, so that it chooses much
-    better ones (it's quite significant). A setting to do an experimental, slow,
-    brute force search for PNG filter types is added.
-*) 17 aug 2011 (!): changed some C zlib related function names.
-*) 16 aug 2011: made the code less wide (max 120 characters per line).
-*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
-*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
-*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
-    to optimize long sequences of zeros.
-*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
-    LodePNG_InfoColor_canHaveAlpha functions for convenience.
-*) 7 nov 2010: added LodePNG_error_text function to get error code description.
-*) 30 okt 2010: made decoding slightly faster
-*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
-     Reorganized the documentation and the declaration order in the header.
-*) 08 aug 2010: only changed some comments and external samples.
-*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
-*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
-*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
-    read by ignoring the problem but windows apps couldn't.
-*) 06 jun 2008: added more error checks for out of memory cases.
-*) 26 apr 2008: added a few more checks here and there to ensure more safety.
-*) 06 mar 2008: crash with encoding of strings fixed
-*) 02 feb 2008: support for international text chunks added (iTXt)
-*) 23 jan 2008: small cleanups, and #defines to divide code in sections
-*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
-*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
-*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
-    Also various fixes, such as in the deflate and the padding bits code.
-*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
-    filtering code of encoder.
-*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
-    C++ wrapper around this provides an interface almost identical to before.
-    Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
-    are together in these files but it works both for C and C++ compilers.
-*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
-*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
-*) 09 aug 2007: some VS2005 warnings removed again
-*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
-*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
-*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
-    invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
-*) 02 jun 2007: made the encoder add a tag with version by default
-*) 27 may 2007: zlib and png code separated (but still in the same file),
-    simple encoder/decoder functions added for more simple usage cases
-*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
-    moved some examples from here to lodepng_examples.cpp
-*) 12 may 2007: palette decoding bug fixed
-*) 24 apr 2007: changed the license from BSD to the zlib license
-*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
-*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
-    palettized PNG images. Plus little interface change with palette and texts.
-*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
-    Fixed a bug where the end code of a block had length 0 in the Huffman tree.
-*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
-    and supported by the encoder, resulting in smaller PNGs at the output.
-*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
-*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
-    greyscale type to 8-bit greyscale with or without alpha.
-*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
-    to convert to and is more uniform. See the manual for how it works now.
-*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
-    encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
-    at last made the decoder give errors for incorrect Adler32 or Crc.
-*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
-*) 29 dec 2006: Added support for encoding images without alpha channel, and
-    cleaned out code as well as making certain parts faster.
-*) 28 dec 2006: Added "Settings" to the encoder.
-*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
-    Removed some code duplication in the decoder. Fixed little bug in an example.
-*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
-    Fixed a bug of the decoder with 16-bit per color.
-*) 15 okt 2006: Changed documentation structure
-*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
-    given image buffer, however for now it's not compressed.
-*) 08 sep 2006: (!) Changed to interface with a Decoder class
-*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different
-    way. Renamed decodePNG to decodePNGGeneric.
-*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
-    struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
-*) 28 jul 2006: Cleaned the code and added new error checks.
-    Corrected terminology "deflate" into "inflate".
-*) 23 jun 2006: Added SDL example in the documentation in the header, this
-    example allows easy debugging by displaying the PNG and its transparency.
-*) 22 jun 2006: (!) Changed way to obtain error value. Added
-    loadFile function for convenience. Made decodePNG32 faster.
-*) 21 jun 2006: (!) Changed type of info vector to unsigned.
-    Changed position of palette in info vector. Fixed an important bug that
-    happened on PNGs with an uncompressed block.
-*) 16 jun 2006: Internally changed unsigned into unsigned where
-    needed, and performed some optimizations.
-*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
-    in LodePNG namespace. Changed the order of the parameters. Rewrote the
-    documentation in the header. Renamed files to lodepng.cpp and lodepng.h
-*) 22 apr 2006: Optimized and improved some code
-*) 07 sep 2005: (!) Changed to std::vector interface
-*) 12 aug 2005: Initial release (C++, decoder only)
-
-
-13. contact information
------------------------
-
-Feel free to contact me with suggestions, problems, comments, ... concerning
-LodePNG. If you encounter a PNG image that doesn't work properly with this
-decoder, feel free to send it and I'll use it to find and fix the problem.
-
-My email address is (puzzle the account and domain together with an @ symbol):
-Domain: gmail dot com.
-Account: lode dot vandevenne.
-
-
-Copyright (c) 2005-2016 Lode Vandevenne
-*/
diff --git a/3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt b/3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt
deleted file mode 100644
index c422f717e..000000000
--- a/3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-NVIDIA Texture Tools 2.0 is licensed under the MIT license.
-
-Copyright (c) 2007 NVIDIA Corporation
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following
-conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
diff --git a/3rdparty/nvtt/bc6h/bits.h b/3rdparty/nvtt/bc6h/bits.h
deleted file mode 100644
index c47a7c6e9..000000000
--- a/3rdparty/nvtt/bc6h/bits.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#ifndef _ZOH_BITS_H
-#define _ZOH_BITS_H
-
-// read/write a bitstream
-
-#include "nvcore/debug.h"
-
-namespace ZOH {
-
-class Bits
-{
-public:
-
-	Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
-	Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
-
-	void write(int value, int nbits) {
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		for (int i=0; i<nbits; ++i)
-			writeone(value>>i);
-	}
-	int read(int nbits) { 
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		int out = 0;
-		for (int i=0; i<nbits; ++i)
-			out |= readone() << i;
-		return out;
-	}
-	int getptr() { return bptr; }
-	void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
-	int getsize() { return bend; }
-
-private:
-	int	bptr;		// next bit to read
-	int bend;		// last written bit + 1
-	char *bits;		// ptr to user bit stream
-	const char *cbits;	// ptr to const user bit stream
-	int maxbits;	// max size of user bit stream
-	char readonly;	// 1 if this is a read-only stream
-
-	int readone() {
-		nvAssert (bptr < bend);
-		if (bptr >= bend) return 0;
-		int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
-		++bptr;
-		return bit != 0;
-	}
-	void writeone(int bit) {
-		nvAssert (!readonly); // "Writing a read-only bit stream"
-		nvAssert (bptr < maxbits);
-		if (bptr >= maxbits) return;
-		if (bit&1)
-			bits[bptr>>3] |= 1 << (bptr & 7);
-		else
-			bits[bptr>>3] &= ~(1 << (bptr & 7));
-		if (bptr++ >= bend) bend = bptr;
-	}
-};
-
-}
-
-#endif
diff --git a/3rdparty/nvtt/bc6h/shapes_two.h b/3rdparty/nvtt/bc6h/shapes_two.h
deleted file mode 100644
index 2fc555995..000000000
--- a/3rdparty/nvtt/bc6h/shapes_two.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#pragma once
-#ifndef _ZOH_SHAPES_TWO_H
-#define _ZOH_SHAPES_TWO_H
-
-// shapes for two regions
-
-#define NREGIONS 2
-#define NSHAPES 64
-#define SHAPEBITS 6
-
-static const int shapes[NSHAPES*16] = 
-{
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   0, 0, 0, 1,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   1, 1, 1, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 0,   0, 1, 1, 1,   
-1, 0, 0, 0,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 1, 1,   
-1, 1, 1, 0,   0, 0, 0, 0,   1, 0, 0, 0,   0, 0, 0, 1,   
-1, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 0, 0, 0,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 1, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 0, 0,   1, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   0, 0, 0, 1,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 0,   1, 1, 0, 0,   0, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 1,   1, 0, 0, 1,   
-1, 1, 1, 0,   1, 1, 1, 1,   1, 0, 0, 0,   1, 0, 0, 1,   
-1, 0, 0, 0,   0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   0, 0, 1, 1,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   0, 0, 1, 1,   
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   1, 1, 0, 0,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 1, 1,   0, 1, 0, 1,   0, 1, 1, 0,   0, 1, 0, 1,   
-1, 1, 0, 0,   0, 1, 0, 1,   1, 0, 0, 1,   1, 0, 1, 0,   
-0, 0, 1, 1,   1, 0, 1, 0,   0, 1, 1, 0,   1, 0, 1, 0,   
-1, 1, 0, 0,   1, 0, 1, 0,   1, 0, 0, 1,   0, 1, 0, 1,   
-
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   1, 0, 1, 1,   
-1, 1, 0, 0,   1, 1, 0, 0,   0, 1, 0, 0,   1, 1, 0, 1,   
-1, 1, 1, 0,   1, 0, 0, 0,   1, 1, 0, 0,   1, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 0, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   0, 1, 1, 0,   
-0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   0, 0, 0, 0,   
-
-0, 1, 0, 0,   0, 0, 1, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-1, 1, 1, 0,   0, 1, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 1, 0, 0,   0, 0, 1, 0,   0, 1, 1, 1,   1, 1, 1, 0,   
-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 0,   0, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 1, 1,   
-1, 1, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   1, 1, 0, 0,   
-0, 0, 1, 1,   1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   
-
-0, 1, 1, 0,   0, 1, 1, 0,   0, 1, 1, 1,   0, 0, 0, 1,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 1, 1, 0,   1, 0, 0, 0,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 0, 0, 0,   1, 1, 1, 0,   
-1, 0, 0, 1,   1, 0, 0, 1,   0, 0, 0, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-1, 1, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 0,   0, 1, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 1, 1, 1,   
-
-};
-
-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
-
-static const int shapeindex_to_compressed_indices[NSHAPES*2] = 
-{
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-
-	0,15,  0, 2,  0, 8,  0, 2,
-	0, 2,  0, 8,  0, 8,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 8,  0, 8,  0, 2,  0, 2,
-
-	0,15,  0,15,  0, 6,  0, 8,
-	0, 2,  0, 8,  0,15,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 2,  0,15,  0,15,  0, 6,
-
-	0, 6,  0, 2,  0, 6,  0, 8,
-	0,15,  0,15,  0, 2,  0, 2,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0, 2,  0, 2,  0,15
-
-};
-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*2+(region)]
-
-#endif
diff --git a/3rdparty/nvtt/bc6h/tile.h b/3rdparty/nvtt/bc6h/tile.h
deleted file mode 100644
index 6e642a269..000000000
--- a/3rdparty/nvtt/bc6h/tile.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#ifndef _ZOH_TILE_H
-#define _ZOH_TILE_H
-
-#include "zoh_utils.h"
-#include "nvmath/vector.h"
-#include <math.h>
-
-namespace ZOH {
-
-//#define	USE_IMPORTANCE_MAP	1		// define this if you want to increase importance of some pixels in tile
-class Tile
-{
-public:
-	// NOTE: this returns the appropriately-clamped BIT PATTERN of the half as an INTEGRAL float value
-	static float half2float(uint16 h)
-	{
-		return (float) Utils::ushort_to_format(h);
-	}
-	// NOTE: this is the inverse of the above operation
-	static uint16 float2half(float f)
-	{
-		return Utils::format_to_ushort((int)f);
-	}
-
-	// look for adjacent pixels that are identical. if there are enough of them, increase their importance
-	void generate_importance_map()
-	{
-		// initialize
-		for (int y=0; y<size_y; ++y)
-		for (int x=0; x<size_x; ++x)
-		{
-			// my importance is increased if I am identical to any of my 4-neighbors
-			importance_map[y][x] = match_4_neighbor(x,y) ? 5.0f : 1.0f;
-		}
-	}
-	bool is_equal(int x, int y, int xn, int yn)
-	{
-		if (xn < 0 || xn >= size_x || yn < 0 || yn >= size_y)
-			return false;
-		return( (data[y][x].x == data[yn][xn].x) &&
-				(data[y][x].y == data[yn][xn].y) &&
-				(data[y][x].z == data[yn][xn].z) );
-	}
-
-#ifdef USE_IMPORTANCE_MAP
-	bool match_4_neighbor(int x, int y)
-	{
-		return is_equal(x,y,x-1,y) || is_equal(x,y,x+1,y) || is_equal(x,y,x,y-1) || is_equal(x,y,x,y+1);
-	}
-#else
-	bool match_4_neighbor(int, int)
-	{
-		return false;
-	}
-#endif
-
-	Tile() {};
-	~Tile(){};
-	Tile(int xs, int ys) {size_x = xs; size_y = ys;}
-
-	static const int TILE_H = 4;
-	static const int TILE_W = 4;
-	static const int TILE_TOTAL = TILE_H * TILE_W;
-    nv::Vector3 data[TILE_H][TILE_W];
-	float importance_map[TILE_H][TILE_W];
-	int	size_x, size_y;			// actual size of tile
-};
-
-}
-
-#endif // _ZOH_TILE_H
diff --git a/3rdparty/nvtt/bc6h/zoh.cpp b/3rdparty/nvtt/bc6h/zoh.cpp
deleted file mode 100644
index 3053ea159..000000000
--- a/3rdparty/nvtt/bc6h/zoh.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// the zoh compressor and decompressor
-
-#include "tile.h"
-#include "zoh.h"
-
-#include <string.h> // memcpy
-
-using namespace ZOH;
-
-
-bool ZOH::isone(const char *block)
-{
-	char code = block[0] & 0x1F;
-
-	return (code == 0x03 || code == 0x07 || code == 0x0b || code == 0x0f);
-}
-
-void ZOH::compress(const Tile &t, char *block)
-{
-	char oneblock[ZOH::BLOCKSIZE], twoblock[ZOH::BLOCKSIZE];
-
-	float mseone = ZOH::compressone(t, oneblock);
-	float msetwo = ZOH::compresstwo(t, twoblock);
-
-	if (mseone <= msetwo)
-		memcpy(block, oneblock, ZOH::BLOCKSIZE);
-	else
-		memcpy(block, twoblock, ZOH::BLOCKSIZE);
-}
-
-void ZOH::decompress(const char *block, Tile &t)
-{
-	if (ZOH::isone(block))
-		ZOH::decompressone(block, t);
-	else
-		ZOH::decompresstwo(block, t);
-}
-
-/*
-void ZOH::compress(string inf, string zohf)
-{
-	Array2D<Rgba> pixels;
-	int w, h;
-	char block[ZOH::BLOCKSIZE];
-
-	Exr::readRgba(inf, pixels, w, h);
-	FILE *zohfile = fopen(zohf.c_str(), "wb");
-	if (zohfile == NULL) throw "Unable to open .zoh file for write";
-
-	// stuff for progress bar O.o
-	int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
-	int tilecnt = 0;
-	int ndots = 25;
-	int dotcnt = 0;
-	printf("Progress [");
-	for (int i=0; i<ndots;++i) printf(" ");
-	printf("]\rProgress ["); fflush(stdout);
-
-	// convert to tiles and compress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			t.insert(pixels, x, y);
-
-			ZOH::compress(t, block);
-			if (fwrite(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
-				throw "File error on write";
-
-			// progress bar
-			++tilecnt;
-			if (tilecnt > (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; }
-		}
-	}
-
-	printf("]\n");		// advance to next line finally
-
-	if (fclose(zohfile)) throw "Close failed on .zoh file";
-}
-
-static int str2int(std::string s)
-{
-	int thing;
-	std::stringstream str (stringstream::in | stringstream::out);
-	str << s;
-	str >> thing;
-	return thing;
-}
-
-// zoh file name is ...-w-h.zoh, extract width and height
-static void extract(string zohf, int &w, int &h)
-{
-	size_t n = zohf.rfind('.', zohf.length()-1);
-	size_t n1 = zohf.rfind('-', n-1);
-	size_t n2 = zohf.rfind('-', n1-1);
-	string width = zohf.substr(n2+1, n1-n2-1);
-	w = str2int(width);
-	string height = zohf.substr(n1+1, n-n1-1);
-	h = str2int(height);
-}
-
-static int mode_to_prec[] = {
-	10,7,11,10,
-	10,7,11,11,
-	10,7,11,12,
-	10,7,9,16,
-	10,7,8,-1,
-	10,7,8,-1,
-	10,7,8,-1,
-	10,7,6,-1,
-};
-
-static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions;
-
-static void stats(char block[ZOH::BLOCKSIZE])
-{
-	char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++;
-	int prec = mode_to_prec[mode];
-	nvAssert (prec != -1);
-	if (!ZOH::isone(block))
-	{
-		tworegions++;
-		prechisttwo[prec]++;
-		int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3);
-		shapeindexhist[shapeindex]++;
-	}
-	else
-	{
-		oneregion++;
-		prechistone[prec]++;
-	}
-}
-
-static void printstats()
-{
-	printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]);
-	printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]);
-	printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]);
-	printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]);
-	printf("\nOne region %5.2f%%  Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions));
-	printf("\n");
-}
-
-void ZOH::decompress(string zohf, string outf)
-{
-	Array2D<Rgba> pixels;
-	int w, h;
-	char block[ZOH::BLOCKSIZE];
-
-	extract(zohf, w, h);
-	FILE *zohfile = fopen(zohf.c_str(), "rb");
-	if (zohfile == NULL) throw "Unable to open .zoh file for read";
-	pixels.resizeErase(h, w);
-
-	// convert to tiles and decompress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			if (fread(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
-				throw "File error on read";
-
-			stats(block);	// collect statistics
-
-			ZOH::decompress(block, t);
-
-			t.extract(pixels, x, y);
-		}
-	}
-	if (fclose(zohfile)) throw "Close failed on .zoh file";
-	Exr::writeRgba(outf, pixels, w, h);
-
-#ifndef EXTERNAL_RELEASE
-	printstats();	// print statistics
-#endif
-}
-*/
diff --git a/3rdparty/nvtt/bc6h/zoh.h b/3rdparty/nvtt/bc6h/zoh.h
deleted file mode 100644
index d3003cbc3..000000000
--- a/3rdparty/nvtt/bc6h/zoh.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#pragma once
-#ifndef _ZOH_H
-#define _ZOH_H
-
-#include "tile.h"
-
-namespace ZOH {
-
-// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f
-
-static const int NREGIONS_TWO	= 2;
-static const int NREGIONS_ONE	= 1;
-static const int NCHANNELS		= 3;
-
-struct FltEndpts
-{
-    nv::Vector3 A;
-    nv::Vector3 B;
-};
-
-struct IntEndpts
-{
-	int A[NCHANNELS];
-	int B[NCHANNELS];
-};
-
-struct ComprEndpts
-{
-	uint A[NCHANNELS];
-	uint B[NCHANNELS];
-};
-
-static const int BLOCKSIZE=16;
-static const int BITSIZE=128;
-
-void compress(const Tile &t, char *block);
-void decompress(const char *block, Tile &t);
-
-float compressone(const Tile &t, char *block);
-float compresstwo(const Tile &t, char *block);
-void decompressone(const char *block, Tile &t);
-void decompresstwo(const char *block, Tile &t);
-
-float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block);
-float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]);
-
-float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block);
-float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]);
-
-bool isone(const char *block);
-
-}
-
-#endif // _ZOH_H
diff --git a/3rdparty/nvtt/bc6h/zoh_utils.cpp b/3rdparty/nvtt/bc6h/zoh_utils.cpp
deleted file mode 100644
index fde3200e6..000000000
--- a/3rdparty/nvtt/bc6h/zoh_utils.cpp
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Utility and common routines
-
-#include "zoh_utils.h"
-#include "nvmath/vector.inl"
-#include <math.h>
-
-using namespace nv;
-using namespace ZOH;
-
-static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64};										// divided by 64
-static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};		// divided by 64
-
-/*static*/ Format Utils::FORMAT;
-
-int Utils::lerp(int a, int b, int i, int denom)
-{
-	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
-	nvDebugCheck (i >= 0 && i <= denom);
-
-	int round = 32, shift = 6;
-	const int *weights;
-
-	switch(denom)
-	{
-	case 3:		denom *= 5; i *= 5;	// fall through to case 15
-	case 15:	weights = denom15_weights_64; break;
-	case 7:		weights = denom7_weights_64; break;
-	default:	nvDebugCheck(0);
-	}
-
-	return (a*weights[denom-i] +b*weights[i] + round) >> shift;
-}
-
-Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom)
-{
-	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
-	nvDebugCheck (i >= 0 && i <= denom);
-
-	int shift = 6;
-	const int *weights;
-
-	switch(denom)
-	{
-	case 3:		denom *= 5; i *= 5;	// fall through to case 15
-	case 15:	weights = denom15_weights_64; break;
-	case 7:		weights = denom7_weights_64; break;
-	default:	nvUnreachable();
-	}
-
-	// no need to round these as this is an exact division
-	return (a*float(weights[denom-i]) +b*float(weights[i])) / float(1 << shift);
-}
-
-
-/*
-	For unsigned f16, clamp the input to [0,F16MAX]. Thus u15.
-	For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16.
-
-	The conversions proceed as follows:
-
-	unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX.
-	signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value
-	unsigned int: get bits. return as a positive value.
-	signed int. get bits. return as a value in -32768..32767.
-
-	The inverse conversions are just the inverse of the above.
-*/
-
-// clamp the 3 channels of the input vector to the allowable range based on FORMAT
-// note that each channel is a float storing the allowable range as a bit pattern converted to float
-// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX]
-
-void Utils::clamp(Vector3 &v)
-{
-	for (int i=0; i<3; ++i)
-	{
-		switch(Utils::FORMAT)
-		{
-		case UNSIGNED_F16:
-			if (v.component[i] < 0.0) v.component[i] = 0;
-			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
-			break;
-
-		case SIGNED_F16:
-			if (v.component[i] < -F16MAX) v.component[i] = -F16MAX;
-			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
-			break;
-
-		default:
-			nvUnreachable();
-		}
-	}
-}
-
-// convert a u16 value to s17 (represented as an int) based on the format expected
-int Utils::ushort_to_format(unsigned short input)
-{
-	int out, s;
-
-	// clamp to the valid range we are expecting
-	switch (Utils::FORMAT)
-	{
-	case UNSIGNED_F16:
-		if (input & F16S_MASK) out = 0;
-		else if (input > F16MAX) out = F16MAX;
-		else out = input;
-		break;
-
-	case SIGNED_F16:
-		s = input & F16S_MASK;
-		input &= F16EM_MASK;
-		if (input > F16MAX) out = F16MAX;
-		else out = input;
-		out = s ? -out : out;
-		break;
-	}
-	return out;
-}
-
-// convert a s17 value to u16 based on the format expected
-unsigned short Utils::format_to_ushort(int input)
-{
-	unsigned short out;
-
-	// clamp to the valid range we are expecting
-	switch (Utils::FORMAT)
-	{
-	case UNSIGNED_F16:
-		nvDebugCheck (input >= 0 && input <= F16MAX);
-		out = input;
-		break;
-
-	case SIGNED_F16:
-		nvDebugCheck (input >= -F16MAX && input <= F16MAX);
-		// convert to sign-magnitude
-		int s;
-		if (input < 0) { s = F16S_MASK; input = -input; }
-		else           { s = 0; }
-		out = s | input;
-		break;
-	}
-	return out;
-}
-
-// quantize the input range into equal-sized bins
-int Utils::quantize(float value, int prec)
-{
-	int q, ivalue, s;
-
-	nvDebugCheck (prec > 1);	// didn't bother to make it work for 1
-
-	value = (float)floor(value + 0.5);
-
-	int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0;	// bias precisions 11..16 to get a more accurate quantization
-
-	switch (Utils::FORMAT)
-	{
-	case UNSIGNED_F16:
-		nvDebugCheck (value >= 0 && value <= F16MAX);
-		ivalue = (int)value;
-		q = ((ivalue << prec) + bias) / (F16MAX+1);
-		nvDebugCheck (q >= 0 && q < (1 << prec));
-		break;
-
-	case SIGNED_F16:
-		nvDebugCheck (value >= -F16MAX && value <= F16MAX);
-		// convert to sign-magnitude
-		ivalue = (int)value;
-		if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0;
-
-		q = ((ivalue << (prec-1)) + bias) / (F16MAX+1);
-		if (s)
-			q = -q;
-		nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
-		break;
-	}
-
-	return q;
-}
-
-int Utils::finish_unquantize(int q, int prec)
-{
-	if (Utils::FORMAT == UNSIGNED_F16)
-		return (q * 31) >> 6;										// scale the magnitude by 31/64
-	else if (Utils::FORMAT == SIGNED_F16)
-		return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5;		// scale the magnitude by 31/32
-	else
-		return q;
-}
-
-// unquantize each bin to midpoint of original bin range, except
-// for the end bins which we push to an endpoint of the bin range.
-// we do this to ensure we can represent all possible original values.
-// the asymmetric end bins do not affect PSNR for the test images.
-//
-// code this function assuming an arbitrary bit pattern as the encoded block
-int Utils::unquantize(int q, int prec)
-{
-	int unq, s;
-
-	nvDebugCheck (prec > 1);	// not implemented for prec 1
-
-	switch (Utils::FORMAT)
-	{
-	// modify this case to move the multiplication by 31 after interpolation.
-	// Need to use finish_unquantize.
-
-	// since we have 16 bits available, let's unquantize this to 16 bits unsigned
-	// thus the scale factor is [0-7c00)/[0-10000) = 31/64
-	case UNSIGNED_F16:
-		if (prec >= 15) 
-			unq = q;
-		else if (q == 0) 
-			unq = 0;
-		else if (q == ((1<<prec)-1)) 
-			unq = U16MAX;
-		else
-			unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
-		break;
-
-	// here, let's stick with S16 (no apparent quality benefit from going to S17)
-	// range is (-7c00..7c00)/(-8000..8000) = 31/32
-	case SIGNED_F16:
-		// don't remove this test even though it appears equivalent to the code below
-		// as it isn't -- the code below can overflow for prec = 16
-		if (prec >= 16)
-			unq = q;
-		else
-		{
-			if (q < 0) { s = 1; q = -q; } else s = 0;
-
-			if (q == 0)
-				unq = 0;
-			else if (q >= ((1<<(prec-1))-1))
-				unq = s ? -S16MAX : S16MAX;
-			else
-			{
-				unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
-				if (s)
-					unq = -unq;
-			}
-		}
-		break;
-	}
-	return unq;
-}
-
-
-
-// pick a norm!
-#define	NORM_EUCLIDEAN 1
-
-float Utils::norm(const Vector3 &a, const Vector3 &b)
-{
-#ifdef	NORM_EUCLIDEAN
-	return lengthSquared(a - b);
-#endif
-#ifdef	NORM_ABS
-	Vector3 err = a - b;
-	return fabs(err.x) + fabs(err.y) + fabs(err.z);
-#endif
-}
-
-// parse <name>[<start>{:<end>}]{,}	
-// the pointer starts here         ^
-// name is 1 or 2 chars and matches field names. start and end are decimal numbers
-void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len)
-{
-	if (ptr <= 0) return;
-	--ptr;
-	if (encoding[ptr] == ',') --ptr;
-	nvDebugCheck (encoding[ptr] == ']');
-	--ptr;
-	endbit = 0;
-	int scale = 1;
-	while (encoding[ptr] != ':' && encoding[ptr] != '[')
-	{
-		nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
-		endbit += (encoding[ptr--] - '0') * scale;
-		scale *= 10;
-	}
-	int startbit = 0; scale = 1;
-	if (encoding[ptr] == '[')
-		startbit = endbit;
-	else  
-	{
-		ptr--;
-		while (encoding[ptr] != '[')
-		{
-			nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
-			startbit += (encoding[ptr--] - '0') * scale;
-			scale *= 10;
-		}
-	}
-	len = startbit - endbit + 1;	// startbit>=endbit note
-	--ptr;
-	if (encoding[ptr] == 'm')		field = FIELD_M;
-	else if (encoding[ptr] == 'd')	field = FIELD_D;
-	else {
-		// it's wxyz
-		nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z');
-		int foo = encoding[ptr--] - 'w';
-		// now it is r g or b
-		if (encoding[ptr] == 'r')		foo += 10;
-		else if (encoding[ptr] == 'g')	foo += 20;
-		else if (encoding[ptr] == 'b')	foo += 30;
-		else nvDebugCheck(0);
-		field = (Field) foo;
-	}
-}
-
-
diff --git a/3rdparty/nvtt/bc6h/zoh_utils.h b/3rdparty/nvtt/bc6h/zoh_utils.h
deleted file mode 100644
index 3ce33ce94..000000000
--- a/3rdparty/nvtt/bc6h/zoh_utils.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// utility class holding common routines
-#ifndef _ZOH_UTILS_H
-#define _ZOH_UTILS_H
-
-#include "nvmath/vector.h"
-
-namespace ZOH {
-
-inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); }
-
-enum Field {
-    FIELD_M = 1,	// mode
-    FIELD_D = 2,	// distribution/shape
-    FIELD_RW = 10+0, FIELD_RX = 10+1, FIELD_RY = 10+2, FIELD_RZ = 10+3,	// red channel endpoints or deltas
-    FIELD_GW = 20+0, FIELD_GX = 20+1, FIELD_GY = 20+2, FIELD_GZ = 20+3,	// green channel endpoints or deltas
-    FIELD_BW = 30+0, FIELD_BX = 30+1, FIELD_BY = 30+2, FIELD_BZ = 30+3,	// blue channel endpoints or deltas
-};
-
-// some constants
-static const int F16S_MASK	=  0x8000;		// f16 sign mask
-static const int F16EM_MASK	=  0x7fff;		// f16 exp & mantissa mask
-static const int U16MAX		=  0xffff;
-static const int S16MIN		= -0x8000;
-static const int S16MAX		=  0x7fff;
-static const int INT16_MASK	=  0xffff;
-static const int F16MAX		=  0x7bff;		// MAXFLT bit pattern for halfs
-
-enum Format { UNSIGNED_F16, SIGNED_F16 };
-
-class Utils
-{
-public:
-    static Format FORMAT;     // this is a global -- we're either handling unsigned or unsigned half values
-
-    // error metrics
-    static float norm(const nv::Vector3 &a, const nv::Vector3 &b);
-    static float mpsnr_norm(const nv::Vector3 &a, int exposure, const nv::Vector3 &b);
-
-    // conversion & clamp
-    static int ushort_to_format(unsigned short input);
-    static unsigned short format_to_ushort(int input);
-
-    // clamp to format
-    static void clamp(nv::Vector3 &v);
-
-    // quantization and unquantization
-    static int finish_unquantize(int q, int prec);
-    static int unquantize(int q, int prec);
-    static int quantize(float value, int prec);
-
-    static void parse(const char *encoding, int &ptr, Field & field, int &endbit, int &len);
-
-    // lerping
-    static int lerp(int a, int b, int i, int denom);
-    static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom);
-};
-
-}
-
-#endif // _ZOH_UTILS_H
diff --git a/3rdparty/nvtt/bc6h/zohone.cpp b/3rdparty/nvtt/bc6h/zohone.cpp
deleted file mode 100644
index 365267614..000000000
--- a/3rdparty/nvtt/bc6h/zohone.cpp
+++ /dev/null
@@ -1,799 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// one region zoh compress/decompress code
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-#include "bits.h"
-#include "tile.h"
-#include "zoh.h"
-#include "zoh_utils.h"
-
-#include "nvmath/vector.inl"
-#include "nvmath/fitting.h"
-
-#include <string.h> // strlen
-#include <float.h> // FLT_MAX
-
-using namespace nv;
-using namespace ZOH;
-
-#define NINDICES	16
-#define	INDEXBITS	4
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-
-#define	NSHAPES	1
-
-static const int shapes[NSHAPES] =
-{
-    0x0000
-};	// only 1 shape
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NDELTA	2
-
-struct Chanpat
-{
-    int prec[NDELTA];		// precision pattern for one channel
-};
-
-struct Pattern
-{
-    Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
-    int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-    int mode;				// associated mode value
-    int modebits;			// number of mode bits
-    const char *encoding;	// verilog description of encoding for this mode
-};
-
-#define MAXMODEBITS	5
-#define	MAXMODES (1<<MAXMODEBITS)
-
-#define	NPATTERNS 4
-
-static const Pattern patterns[NPATTERNS] =
-{
-    16,4,  16,4,  16,4,   1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    12,8,  12,8,  12,8,   1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    11,9,  11,9,  11,9,   1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    10,10, 10,10, 10,10,  0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-};
-
-// mapping of mode to the corresponding index in pattern
-static const int mode_to_pat[MAXMODES] = {
-    -1,-1,-1,
-    3,	// 0x03
-    -1,-1,-1,
-    2,	// 0x07
-    -1,-1,-1,
-    1,	// 0x0b
-    -1,-1,-1,
-    0,	// 0x0f
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-};
-
-#define	R_0(ep)	(ep)[0].A[i]
-#define	R_1(ep)	(ep)[0].B[i]
-#define	MASK(n)	((1<<(n))-1)
-
-// compress endpoints
-static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
-{
-    if (p.transformed)
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
-            R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
-        }
-    }
-    else
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
-            R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
-        }
-    }
-}
-
-// decompress endpoints
-static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
-{
-    bool issigned = Utils::FORMAT == SIGNED_F16;
-
-    if (p.transformed)
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
-            int t;
-            t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
-            R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
-        }
-    }
-    else
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
-            R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
-        }
-    }
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
-{
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
-        q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
-        q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
-        q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
-        q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
-        q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
-    }
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_one have a 0 high-order bit
-// index_one is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-    int index_positions[NREGIONS_ONE];
-
-    index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        int x = index_positions[region] & 3;
-        int y = (index_positions[region] >> 2) & 3;
-        nvDebugCheck(REGION(x,y,shapeindex) == region);		// double check the table
-        if (indices[y][x] & HIGH_INDEXBIT)
-        {
-            // high bit is set, swap the endpts and indices for this region
-            int t;
-            for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-            for (int y = 0; y < Tile::TILE_H; y++)
-                for (int x = 0; x < Tile::TILE_W; x++)
-                    if (REGION(x,y,shapeindex) == region)
-                        indices[y][x] = NINDICES - 1 - indices[y][x];
-        }
-    }
-}
-
-// endpoints fit only if the compression was lossless
-static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
-{
-    IntEndpts uncompressed[NREGIONS_ONE];
-
-    decompress_endpts(compressed, uncompressed, p);
-
-    for (int j=0; j<NREGIONS_ONE; ++j)
-	for (int i=0; i<NCHANNELS; ++i)
-	{
-        if (orig[j].A[i] != uncompressed[j].A[i]) return false;
-        if (orig[j].B[i] != uncompressed[j].B[i]) return false;
-    }
-    return true;
-}
-
-static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
-{
-    // interpret the verilog backwards and process it
-    int m = p.mode;
-    int rw = endpts[0].A[0], rx = endpts[0].B[0];
-    int gw = endpts[0].A[1], gx = endpts[0].B[1];
-    int bw = endpts[0].A[2], bx = endpts[0].B[2];
-    int ptr = int(strlen(p.encoding));
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-        switch(field)
-        {
-        case FIELD_M:	out.write( m >> endbit, len); break;
-        case FIELD_RW:	out.write(rw >> endbit, len); break;
-        case FIELD_RX:	out.write(rx >> endbit, len); break;
-        case FIELD_GW:	out.write(gw >> endbit, len); break;
-        case FIELD_GX:	out.write(gx >> endbit, len); break;
-        case FIELD_BW:	out.write(bw >> endbit, len); break;
-        case FIELD_BX:	out.write(bx >> endbit, len); break;
-
-        case FIELD_D:
-        case FIELD_RY:
-        case FIELD_RZ:
-        case FIELD_GY:
-        case FIELD_GZ:
-        case FIELD_BY:
-        case FIELD_BZ:
-        default: nvUnreachable();
-        }
-    }
-}
-
-static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
-{
-    // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
-    int mode = in.read(2);
-    if (mode != 0x00 && mode != 0x01)
-        mode = (in.read(3) << 2) | mode;
-
-    int pat_index = mode_to_pat[mode];
-
-    nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
-    nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
-
-    p = patterns[pat_index];
-
-    int d;
-    int rw, rx;
-    int gw, gx;
-    int bw, bx;
-
-    d = 0;
-    rw = rx = 0;
-    gw = gx = 0;
-    bw = bx = 0;
-
-    int ptr = int(strlen(p.encoding));
-
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-
-        switch(field)
-        {
-        case FIELD_M:	break;	// already processed so ignore
-        case FIELD_RW:	rw |= in.read(len) << endbit; break;
-        case FIELD_RX:	rx |= in.read(len) << endbit; break;
-        case FIELD_GW:	gw |= in.read(len) << endbit; break;
-        case FIELD_GX:	gx |= in.read(len) << endbit; break;
-        case FIELD_BW:	bw |= in.read(len) << endbit; break;
-        case FIELD_BX:	bx |= in.read(len) << endbit; break;
-
-        case FIELD_D:
-        case FIELD_RY:
-        case FIELD_RZ:
-        case FIELD_GY:
-        case FIELD_GZ:
-        case FIELD_BY:
-        case FIELD_BZ:
-        default: nvUnreachable();
-        }
-    }
-
-    nvDebugCheck (in.getptr() == 128 - 63);
-
-    endpts[0].A[0] = rw; endpts[0].B[0] = rx;
-    endpts[0].A[1] = gw; endpts[0].B[1] = gx;
-    endpts[0].A[2] = bw; endpts[0].B[2] = bx;
-}
-
-// compress index 0
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        int x = POS_TO_X(pos);
-        int y = POS_TO_Y(pos);
-
-        out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
-    }
-}
-
-static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-    Bits out(block, ZOH::BITSIZE);
-
-    write_header(endpts, p, out);
-
-    write_indices(indices, shapeindex, out);
-
-    nvDebugCheck(out.getptr() == ZOH::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
-{
-    // scale endpoints
-    int a, b;			// really need a IntVector3...
-
-    a = Utils::unquantize(endpts.A[0], prec);
-    b = Utils::unquantize(endpts.B[0], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-
-    a = Utils::unquantize(endpts.A[1], prec);
-    b = Utils::unquantize(endpts.B[1], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-
-    a = Utils::unquantize(endpts.A[2], prec);
-    b = Utils::unquantize(endpts.B[2], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-}
-
-// position 0 was compressed
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        int x = POS_TO_X(pos);
-        int y = POS_TO_Y(pos);
-
-        indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
-    }
-}
-
-void ZOH::decompressone(const char *block, Tile &t)
-{
-    Bits in(block, ZOH::BITSIZE);
-
-    Pattern p;
-    IntEndpts endpts[NREGIONS_ONE];
-    ComprEndpts compr_endpts[NREGIONS_ONE];
-
-    read_header(in, compr_endpts, p);
-    int shapeindex = 0;		// only one shape
-
-    decompress_endpts(compr_endpts, endpts, p);
-
-    Vector3 palette[NREGIONS_ONE][NINDICES];
-    for (int r = 0; r < NREGIONS_ONE; ++r)
-        generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
-
-    // read indices
-    int indices[Tile::TILE_H][Tile::TILE_W];
-
-    read_indices(in, shapeindex, indices);
-
-    nvDebugCheck(in.getptr() == ZOH::BITSIZE);
-
-    // lookup
-    for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-            t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
-{
-    Vector3 palette[NINDICES];
-    float toterr = 0;
-    Vector3 err;
-
-    generate_palette_quantized(endpts, prec, palette);
-
-    for (int i = 0; i < np; ++i)
-    {
-        float err, besterr;
-
-        besterr = Utils::norm(colors[i], palette[0]) * importance[i];
-
-        for (int j = 1; j < NINDICES && besterr > 0; ++j)
-        {
-            err = Utils::norm(colors[i], palette[j]) * importance[i];
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-                besterr = err;
-        }
-        toterr += besterr;
-    }
-    return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec, 
-                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_ONE][NINDICES];
-
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        generate_palette_quantized(endpts[region], prec, &palette[region][0]);
-        toterr[region] = 0;
-    }
-
-    Vector3 err;
-
-    for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-        int region = REGION(x,y,shapeindex);
-        float err, besterr;
-
-        besterr = Utils::norm(tile.data[y][x], palette[region][0]);
-        indices[y][x] = 0;
-
-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
-        {
-            err = Utils::norm(tile.data[y][x], palette[region][i]);
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-            {
-                besterr = err;
-                indices[y][x] = i;
-            }
-        }
-        toterr[region] += besterr;
-    }
-}
-
-static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
-                          float old_err, int do_b)
-{
-    // we have the old endpoints: old_endpts
-    // we have the perturbed endpoints: new_endpts
-    // we have the temporary endpoints: temp_endpts
-
-    IntEndpts temp_endpts;
-    float min_err = old_err;		// start with the best current error
-    int beststep;
-
-    // copy real endpoints so we can perturb them
-    for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
-
-    // do a logarithmic search for the best error for this endpoint (which)
-    for (int step = 1 << (prec-1); step; step >>= 1)
-    {
-        bool improved = false;
-        for (int sign = -1; sign <= 1; sign += 2)
-        {
-            if (do_b == 0)
-            {
-                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-                    continue;
-            }
-            else
-            {
-                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-                    continue;
-            }
-
-            float err = map_colors(colors, importance, np, temp_endpts, prec);
-
-            if (err < min_err)
-            {
-                improved = true;
-                min_err = err;
-                beststep = sign * step;
-            }
-        }
-        // if this was an improvement, move the endpoint and continue search from there
-        if (improved)
-        {
-            if (do_b == 0)
-                new_endpts.A[ch] += beststep;
-            else
-                new_endpts.B[ch] += beststep;
-        }
-    }
-    return min_err;
-}
-
-static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
-{
-    float opt_err = orig_err;
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        opt_endpts.A[ch] = orig_endpts.A[ch];
-        opt_endpts.B[ch] = orig_endpts.B[ch];
-    }
-    /*
-        err0 = perturb(rgb0, delta0)
-        err1 = perturb(rgb1, delta1)
-        if (err0 < err1)
-            if (err0 >= initial_error) break
-            rgb0 += delta0
-            next = 1
-        else
-            if (err1 >= initial_error) break
-            rgb1 += delta1
-            next = 0
-        initial_err = map()
-        for (;;)
-            err = perturb(next ? rgb1:rgb0, delta)
-            if (err >= initial_err) break
-            next? rgb1 : rgb0 += delta
-            initial_err = err
-	*/
-    IntEndpts new_a, new_b;
-    IntEndpts new_endpt;
-    int do_b;
-
-    // now optimize each channel separately
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        // figure out which endpoint when perturbed gives the most improvement and start there
-        // if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1);	// perturb endpt B
-
-        if (err0 < err1)
-        {
-            if (err0 >= opt_err)
-                continue;
-
-            opt_endpts.A[ch] = new_a.A[ch];
-            opt_err = err0;
-            do_b = 1;		// do B next
-        }
-        else
-        {
-            if (err1 >= opt_err)
-                continue;
-            opt_endpts.B[ch] = new_b.B[ch];
-            opt_err = err1;
-            do_b = 0;		// do A next
-        }
-
-        // now alternate endpoints and keep trying until there is no improvement
-        for (;;)
-        {
-            float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
-            if (err >= opt_err)
-                break;
-            if (do_b == 0)
-                opt_endpts.A[ch] = new_endpt.A[ch];
-            else
-                opt_endpts.B[ch] = new_endpt.B[ch];
-            opt_err = err;
-            do_b = 1 - do_b;	// now move the other endpoint
-        }
-    }
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE], 
-                            const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
-{
-    Vector3 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-    float err = 0;
-
-    for (int region=0; region<NREGIONS_ONE; ++region)
-    {
-        // collect the pixels in the region
-        int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    ++np;
-                }
-            }
-        }
-
-        optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
-    }
-}
-
-/* optimization algorithm
-    for each pattern
-        convert endpoints using pattern precision
-        assign indices and get initial error
-        compress indices (and possibly reorder endpoints)
-        transform endpoints
-        if transformed endpoints fit pattern
-            get original endpoints back
-            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-            compress new indices
-            transform new endpoints
-            if new endpoints fit pattern AND if error is improved
-                emit compressed block with new data
-            else
-                emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
-{
-    float orig_err[NREGIONS_ONE], opt_err[NREGIONS_ONE], orig_toterr, opt_toterr;
-    IntEndpts orig_endpts[NREGIONS_ONE], opt_endpts[NREGIONS_ONE];
-    ComprEndpts compr_orig[NREGIONS_ONE], compr_opt[NREGIONS_ONE];
-    int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-    for (int sp = 0; sp < NPATTERNS; ++sp)
-    {
-        // precisions for all channels need to be the same
-        for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
-
-        quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
-        assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
-        swap_indices(orig_endpts, orig_indices, shapeindex_best);
-        compress_endpts(orig_endpts, compr_orig, patterns[sp]);
-        if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
-        {
-            optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
-            assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
-            swap_indices(opt_endpts, opt_indices, shapeindex_best);
-            compress_endpts(opt_endpts, compr_opt, patterns[sp]);
-            orig_toterr = opt_toterr = 0;
-            for (int i=0; i < NREGIONS_ONE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-
-            if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
-            {
-                emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
-                return opt_toterr;
-            }
-            else
-            {
-                // either it stopped fitting when we optimized it, or there was no improvement
-                // so go back to the unoptimized endpoints which we know will fit
-                emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
-                return orig_toterr;
-            }
-        }
-    }
-
-	nvAssert (false); // "No candidate found, should never happen (refineone.)";
-	return FLT_MAX;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
-{
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-            palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_ONE][NINDICES];
-
-    generate_palette_unquantized(endpts, palette);
-
-    float toterr = 0;
-    Vector3 err;
-
-    for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-        int region = REGION(x,y,shapeindex);
-        float err, besterr;
-
-        besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
-
-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
-        {
-            err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-                besterr = err;
-        }
-        toterr += besterr;
-    }
-    return toterr;
-}
-
-float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
-{
-    for (int region=0; region<NREGIONS_ONE; ++region)
-    {
-        int np = 0;
-        Vector3 colors[Tile::TILE_TOTAL];
-        Vector3 mean(0,0,0);
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x,y,shapeindex) == region)
-                {
-                    colors[np] = tile.data[y][x];
-                    mean += tile.data[y][x];
-                    ++np;
-                }
-            }
-        }
-
-        // handle simple cases
-        if (np == 0)
-        {
-            Vector3 zero(0,0,0);
-            endpts[region].A = zero;
-            endpts[region].B = zero;
-            continue;
-        }
-        else if (np == 1)
-        {
-            endpts[region].A = colors[0];
-            endpts[region].B = colors[0];
-            continue;
-        }
-        else if (np == 2)
-        {
-            endpts[region].A = colors[0];
-            endpts[region].B = colors[1];
-            continue;
-        }
-
-        mean /= float(np);
-
-        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-        // project each pixel value along the principal direction
-        float minp = FLT_MAX, maxp = -FLT_MAX;
-        for (int i = 0; i < np; i++)
-        {
-            float dp = dot(colors[i]-mean, direction);
-            if (dp < minp) minp = dp;
-            if (dp > maxp) maxp = dp;
-        }
-
-        // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-        endpts[region].A = mean + minp*direction;
-        endpts[region].B = mean + maxp*direction;
-
-        // clamp endpoints
-        // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-        // shape based on endpoints being clamped
-        Utils::clamp(endpts[region].A);
-        Utils::clamp(endpts[region].B);
-    }
-
-    return map_colors(tile, shapeindex, endpts);
-}
-
-float ZOH::compressone(const Tile &t, char *block)
-{
-    int shapeindex_best = 0;
-    FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
-    float msebest = FLT_MAX;
-
-    /*
-		collect the mse values that are within 5% of the best values
-		optimize each one and choose the best
-	*/
-    // hack for now -- just use the best value WORK
-    for (int i=0; i<NSHAPES && msebest>0.0; ++i)
-    {
-        float mse = roughone(t, i, tempendpts);
-        if (mse < msebest)
-        {
-            msebest = mse;
-            shapeindex_best = i;
-            memcpy(endptsbest, tempendpts, sizeof(endptsbest));
-        }
-
-    }
-    return refineone(t, shapeindex_best, endptsbest, block);
-}
diff --git a/3rdparty/nvtt/bc6h/zohtwo.cpp b/3rdparty/nvtt/bc6h/zohtwo.cpp
deleted file mode 100644
index 5a142945b..000000000
--- a/3rdparty/nvtt/bc6h/zohtwo.cpp
+++ /dev/null
@@ -1,883 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// two regions zoh compress/decompress code
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-/* optimization algorithm
-
-	get initial float endpoints
-	convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates.
-		note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible.
-	for each EC candidate in order from max precision to smaller precision
-		convert endpoints using the appropriate precision.
-		optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well.
-			(thus the endpoints and indices are in final form.)
-		transform and get bit delta.
-		if the bit delta fits, exit
-	if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever.
-		add a state variable to nvDebugCheck we only do this once.
-	convert to bit stream.
-	return the error.
-
-	Global optimization
-		order all tiles based on their errors
-		do something special for high-error tiles
-			the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image...
-
-	display an image that shows partitioning and precision selected for each tile
-*/
-
-#include "bits.h"
-#include "tile.h"
-#include "zoh.h"
-#include "zoh_utils.h"
-
-#include "nvmath/fitting.h"
-#include "nvmath/vector.inl"
-
-#include <string.h> // strlen
-#include <float.h> // FLT_MAX
-
-using namespace nv;
-using namespace ZOH;
-
-#define NINDICES	8
-#define	INDEXBITS	3
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#include "shapes_two.h"
-// use only the first 32 available shapes
-#undef NSHAPES
-#undef SHAPEBITS
-#define NSHAPES 32
-#define SHAPEBITS 5
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NDELTA	4
-
-struct Chanpat
-{
-    int prec[NDELTA];		// precision pattern for one channel
-};
-
-struct Pattern
-{
-    Chanpat chan[NCHANNELS];    // allow different bit patterns per channel -- but we still want constant precision per channel
-    int transformed;            // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-    int mode;                   // associated mode value
-    int modebits;               // number of mode bits
-    const char *encoding;       // verilog description of encoding for this mode
-};
-
-#define MAXMODEBITS	5
-#define	MAXMODES (1<<MAXMODEBITS)
-
-#define	NPATTERNS 10
-
-static const Pattern patterns[NPATTERNS] =
-{
-    11,5,5,5,	11,4,4,4,	11,4,4,4,	1,	0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    11,4,4,4,	11,5,5,5,	11,4,4,4,	1,	0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    11,4,4,4,	11,4,4,4,	11,5,5,5,	1,	0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    10,5,5,5,	10,5,5,5,	10,5,5,5,	1,	0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]",
-    9,5,5,5,	9,5,5,5,	9,5,5,5,	1,	0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]",
-    8,6,6,6,	8,5,5,5,	8,5,5,5,	1,	0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]",
-    8,5,5,5,	8,6,6,6,	8,5,5,5,	1,	0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]",
-    8,5,5,5,	8,5,5,5,	8,6,6,6,	1,	0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]",
-    7,6,6,6,	7,6,6,6,	7,6,6,6,	1,	0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]",
-    6,6,6,6,	6,6,6,6,	6,6,6,6,	0,	0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]",
-};
-
-// mapping of mode to the corresponding index in pattern
-// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -- return -2 for these
-static const int mode_to_pat[MAXMODES] = {	
-    3,	// 0x00
-    8,	// 0x01
-    0,	// 0x02
-    -1,-1,-1,
-    1,	// 0x06
-    -1,-1,-1,
-    2,	// 0x0a
-    -1,-1,-1,
-    4,	// 0x0e
-    -1,-1,-1,
-    5,	// 0x12
-    -2,-1,-1,
-    6,	// 0x16
-    -2,-1,-1,
-    7,	// 0x1a
-    -2,-1,-1,
-    9,	// 0x1e
-    -2
-};
-
-#define	R_0(ep)	(ep)[0].A[i]
-#define	R_1(ep)	(ep)[0].B[i]
-#define	R_2(ep)	(ep)[1].A[i]
-#define	R_3(ep)	(ep)[1].B[i]
-#define	MASK(n)	((1<<(n))-1)
-
-// compress endpoints
-static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p)
-{
-    if (p.transformed)
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
-            R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
-            R_2(out) = (R_2(in) - R_0(in)) & MASK(p.chan[i].prec[2]);
-            R_3(out) = (R_3(in) - R_0(in)) & MASK(p.chan[i].prec[3]);
-        }
-    }
-    else
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
-            R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
-            R_2(out) = R_2(in) & MASK(p.chan[i].prec[2]);
-            R_3(out) = R_3(in) & MASK(p.chan[i].prec[3]);
-        }
-    }
-}
-
-// decompress endpoints
-static void decompress_endpts(const ComprEndpts in[NREGIONS_TWO], IntEndpts out[NREGIONS_TWO], const Pattern &p)
-{
-    bool issigned = Utils::FORMAT == SIGNED_F16;
-
-    if (p.transformed)
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
-            int t;
-            t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
-            R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
-            t = SIGN_EXTEND(R_2(in), p.chan[i].prec[2]);
-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
-            R_2(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
-            t = SIGN_EXTEND(R_3(in), p.chan[i].prec[3]);
-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
-            R_3(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
-        }
-    }
-    else
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
-            R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
-            R_2(out) = issigned ? SIGN_EXTEND(R_2(in),p.chan[i].prec[2]) : R_2(in);
-            R_3(out) = issigned ? SIGN_EXTEND(R_3(in),p.chan[i].prec[3]) : R_3(in);
-        }
-    }
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS_TWO], int prec, IntEndpts q_endpts[NREGIONS_TWO])
-{
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
-        q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
-        q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
-        q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
-        q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
-        q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
-    }
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-static void swap_indices(IntEndpts endpts[NREGIONS_TWO], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-        int x = POS_TO_X(position);
-        int y = POS_TO_Y(position);
-        nvDebugCheck(REGION(x,y,shapeindex) == region);		// double check the table
-        if (indices[y][x] & HIGH_INDEXBIT)
-        {
-            // high bit is set, swap the endpts and indices for this region
-            int t;
-            for (int i=0; i<NCHANNELS; ++i)
-            {
-                t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
-            }
-
-            for (int y = 0; y < Tile::TILE_H; y++)
-                for (int x = 0; x < Tile::TILE_W; x++)
-                    if (REGION(x,y,shapeindex) == region)
-                        indices[y][x] = NINDICES - 1 - indices[y][x];
-        }
-    }
-}
-
-// endpoints fit only if the compression was lossless
-static bool endpts_fit(const IntEndpts orig[NREGIONS_TWO], const ComprEndpts compressed[NREGIONS_TWO], const Pattern &p)
-{
-    IntEndpts uncompressed[NREGIONS_TWO];
-
-    decompress_endpts(compressed, uncompressed, p);
-
-    for (int j=0; j<NREGIONS_TWO; ++j)
-    {
-	for (int i=0; i<NCHANNELS; ++i)
-	{
-            if (orig[j].A[i] != uncompressed[j].A[i]) return false;
-            if (orig[j].B[i] != uncompressed[j].B[i]) return false;
-        }
-    }
-    return true;
-}
-
-static void write_header(const ComprEndpts endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, Bits &out)
-{
-    // interpret the verilog backwards and process it
-    int m = p.mode;
-    int d = shapeindex;
-    int rw = endpts[0].A[0], rx = endpts[0].B[0], ry = endpts[1].A[0], rz = endpts[1].B[0];
-    int gw = endpts[0].A[1], gx = endpts[0].B[1], gy = endpts[1].A[1], gz = endpts[1].B[1];
-    int bw = endpts[0].A[2], bx = endpts[0].B[2], by = endpts[1].A[2], bz = endpts[1].B[2];
-    int ptr = int(strlen(p.encoding));
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-        switch(field)
-        {
-        case FIELD_M:	out.write( m >> endbit, len); break;
-        case FIELD_D:	out.write( d >> endbit, len); break;
-        case FIELD_RW:	out.write(rw >> endbit, len); break;
-        case FIELD_RX:	out.write(rx >> endbit, len); break;
-        case FIELD_RY:	out.write(ry >> endbit, len); break;
-        case FIELD_RZ:	out.write(rz >> endbit, len); break;
-        case FIELD_GW:	out.write(gw >> endbit, len); break;
-        case FIELD_GX:	out.write(gx >> endbit, len); break;
-        case FIELD_GY:	out.write(gy >> endbit, len); break;
-        case FIELD_GZ:	out.write(gz >> endbit, len); break;
-        case FIELD_BW:	out.write(bw >> endbit, len); break;
-        case FIELD_BX:	out.write(bx >> endbit, len); break;
-        case FIELD_BY:	out.write(by >> endbit, len); break;
-        case FIELD_BZ:	out.write(bz >> endbit, len); break;
-        default: nvUnreachable();
-        }
-    }
-}
-
-static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
-{
-    // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
-    int mode = in.read(2);
-    if (mode != 0x00 && mode != 0x01)
-        mode = (in.read(3) << 2) | mode;
-
-    int pat_index = mode_to_pat[mode];
-
-    if (pat_index == -2)
-        return false;		// reserved mode found
-
-    nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
-    nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
-
-    p = patterns[pat_index];
-
-    int d;
-    int rw, rx, ry, rz;
-    int gw, gx, gy, gz;
-    int bw, bx, by, bz;
-
-    d = 0;
-    rw = rx = ry = rz = 0;
-    gw = gx = gy = gz = 0;
-    bw = bx = by = bz = 0;
-
-    int ptr = int(strlen(p.encoding));
-
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-
-        switch(field)
-        {
-        case FIELD_M:	break;	// already processed so ignore
-        case FIELD_D:	 d |= in.read(len) << endbit; break;
-        case FIELD_RW:	rw |= in.read(len) << endbit; break;
-        case FIELD_RX:	rx |= in.read(len) << endbit; break;
-        case FIELD_RY:	ry |= in.read(len) << endbit; break;
-        case FIELD_RZ:	rz |= in.read(len) << endbit; break;
-        case FIELD_GW:	gw |= in.read(len) << endbit; break;
-        case FIELD_GX:	gx |= in.read(len) << endbit; break;
-        case FIELD_GY:	gy |= in.read(len) << endbit; break;
-        case FIELD_GZ:	gz |= in.read(len) << endbit; break;
-        case FIELD_BW:	bw |= in.read(len) << endbit; break;
-        case FIELD_BX:	bx |= in.read(len) << endbit; break;
-        case FIELD_BY:	by |= in.read(len) << endbit; break;
-        case FIELD_BZ:	bz |= in.read(len) << endbit; break;
-        default: nvUnreachable();
-        }
-    }
-
-    nvDebugCheck (in.getptr() == 128 - 46);
-
-    shapeindex = d;
-    endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
-    endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
-    endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
-
-    return true;
-}
-
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-    int positions[NREGIONS_TWO];
-
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-        positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        int x = POS_TO_X(pos);
-        int y = POS_TO_Y(pos);
-
-        bool match = false;
-
-        for (int r = 0; r < NREGIONS_TWO; ++r)
-            if (positions[r] == pos) { match = true; break; }
-
-        out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-    }
-}
-
-static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-    Bits out(block, ZOH::BITSIZE);
-
-    write_header(compr_endpts, shapeindex, p, out);
-
-    write_indices(indices, shapeindex, out);
-
-    nvDebugCheck(out.getptr() == ZOH::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
-{
-    // scale endpoints
-    int a, b;			// really need a IntVector3...
-
-    a = Utils::unquantize(endpts.A[0], prec);
-    b = Utils::unquantize(endpts.B[0], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-
-    a = Utils::unquantize(endpts.A[1], prec);
-    b = Utils::unquantize(endpts.B[1], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-
-    a = Utils::unquantize(endpts.A[2], prec);
-    b = Utils::unquantize(endpts.B[2], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-    int positions[NREGIONS_TWO];
-
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-        positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        int x = POS_TO_X(pos);
-        int y = POS_TO_Y(pos);
-
-        bool match = false;
-
-        for (int r = 0; r < NREGIONS_TWO; ++r)
-            if (positions[r] == pos) { match = true; break; }
-
-        indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-    }
-}
-
-void ZOH::decompresstwo(const char *block, Tile &t)
-{
-    Bits in(block, ZOH::BITSIZE);
-
-    Pattern p;
-    IntEndpts endpts[NREGIONS_TWO];
-    ComprEndpts compr_endpts[NREGIONS_TWO];
-    int shapeindex;
-
-    if (!read_header(in, compr_endpts, shapeindex, p))
-    {
-        // reserved mode, return all zeroes
-        for (int y = 0; y < Tile::TILE_H; y++)
-            for (int x = 0; x < Tile::TILE_W; x++)
-                t.data[y][x] = Vector3(0.0f);
-
-        return;
-    }
-
-    decompress_endpts(compr_endpts, endpts, p);
-
-    Vector3 palette[NREGIONS_TWO][NINDICES];
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-        generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
-
-    int indices[Tile::TILE_H][Tile::TILE_W];
-
-    read_indices(in, shapeindex, indices);
-
-    nvDebugCheck(in.getptr() == ZOH::BITSIZE);
-
-    // lookup
-    for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-        t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
-{
-    Vector3 palette[NINDICES];
-    float toterr = 0;
-    Vector3 err;
-
-    generate_palette_quantized(endpts, prec, palette);
-
-    for (int i = 0; i < np; ++i)
-    {
-        float err, besterr;
-
-        besterr = Utils::norm(colors[i], palette[0]) * importance[i];
-
-        for (int j = 1; j < NINDICES && besterr > 0; ++j)
-        {
-            err = Utils::norm(colors[i], palette[j]) * importance[i];
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-                besterr = err;
-        }
-        toterr += besterr;
-    }
-    return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec, 
-                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_TWO][NINDICES];
-
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        generate_palette_quantized(endpts[region], prec, &palette[region][0]);
-        toterr[region] = 0;
-    }
-
-    Vector3 err;
-
-    for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-        int region = REGION(x,y,shapeindex);
-        float err, besterr;
-
-        besterr = Utils::norm(tile.data[y][x], palette[region][0]);
-        indices[y][x] = 0;
-
-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
-        {
-            err = Utils::norm(tile.data[y][x], palette[region][i]);
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-            {
-                besterr = err;
-                indices[y][x] = i;
-            }
-        }
-        toterr[region] += besterr;
-    }
-}
-
-static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
-                          float old_err, int do_b)
-{
-    // we have the old endpoints: old_endpts
-    // we have the perturbed endpoints: new_endpts
-    // we have the temporary endpoints: temp_endpts
-
-    IntEndpts temp_endpts;
-    float min_err = old_err;		// start with the best current error
-    int beststep;
-
-    // copy real endpoints so we can perturb them
-    for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
-
-    // do a logarithmic search for the best error for this endpoint (which)
-    for (int step = 1 << (prec-1); step; step >>= 1)
-    {
-        bool improved = false;
-        for (int sign = -1; sign <= 1; sign += 2)
-        {
-            if (do_b == 0)
-            {
-                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-                    continue;
-            }
-            else
-            {
-                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-                    continue;
-            }
-
-            float err = map_colors(colors, importance, np, temp_endpts, prec);
-
-            if (err < min_err)
-            {
-                improved = true;
-                min_err = err;
-                beststep = sign * step;
-            }
-        }
-        // if this was an improvement, move the endpoint and continue search from there
-        if (improved)
-        {
-            if (do_b == 0)
-                new_endpts.A[ch] += beststep;
-            else
-                new_endpts.B[ch] += beststep;
-        }
-    }
-    return min_err;
-}
-
-static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
-{
-    float opt_err = orig_err;
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        opt_endpts.A[ch] = orig_endpts.A[ch];
-        opt_endpts.B[ch] = orig_endpts.B[ch];
-    }
-    /*
-        err0 = perturb(rgb0, delta0)
-        err1 = perturb(rgb1, delta1)
-        if (err0 < err1)
-            if (err0 >= initial_error) break
-            rgb0 += delta0
-            next = 1
-        else
-            if (err1 >= initial_error) break
-            rgb1 += delta1
-            next = 0
-        initial_err = map()
-        for (;;)
-            err = perturb(next ? rgb1:rgb0, delta)
-            if (err >= initial_err) break
-            next? rgb1 : rgb0 += delta
-            initial_err = err
-    */
-    IntEndpts new_a, new_b;
-    IntEndpts new_endpt;
-    int do_b;
-
-    // now optimize each channel separately
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        // figure out which endpoint when perturbed gives the most improvement and start there
-        // if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1);	// perturb endpt B
-
-        if (err0 < err1)
-        {
-            if (err0 >= opt_err)
-                continue;
-
-            opt_endpts.A[ch] = new_a.A[ch];
-            opt_err = err0;
-            do_b = 1;		// do B next
-        }
-        else
-        {
-            if (err1 >= opt_err)
-                continue;
-            opt_endpts.B[ch] = new_b.B[ch];
-            opt_err = err1;
-            do_b = 0;		// do A next
-        }
-
-        // now alternate endpoints and keep trying until there is no improvement
-        for (;;)
-        {
-            float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
-            if (err >= opt_err)
-                break;
-            if (do_b == 0)
-                opt_endpts.A[ch] = new_endpt.A[ch];
-            else
-                opt_endpts.B[ch] = new_endpt.B[ch];
-            opt_err = err;
-            do_b = 1 - do_b;	// now move the other endpoint
-        }
-    }
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO], 
-                            const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO])
-{
-    Vector3 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-    float err = 0;
-
-    for (int region=0; region<NREGIONS_TWO; ++region)
-    {
-        // collect the pixels in the region
-        int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++)
-            for (int x = 0; x < tile.size_x; x++)
-                if (REGION(x,y,shapeindex) == region)
-                {
-            pixels[np] = tile.data[y][x];
-            importance[np] = tile.importance_map[y][x];
-            ++np;
-        }
-
-        optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
-    }
-}
-
-/* optimization algorithm
-    for each pattern
-        convert endpoints using pattern precision
-        assign indices and get initial error
-        compress indices (and possibly reorder endpoints)
-        transform endpoints
-        if transformed endpoints fit pattern
-            get original endpoints back
-            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-            compress new indices
-            transform new endpoints
-            if new endpoints fit pattern AND if error is improved
-                emit compressed block with new data
-            else
-                emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block)
-{
-    float orig_err[NREGIONS_TWO], opt_err[NREGIONS_TWO], orig_toterr, opt_toterr;
-    IntEndpts orig_endpts[NREGIONS_TWO], opt_endpts[NREGIONS_TWO];
-    ComprEndpts compr_orig[NREGIONS_TWO], compr_opt[NREGIONS_TWO];
-    int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-    for (int sp = 0; sp < NPATTERNS; ++sp)
-    {
-        // precisions for all channels need to be the same
-        for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
-
-        quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
-        assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
-        swap_indices(orig_endpts, orig_indices, shapeindex_best);
-        compress_endpts(orig_endpts, compr_orig, patterns[sp]);
-        if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
-        {
-            optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
-            assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
-            swap_indices(opt_endpts, opt_indices, shapeindex_best);
-            compress_endpts(opt_endpts, compr_opt, patterns[sp]);
-            orig_toterr = opt_toterr = 0;
-            for (int i=0; i < NREGIONS_TWO; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-            if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
-            {
-                emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
-                return opt_toterr;
-            }
-            else
-            {
-                // either it stopped fitting when we optimized it, or there was no improvement
-                // so go back to the unoptimized endpoints which we know will fit
-                emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
-                return orig_toterr;
-            }
-        }
-    }
-    nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
-	return FLT_MAX;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
-{
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-            palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_TWO])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_TWO][NINDICES];
-
-    generate_palette_unquantized(endpts, palette);
-
-    float toterr = 0;
-    Vector3 err;
-
-    for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-        int region = REGION(x,y,shapeindex);
-        float err, besterr;
-
-        besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
-
-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
-        {
-            err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-                besterr = err;
-        }
-        toterr += besterr;
-    }
-    return toterr;
-}
-
-float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO])
-{
-    for (int region=0; region<NREGIONS_TWO; ++region)
-    {
-        int np = 0;
-        Vector3 colors[Tile::TILE_TOTAL];
-        Vector3 mean(0,0,0);
-
-        for (int y = 0; y < tile.size_y; y++)
-            for (int x = 0; x < tile.size_x; x++)
-                if (REGION(x,y,shapeindex) == region)
-                {
-            colors[np] = tile.data[y][x];
-            mean += tile.data[y][x];
-            ++np;
-        }
-
-        // handle simple cases
-        if (np == 0)
-        {
-            Vector3 zero(0,0,0);
-            endpts[region].A = zero;
-            endpts[region].B = zero;
-            continue;
-        }
-        else if (np == 1)
-        {
-            endpts[region].A = colors[0];
-            endpts[region].B = colors[0];
-            continue;
-        }
-        else if (np == 2)
-        {
-            endpts[region].A = colors[0];
-            endpts[region].B = colors[1];
-            continue;
-        }
-
-        mean /= float(np);
-
-        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-        // project each pixel value along the principal direction
-        float minp = FLT_MAX, maxp = -FLT_MAX;
-        for (int i = 0; i < np; i++)
-        {
-            float dp = dot(colors[i]-mean, direction);
-            if (dp < minp) minp = dp;
-            if (dp > maxp) maxp = dp;
-        }
-
-        // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-        endpts[region].A = mean + minp*direction;
-        endpts[region].B = mean + maxp*direction;
-
-        // clamp endpoints
-        // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-        // shape based on endpoints being clamped
-        Utils::clamp(endpts[region].A);
-        Utils::clamp(endpts[region].B);
-    }
-
-    return map_colors(tile, shapeindex, endpts);
-}
-
-float ZOH::compresstwo(const Tile &t, char *block)
-{
-    int shapeindex_best = 0;
-    FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO];
-    float msebest = FLT_MAX;
-
-    /*
-    collect the mse values that are within 5% of the best values
-    optimize each one and choose the best
-    */
-    // hack for now -- just use the best value WORK
-    for (int i=0; i<NSHAPES && msebest>0.0; ++i)
-    {
-        float mse = roughtwo(t, i, tempendpts);
-        if (mse < msebest)
-        {
-            msebest = mse;
-            shapeindex_best = i;
-            memcpy(endptsbest, tempendpts, sizeof(endptsbest));
-        }
-
-    }
-    return refinetwo(t, shapeindex_best, endptsbest, block);
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl.cpp b/3rdparty/nvtt/bc7/avpcl.cpp
deleted file mode 100644
index 8e0b1695f..000000000
--- a/3rdparty/nvtt/bc7/avpcl.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// the avpcl compressor and decompressor
-
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// global flags
-bool AVPCL::flag_premult = false;
-bool AVPCL::flag_nonuniform = false;
-bool AVPCL::flag_nonuniform_ati = false;
-
-// global mode
-bool AVPCL::mode_rgb = false;		// true if image had constant alpha = 255
-
-void AVPCL::compress(const Tile &t, char *block)
-{
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	float mse_mode0 = AVPCL::compress_mode0(t, tempblock);		if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode1 = AVPCL::compress_mode1(t, tempblock);		if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode2 = AVPCL::compress_mode2(t, tempblock);		if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode3 = AVPCL::compress_mode3(t, tempblock);		if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode4 = AVPCL::compress_mode4(t, tempblock);		if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode5 = AVPCL::compress_mode5(t, tempblock);		if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode6 = AVPCL::compress_mode6(t, tempblock);		if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-	float mse_mode7 = AVPCL::compress_mode7(t, tempblock);		if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
-		
-	/*if (errfile)
-	{
-		float errs[21];
-		int nerrs = 8;
-		errs[0] = mse_mode0; 
-		errs[1] = mse_mode1; 
-		errs[2] = mse_mode2; 
-		errs[3] = mse_mode3; 
-		errs[4] = mse_mode4; 
-		errs[5] = mse_mode5; 
-		errs[6] = mse_mode6; 
-		errs[7] = mse_mode7;
-		if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs)
-			throw "Write error on error file";
-	}*/
-}
-
-/*
-static int getbit(char *b, int start)
-{
-	if (start < 0 || start >= 128) return 0; // out of range
-
-	int ix = start >> 3;
-	return (b[ix] & (1 << (start & 7))) != 0;
-}
-
-static int getbits(char *b, int start, int len)
-{
-	int out = 0;
-	for (int i=0; i<len; ++i)
-		out |= getbit(b, start+i) << i;
-	return out;
-}
-
-static void setbit(char *b, int start, int bit)
-{
-	if (start < 0 || start >= 128) return; // out of range
-
-	int ix = start >> 3;
-
-	if (bit & 1)
-		b[ix] |= (1 << (start & 7));
-	else
-		b[ix] &= ~(1 << (start & 7));
-}
-
-static void setbits(char *b, int start, int len, int bits)
-{
-	for (int i=0; i<len; ++i)
-		setbit(b, start+i, bits >> i);
-}
-*/
-
-void AVPCL::decompress(const char *cblock, Tile &t)
-{
-	char block[AVPCL::BLOCKSIZE];
-	memcpy(block, cblock, AVPCL::BLOCKSIZE);
-
-	switch(getmode(block))
-	{
-	case 0:	AVPCL::decompress_mode0(block, t);	break;
-	case 1:	AVPCL::decompress_mode1(block, t);	break;
-	case 2:	AVPCL::decompress_mode2(block, t);	break;
-	case 3:	AVPCL::decompress_mode3(block, t);	break;
-	case 4:	AVPCL::decompress_mode4(block, t);	break;
-	case 5:	AVPCL::decompress_mode5(block, t);	break;
-	case 6:	AVPCL::decompress_mode6(block, t);	break;
-	case 7:	AVPCL::decompress_mode7(block, t);	break;
-	case 8: // return a black tile if you get a reserved mode
-		for (int y=0; y<Tile::TILE_H; ++y)
-			for (int x=0; x<Tile::TILE_W; ++x)
-				t.data[y][x].set(0, 0, 0, 0);
-		break;
-	default: nvUnreachable();
-	}
-}
-
-/*
-void AVPCL::compress(string inf, string avpclf, string errf)
-{
-	Array2D<RGBA> pixels;
-	int w, h;
-	char block[AVPCL::BLOCKSIZE];
-
-	Targa::read(inf, pixels, w, h);
-	FILE *avpclfile = fopen(avpclf.c_str(), "wb");
-	if (avpclfile == NULL) throw "Unable to open .avpcl file for write";
-	FILE *errfile = NULL;
-	if (errf != "")
-	{
-		errfile = fopen(errf.c_str(), "wb");
-		if (errfile == NULL) throw "Unable to open error file for write";
-	}
-
-	// Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set)
-	if (AVPCL::flag_premult)
-	{
-		if (AVPCL::mode_rgb)
-		{
-			AVPCL::flag_premult = false;
-			cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." << endl << endl;
-		}
-	}
-
-	// stuff for progress bar O.o
-	int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
-	int tilecnt = 0;
-	clock_t start, prev, cur;
-
-	start = prev = clock();
-
-	// convert to tiles and compress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, float(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; }
-
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			t.insert(pixels, x, y);
-
-			AVPCL::compress(t, block, errfile);
-			if (fwrite(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
-				throw "File error on write";
-
-			// progress bar
-			++tilecnt;
-		}
-	}
-
-	cur = clock();
-	printf("\nTotal time to compress: %.2f seconds\n\n", float(cur-start)/CLOCKS_PER_SEC);		// advance to next line finally
-
-	if (fclose(avpclfile)) throw "Close failed on .avpcl file";
-	if (errfile && fclose(errfile)) throw "Close failed on error file";
-}
-
-static int str2int(std::string s) 
-{
-	int thing;
-	std::stringstream str (stringstream::in | stringstream::out);
-	str << s;
-	str >> thing;
-	return thing;
-}
-
-// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height
-static void extract(string avpclf, int &w, int &h, bool &mode_rgb)
-{
-	size_t n = avpclf.rfind('.', avpclf.length()-1);
-	size_t n1 = avpclf.rfind('-', n-1);
-	size_t n2 = avpclf.rfind('-', n1-1);
-	size_t n3 = avpclf.rfind('-', n2-1);
-	//	...-wwww-hhhh-RGB[A].avpcl
-	//     ^    ^    ^      ^
-	//     n3   n2   n1     n n3<n2<n1<n
-	string width = avpclf.substr(n3+1, n2-n3-1);
-	w = str2int(width);
-	string height = avpclf.substr(n2+1, n1-n2-1);
-	h = str2int(height);
-	string mode = avpclf.substr(n1+1, n-n1-1);
-	mode_rgb = mode == "RGB";
-}
-
-static int modehist[8];
-
-static void stats(char block[AVPCL::BLOCKSIZE])
-{
-	int m = AVPCL::getmode(block);
-	modehist[m]++;
-}
-
-static void printstats()
-{
-	printf("\nMode histogram: "); for (int i=0; i<8; ++i) { printf("%d,", modehist[i]); }
-	printf("\n");
-}
-
-void AVPCL::decompress(string avpclf, string outf)
-{
-	Array2D<RGBA> pixels;
-	int w, h;
-	char block[AVPCL::BLOCKSIZE];
-
-	extract(avpclf, w, h, AVPCL::mode_rgb);
-	FILE *avpclfile = fopen(avpclf.c_str(), "rb");
-	if (avpclfile == NULL) throw "Unable to open .avpcl file for read";
-	pixels.resizeErase(h, w);
-
-	// convert to tiles and decompress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
-				throw "File error on read";
-
-			stats(block);	// collect statistics
-		
-			AVPCL::decompress(block, t);
-
-			t.extract(pixels, x, y);
-		}
-	}
-	if (fclose(avpclfile)) throw "Close failed on .avpcl file";
-
-	Targa::write(outf, pixels, w, h);
-
-	printstats();	// print statistics
-}
-*/
diff --git a/3rdparty/nvtt/bc7/avpcl.h b/3rdparty/nvtt/bc7/avpcl.h
deleted file mode 100644
index 44ea50497..000000000
--- a/3rdparty/nvtt/bc7/avpcl.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_H
-#define _AVPCL_H
-
-#include "tile.h"
-#include "bits.h"
-
-#define	DISABLE_EXHAUSTIVE	1	// define this if you don't want to spend a lot of time on exhaustive compression
-#define	USE_ZOH_INTERP		1	// use zoh interpolator, otherwise use exact avpcl interpolators
-#define	USE_ZOH_INTERP_ROUNDED 1	// use the rounded versions!
-
-namespace AVPCL {
-
-static const int NREGIONS_TWO	= 2;
-static const int NREGIONS_THREE	= 3;
-
-static const int BLOCKSIZE=16;
-static const int BITSIZE=128;
-
-// global flags
-extern bool flag_premult;
-extern bool flag_nonuniform;
-extern bool flag_nonuniform_ati;
-
-// global mode
-extern bool mode_rgb;		// true if image had constant alpha = 255
-
-void compress(const Tile &t, char *block);
-void decompress(const char *block, Tile &t);
-
-float compress_mode0(const Tile &t, char *block);
-void decompress_mode0(const char *block, Tile &t);
-
-float compress_mode1(const Tile &t, char *block);
-void decompress_mode1(const char *block, Tile &t);
-
-float compress_mode2(const Tile &t, char *block);
-void decompress_mode2(const char *block, Tile &t);
-
-float compress_mode3(const Tile &t, char *block);
-void decompress_mode3(const char *block, Tile &t);
-
-float compress_mode4(const Tile &t, char *block);
-void decompress_mode4(const char *block, Tile &t);
-
-float compress_mode5(const Tile &t, char *block);
-void decompress_mode5(const char *block, Tile &t);
-
-float compress_mode6(const Tile &t, char *block);
-void decompress_mode6(const char *block, Tile &t);
-
-float compress_mode7(const Tile &t, char *block);
-void decompress_mode7(const char *block, Tile &t);
-
-inline int getmode(Bits &in)
-{
-	int mode = 0;
-
-	if (in.read(1))			mode = 0;
-	else if (in.read(1))	mode = 1;
-	else if (in.read(1))	mode = 2;
-	else if (in.read(1))	mode = 3;
-	else if (in.read(1))	mode = 4;
-	else if (in.read(1))	mode = 5;
-	else if (in.read(1))	mode = 6;
-	else if (in.read(1))	mode = 7;
-	else mode = 8;	// reserved
-	return mode;
-}
-inline int getmode(const char *block)
-{
-	int bits = block[0], mode = 0;
-
-	if (bits & 1) mode = 0;
-	else if ((bits&3) == 2) mode = 1;
-	else if ((bits&7) == 4) mode = 2;
-	else if ((bits & 0xF) == 8) mode = 3;
-	else if ((bits & 0x1F) == 16) mode = 4;
-	else if ((bits & 0x3F) == 32) mode = 5;
-	else if ((bits & 0x7F) == 64) mode = 6;
-	else if ((bits & 0xFF) == 128) mode = 7;
-	else mode = 8;	// reserved
-	return mode;
-}
-
-}
-
-#endif
diff --git a/3rdparty/nvtt/bc7/avpcl_mode0.cpp b/3rdparty/nvtt/bc7/avpcl_mode0.cpp
deleted file mode 100644
index 82dd60755..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode0.cpp
+++ /dev/null
@@ -1,1066 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-//  x1		444.1x6 16p 45b (3bi)
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_three.h"
-
-// use only the first 16 available shapes
-#undef NSHAPES
-#undef SHAPEBITS
-#define NSHAPES 16
-#define SHAPEBITS 4
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	8
-#define	INDEXBITS	3
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-    const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red			green			blue			xfm	mode  mb
-	4,4,4,4,4,4,	4,4,4,4,4,4,	4,4,4,4,4,4,	0,	0x1, 1, "",	// really 444.1 x 6
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];
-	int endpt_b_prec[NCHANNELS_RGB];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-// endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits
-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
-{
-	int onescnt;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		onescnt += endpts.A[j] & 1;
-		compr_endpts.A[j] = endpts.A[j] >> 1;
-		nvAssert (compr_endpts.A[j] < 16);
-	}
-	compr_endpts.a_lsb = onescnt >= 2;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		onescnt += endpts.B[j] & 1;
-		compr_endpts.B[j] = endpts.B[j] >> 1;
-		nvAssert (compr_endpts.B[j] < 16);
-	}
-	compr_endpts.b_lsb = onescnt >= 2;
-}
-
-static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
-{
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
-	}
-}
-
-static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		uncompress_one(compr_endpts[i], endpts[i]);
-}
-
-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		compress_one(endpts[i], compr_endpts[i]);
-}
-
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
-{
-	IntEndptsRGB full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-		int x = POS_TO_X(position);
-		int y = POS_TO_Y(position);
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGB; ++i) 
-			{
-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
-			}
-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		out.write(endpts[i].a_lsb, 1);
-		out.write(endpts[i].b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 83);
-}
-
-static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-	
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		endpts[i].a_lsb  = in.read(1);
-		endpts[i].b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 83);
-}
-
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGB endpts;
-
-	uncompress_one(endpts_2, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// constant alpha
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = 255.0f;
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
-{
-	nvUnreachable();
-}
-
-void AVPCL::decompress_mode0(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGB_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	if (p.transformed)
-	{
-		sign_extend(p, endpts);
-		transform_inverse(endpts);
-	}
-
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-	Vector4 err;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB_2 new_a, new_b;
-	IntEndptsRGB_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.a_lsb = lsbmode & 1;
-			temp_in.b_lsb = (lsbmode >> 1) & 1;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-			float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-    nvAssert(false); // throw "No candidate found, should never happen (mode avpcl 0).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-// for this mode, we assume alpha = 255 constant and compress only the RGB portion.
-// however, we do the error check against the actual alpha values supplied for the tile.
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode0(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl_mode1.cpp b/3rdparty/nvtt/bc7/avpcl_mode1.cpp
deleted file mode 100644
index fb1bfea88..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode1.cpp
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x10	(666x2).1 (666x2).1 64p 3bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_two.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	2		// number of different lsb modes per region. since we have one .1 per region, that can have 2 values
-
-#define NINDICES	8
-#define	INDEXBITS	3
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		xfm	mode  mb
-	6,6,6,6,	6,6,6,6,	6,6,6,6,	0,	0x2, 2, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];
-	int endpt_b_prec[NCHANNELS_RGB];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	6,6,6, 6,6,6, 6,6,6, 6,6,6,	
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-
-static void transform_forward(IntEndptsRGB_1 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-// endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits
-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpts)
-{
-	int onescnt;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		onescnt += endpts.A[j] & 1;
-		compr_endpts.A[j] = endpts.A[j] >> 1;
-		onescnt += endpts.B[j] & 1;
-		compr_endpts.B[j] = endpts.B[j] >> 1;
-		nvAssert (compr_endpts.A[j] < 64);
-		nvAssert (compr_endpts.B[j] < 64);
-	}
-	compr_endpts.lsb = onescnt >= 3;
-}
-
-static void uncompress_one(const IntEndptsRGB_1& compr_endpts, IntEndptsRGB& endpts)
-{
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.lsb;
-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.lsb;
-	}
-}
-
-static void uncompress_endpoints(const IntEndptsRGB_1 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		uncompress_one(compr_endpts[i], endpts[i]);
-}
-
-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_1 compr_endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		compress_one(endpts[i], compr_endpts[i]);
-}
-
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_1 q_endpts[NREGIONS])
-{
-	IntEndptsRGB full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-static void swap_indices(IntEndptsRGB_1 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-		int x = POS_TO_X(position);
-		int y = POS_TO_Y(position);
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGB; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB_1 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-
-static void write_header(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-		out.write(endpts[i].lsb, 1);
-
-	nvAssert (out.getptr() == 82);
-}
-
-static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-		endpts[i].lsb  = in.read(1);
-	
-	nvAssert (in.getptr() == 82);
-}
-
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void emit_block(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGB_1 &endpts_1, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGB endpts;
-
-	uncompress_one(endpts_1, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// note: don't simplify to a + ((b-a)*i + BIAS)/DENOM as that doesn't work due to the way C handles integer division of negatives
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// constant alpha
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = 255.0f;
-}
-
-// sign extend but only if it was transformed
-static void sign_extend(Pattern &p, IntEndptsRGB_1 endpts[NREGIONS])
-{
-	nvUnreachable();
-}
-
-void AVPCL::decompress_mode1(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGB_1 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	if (p.transformed)
-	{
-		sign_extend(p, endpts);
-		transform_inverse(endpts);
-	}
-
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-	Vector4 err;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB_1 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB_1 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB_1 new_a, new_b;
-	IntEndptsRGB_1 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB_1 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.lsb = lsbmode;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-			float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB_1 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			//nvAssert(opt_toterr <= orig_toterr);
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 1).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode1(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl_mode2.cpp b/3rdparty/nvtt/bc7/avpcl_mode2.cpp
deleted file mode 100644
index 380ffcef5..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode2.cpp
+++ /dev/null
@@ -1,1004 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x100 555x6 64p 2bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_three.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define NINDICES	4
-#define	INDEXBITS	2
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NBITSIZES	6
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red			green			blue			xfm	mode  mb
-	5,5,5,5,5,5,	5,5,5,5,5,5,	5,5,5,5,5,5,	0,	0x4, 3, "",
-};
-
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];
-	int endpt_b_prec[NCHANNELS_RGB];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS_THREE];
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-#define	R_0	ep[0].A[i]
-#define	R_1 ep[0].B[i]
-#define	R_2 ep[1].A[i]
-#define	R_3	ep[1].B[i]
-
-static void transform_forward(IntEndptsRGB ep[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGB; ++i)
-	{
-		R_1 -= R_3; R_2 -= R_3; R_0 -= R_3;
-	}
-}
-
-static void transform_inverse(IntEndptsRGB ep[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGB; ++i)
-	{
-		R_0 += R_3; R_2 += R_3; R_1 += R_3;
-	}
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, IntEndptsRGB q_endpts[NREGIONS_THREE])
-{
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-static void swap_indices(IntEndptsRGB endpts[NREGIONS_THREE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-		int x = POS_TO_X(position);
-		int y = POS_TO_Y(position);
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGB; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB endpts[NREGIONS_THREE], const Pattern &p)
-{
-	return true;
-}
-
-
-static void write_header(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS_THREE; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[i*2+0]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[i*2+1]);
-		}
-	nvAssert (out.getptr() == 99);
-}
-
-static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS_THREE; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]);
-		}
-	nvAssert (in.getptr() == 99);
-}
-
-
-// WORK PLACEHOLDER -- keep it simple for now
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int positions[NREGIONS_THREE];
-
-	for (int r = 0; r < NREGIONS_THREE; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS_THREE; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int positions[NREGIONS_THREE];
-
-	for (int r = 0; r < NREGIONS_THREE; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS_THREE; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void emit_block(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// constant alpha
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = 255.0f;
-}
-
-// sign extend but only if it was transformed
-static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE])
-{
-	nvAssert (p.transformed != 0);
-
-	for (int i=0; i<NCHANNELS_RGB; ++i)
-	{
-		// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
-		endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
-		endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
-		endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
-		endpts[2].A[i] = SIGN_EXTEND(endpts[2].A[i], p.chan[i].nbitsizes[4]);
-		endpts[2].B[i] = SIGN_EXTEND(endpts[2].B[i], p.chan[i].nbitsizes[5]);
-	}
-}
-
-void AVPCL::decompress_mode2(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGB endpts[NREGIONS_THREE];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	if (p.transformed)
-	{
-		sign_extend(p, endpts);
-		transform_inverse(endpts);
-	}
-
-	Vector4 palette[NREGIONS_THREE][NINDICES];
-	for (int r = 0; r < NREGIONS_THREE; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-	Vector4 err;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS_THREE][NINDICES];
-
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB new_a, new_b;
-	IntEndptsRGB new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_THREE], 
-							const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB temp_in, temp_out;
-
-	for (int region=0; region<NREGIONS_THREE; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// make sure we have a valid error for temp_in
-		// we didn't change temp_in, so orig_err[region] is still valid
-		float temp_in_err = orig_err[region];
-
-		// now try to optimize these endpoints
-		float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-		// if we find an improvement, update the best so far and correct the output endpoints and errors
-		if (temp_out_err < best_err)
-		{
-			best_err = temp_out_err;
-			opt_err[region] = temp_out_err;
-			opt_endpts[region] = temp_out;
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_THREE], char *block)
-{
-	float orig_err[NREGIONS_THREE], opt_err[NREGIONS_THREE], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB orig_endpts[NREGIONS_THREE], opt_endpts[NREGIONS_THREE];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS_THREE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 2).";
-	return FLT_MAX;
-
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES])
-{
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS_THREE][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE])
-{
-	for (int region=0; region<NREGIONS_THREE; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode2(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS_THREE];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl_mode3.cpp b/3rdparty/nvtt/bc7/avpcl_mode3.cpp
deleted file mode 100644
index 0020d8aef..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode3.cpp
+++ /dev/null
@@ -1,1059 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x1000 777.1x4 64p 2bi (30b)
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_two.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	4
-#define	INDEXBITS	2
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-#define	NREGIONS  2
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		xfm	mode  mb
-	7,7,7,7,	7,7,7,7,	7,7,7,7,	0,	0x8, 4, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];
-	int endpt_b_prec[NCHANNELS_RGB];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	7,7,7, 7,7,7, 7,7,7, 7,7,7,
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-// endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits
-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
-{
-	int onescnt;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		onescnt += endpts.A[j] & 1;
-		compr_endpts.A[j] = endpts.A[j] >> 1;
-		nvAssert (compr_endpts.A[j] < 128);
-	}
-	compr_endpts.a_lsb = onescnt >= 2;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		onescnt += endpts.B[j] & 1;
-		compr_endpts.B[j] = endpts.B[j] >> 1;
-		nvAssert (compr_endpts.B[j] < 128);
-	}
-	compr_endpts.b_lsb = onescnt >= 2;
-}
-
-static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
-{
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
-	}
-}
-
-static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		uncompress_one(compr_endpts[i], endpts[i]);
-}
-
-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		compress_one(endpts[i], compr_endpts[i]);
-}
-
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
-{
-	IntEndptsRGB full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-		int x = POS_TO_X(position);
-		int y = POS_TO_Y(position);
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGB; ++i) 
-			{
-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
-			}
-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		out.write(endpts[i].a_lsb, 1);
-		out.write(endpts[i].b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 98);
-}
-
-static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-	
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		endpts[i].a_lsb  = in.read(1);
-		endpts[i].b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 98);
-}
-
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGB endpts;
-
-	uncompress_one(endpts_2, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// constant alpha
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = 255.0f;
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
-{
-	nvUnreachable();
-}
-
-void AVPCL::decompress_mode3(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGB_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	if (p.transformed)
-	{
-		sign_extend(p, endpts);
-		transform_inverse(endpts);
-	}
-
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-	Vector4 err;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-            float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB_2 new_a, new_b;
-	IntEndptsRGB_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.a_lsb = lsbmode & 1;
-			temp_in.b_lsb = (lsbmode >> 1) & 1;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode3(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl_mode4.cpp b/3rdparty/nvtt/bc7/avpcl_mode4.cpp
deleted file mode 100644
index 5115d7ced..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode4.cpp
+++ /dev/null
@@ -1,1214 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x10000 2r 1i 555x2 6x2 2bi 3bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
-// array 0 is always the RGB array and array 1 is always the A array
-#define	NINDEXARRAYS	2
-#define	INDEXARRAY_RGB	0
-#define INDEXARRAY_A	1
-#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
-#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
-
-#define NINDICES3	8
-#define	INDEXBITS3	3
-#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))
-#define	DENOM3		(NINDICES3-1)
-#define	BIAS3		(DENOM3/2)
-
-#define NINDICES2	4
-#define	INDEXBITS2	2
-#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
-#define	DENOM2		(NINDICES2-1)
-#define	BIAS2		(DENOM2/2)
-
-#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
-#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
-#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
-#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
-#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
-
-#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
-#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
-#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
-#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
-#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
-
-#define	NSHAPES	1
-
-static int shapes[NSHAPES] =
-{
-	0x0000,
-};
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
-
-#define NREGIONS	1			// keep the region stuff in just in case...
-
-// encoded index compression location: region 0 is always at 0,0.
-
-#define	NBITSIZES	2			// one endpoint pair
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	TRANSFORM_MODE_ALPHA	1
-#define	TRANSFORM_MODE_RGB	2
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		alpha	xfm	mode  mb encoding
-	5,5,		5,5,		5,5,		6,6,	0x0, 0x10, 5, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];
-	int endpt_b_prec[NCHANNELS_RGBA];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	5,5,5,6,	5,5,5,6,
-};
-
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-#define	R_0	ep[0].A[i]
-#define	R_1 ep[0].B[i]
-
-static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
-{
-	int i;
-
-	if (transform_mode & TRANSFORM_MODE_RGB)
-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
-			R_1 -= R_0;
-	if (transform_mode & TRANSFORM_MODE_ALPHA)
-	{
-		i = CHANNEL_A;
-		R_1 -= R_0;
-	}
-}
-
-static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
-{
-	int i;
-
-	if (transform_mode & TRANSFORM_MODE_RGB)
-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
-			R_1 += R_0;
-	if (transform_mode & TRANSFORM_MODE_ALPHA)
-	{
-		i = CHANNEL_A;
-		R_1 += R_0;
-	}
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
-		q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
-
-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
-		q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
-{
-	int index_positions[NREGIONS];
-
-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int x = index_positions[region] & 3;
-		int y = (index_positions[region] >> 2) & 3;
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-
-		// swap RGB
-		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
-		}
-
-		// swap A
-		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
-{
-	// ignore shapeindex
-	out.write(p.mode, p.modebits);
-	out.write(rotatemode, ROTATEMODE_BITS);
-	out.write(indexmode, INDEXMODE_BITS);
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (out.getptr() == 50);
-}
-
-static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	p = patterns[pat_index];
-
-	shapeindex = 0;		// we don't have any
-
-	rotatemode = in.read(ROTATEMODE_BITS);
-	indexmode = in.read(INDEXMODE_BITS);
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (in.getptr() == 50);
-}
-
-static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]
-
-	// then the 3 bit indices
-	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[2:0] or i..[1:0]
-}
-
-static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]
-
-	// then the 3 bit indices
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[1:0] or i..[0]
-}
-
-static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
-
-	write_indices(indices, shapeindex, indexmode, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
-{
-	// scale endpoints for RGB
-	int a, b;
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
-
-	// interpolate R
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
-
-	// interpolate G
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
-
-	// interpolate B
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
-
-	// interpolate A
-	for (int i = 0; i < NINDICES_A(indexmode); ++i)
-		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
-
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGBA; ++i)
-	{
-		if (p.transform_mode)
-		{
-			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
-			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
-			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
-			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
-		}
-	}
-}
-
-static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
-{
-	out.size_x = in.size_x;
-	out.size_y = in.size_y;
-
-	for (int y=0; y<in.size_y; ++y)
-	for (int x=0; x<in.size_x; ++x)
-	{
-		float t;
-		out.data[y][x] = in.data[y][x];
-
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: break;
-		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		default: nvUnreachable();
-		}
-	}
-}
-
-void AVPCL::decompress_mode4(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA endpts[NREGIONS];
-	int shapeindex, pat_index, rotatemode, indexmode;
-
-	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
-	
-	sign_extend(p, endpts);
-
-	if (p.transform_mode)
-		transform_inverse(p.transform_mode, endpts);
-
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-
-	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indexmode, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	Tile temp(t.size_x, t.size_y);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
-
-	rotate_tile(temp, rotatemode, t);
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
-// exceeds what we already have
-static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	Vector3 palette_rgb[NINDICES3];	// could be nindices2
-	float palette_a[NINDICES3];	// could be nindices2
-	float toterr = 0;
-
-	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
-
-	Vector3 rgb;
-	float a;
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
-
-		rgb.x = (colors[i]).x;
-		rgb.y = (colors[i]).y;
-		rgb.z = (colors[i]).z;
-		a = (colors[i]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = Utils::metric1(a, palette_a[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					palette_alpha = palette_a[j];
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			toterr += besterr;
-			if (toterr > current_besterr)
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-		else
-		{
-			// do RGB index
-			besterr = FLT_MAX;
-			int bestindex;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					bestindex = j;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
-			toterr += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-			if (toterr > current_besterr)
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector3 rgb;
-	float a;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		rgb.x = (tile.data[y][x]).x;
-		rgb.y = (tile.data[y][x]).y;
-		rgb.z = (tile.data[y][x]).z;
-		a = (tile.data[y][x]).w;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = Utils::metric1(a, palette_a[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-					palette_alpha = palette_a[region][i];
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;
-		}
-		else
-		{
-			// do RGB index first as it has the alpha
-			besterr = FLT_MAX;
-			int bestindex;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-					bestindex = i;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
-			toterr[region] += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-		}
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, 
-						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					indices[j][i] = temp_indices[j][i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		for (int j=0; j<NINDEXARRAYS; ++j)
-		for (int i=0; i<np; ++i)
-			indices[j][i] = good_indices[j][i];
-	}
-
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA new_a, new_b;
-	IntEndptsRGBA new_endpt;
-	int do_b;
-	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[j][i] = temp_indices0[j][i];
-					nvAssert (orig_indices[j][i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS], 
-							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA temp_in, temp_out;
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// make sure we have a valid error for temp_in
-		// we didn't change temp_in, so orig_err[region] is still valid
-		float temp_in_err = orig_err[region];
-
-		// now try to optimize these endpoints
-        float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-		// if we find an improvement, update the best so far and correct the output endpoints and errors
-		if (temp_out_err < best_err)
-		{
-			best_err = temp_out_err;
-			opt_err[region] = temp_out_err;
-			opt_endpts[region] = temp_out;
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-
-		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);
-
-		if (patterns[sp].transform_mode)
-			transform_forward(patterns[sp].transform_mode, orig_endpts);
-
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transform_mode)
-				transform_inverse(patterns[sp].transform_mode, orig_endpts);
-
-			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-
-			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
-
-			if (patterns[sp].transform_mode)
-				transform_forward(patterns[sp].transform_mode, opt_endpts);
-
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transform_mode)
-					transform_forward(patterns[sp].transform_mode, orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 4).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	if (v.w < 0.0f) v.w = 0.0f;
-	if (v.w > 255.0f) v.w = 255.0f;
-}
-
-// compute initial endpoints for the "RGB" portion and the "A" portion. 
-// Note these channels may have been rotated.
-static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		float mina = FLT_MAX, maxa = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-
-			dp = alphas[i] - mean.w;
-			if (dp < mina) mina = dp;
-			if (dp > maxa) maxa = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + Vector4(minp*direction, mina);
-		endpts[region].B = mean + Vector4(maxp*direction, maxa);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-}
-
-float AVPCL::compress_mode4(const Tile &t, char *block)
-{
-	FltEndpts endpts[NREGIONS];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-	int shape = 0;
-	Tile t1;
-
-	// try all rotations. refine tries the 2 different indexings.
-	for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
-	{
-		rotate_tile(t, r, t1);
-		rough(t1, shape, endpts);
-		for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
-		{
-			float mse = refine(t1, shape, r, i, endpts, tempblock);
-			if (mse < msebest)
-			{
-				memcpy(block, tempblock, sizeof(tempblock));
-				msebest = mse;
-			}
-		}
-	}
-	return msebest;
-}
diff --git a/3rdparty/nvtt/bc7/avpcl_mode5.cpp b/3rdparty/nvtt/bc7/avpcl_mode5.cpp
deleted file mode 100644
index f1f163658..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode5.cpp
+++ /dev/null
@@ -1,1216 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x100000 2r 777x2 8x2 2bi 2bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
-// array 0 is always the RGB array and array 1 is always the A array
-#define	NINDEXARRAYS	2
-#define	INDEXARRAY_RGB	0
-#define INDEXARRAY_A	1
-#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
-#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
-
-#define NINDICES3	4
-#define	INDEXBITS3	2
-#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))
-#define	DENOM3		(NINDICES3-1)
-#define	BIAS3		(DENOM3/2)
-
-#define NINDICES2	4
-#define	INDEXBITS2	2
-#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
-#define	DENOM2		(NINDICES2-1)
-#define	BIAS2		(DENOM2/2)
-
-#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
-#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
-#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
-#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
-#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
-
-#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
-#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
-#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
-#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
-#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
-
-#define	NSHAPES	1
-
-static int shapes[NSHAPES] =
-{
-	0x0000,
-};
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
-
-#define NREGIONS	1			// keep the region stuff in just in case...
-
-// encoded index compression location: region 0 is always at 0,0.
-
-#define	NBITSIZES	2			// one endpoint pair
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	TRANSFORM_MODE_ALPHA	1
-#define	TRANSFORM_MODE_RGB	2
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		alpha	xfm	mode  mb encoding
-	7,7,		7,7,		7,7,		8,8,	0x0, 0x20, 6, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];
-	int endpt_b_prec[NCHANNELS_RGBA];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	7,7,7,8,	7,7,7,8,
-};
-
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-#define	R_0	ep[0].A[i]
-#define	R_1 ep[0].B[i]
-
-static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
-{
-	int i;
-
-	if (transform_mode & TRANSFORM_MODE_RGB)
-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
-			R_1 -= R_0;
-	if (transform_mode & TRANSFORM_MODE_ALPHA)
-	{
-		i = CHANNEL_A;
-		R_1 -= R_0;
-	}
-}
-
-static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
-{
-	int i;
-
-	if (transform_mode & TRANSFORM_MODE_RGB)
-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
-			R_1 += R_0;
-	if (transform_mode & TRANSFORM_MODE_ALPHA)
-	{
-		i = CHANNEL_A;
-		R_1 += R_0;
-	}
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
-		q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
-
-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
-		q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
-{
-	int index_positions[NREGIONS];
-
-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int x = index_positions[region] & 3;
-		int y = (index_positions[region] >> 2) & 3;
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-
-		// swap RGB
-		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
-		}
-
-		// swap A
-		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
-{
-	// ignore shapeindex
-	out.write(p.mode, p.modebits);
-	out.write(rotatemode, ROTATEMODE_BITS);
-//	out.write(indexmode, INDEXMODE_BITS);
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (out.getptr() == 66);
-}
-
-static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	p = patterns[pat_index];
-
-	shapeindex = 0;		// we don't have any
-
-	rotatemode = in.read(ROTATEMODE_BITS);
-
-	indexmode = 0;		// we don't have any
-
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (in.getptr() == 66);
-}
-
-static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]
-
-	// then the 3 bit indices
-	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[2:0] or i..[1:0]
-}
-
-static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]
-
-	// then the 3 bit indices
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[1:0] or i..[0]
-}
-
-static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
-
-	write_indices(indices, shapeindex, indexmode, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
-{
-	// scale endpoints for RGB
-	int a, b;
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
-
-	// interpolate R
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
-
-	// interpolate G
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
-
-	// interpolate B
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
-
-	// interpolate A
-	for (int i = 0; i < NINDICES_A(indexmode); ++i)
-		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGBA; ++i)
-	{
-		if (p.transform_mode)
-		{
-			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
-			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
-			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
-			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
-		}
-	}
-}
-
-static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
-{
-	out.size_x = in.size_x;
-	out.size_y = in.size_y;
-
-	for (int y=0; y<in.size_y; ++y)
-	for (int x=0; x<in.size_x; ++x)
-	{
-		float t;
-		out.data[y][x] = in.data[y][x];
-
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: break;
-		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		default: nvUnreachable();
-		}
-	}
-}
-
-void AVPCL::decompress_mode5(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA endpts[NREGIONS];
-	int shapeindex, pat_index, rotatemode, indexmode;
-
-	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
-	
-	sign_extend(p, endpts);
-
-	if (p.transform_mode)
-		transform_inverse(p.transform_mode, endpts);
-
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-
-	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indexmode, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	Tile temp(t.size_x, t.size_y);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
-
-	rotate_tile(temp, rotatemode, t);
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
-// exceeds what we already have
-static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	Vector3 palette_rgb[NINDICES3];	// could be nindices2
-	float palette_a[NINDICES3];	// could be nindices2
-	float toterr = 0;
-
-	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
-
-	Vector3 rgb;
-	float a;
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
-
-		rgb.x = (colors[i]).x;
-		rgb.y = (colors[i]).y;
-		rgb.z = (colors[i]).z;
-		a = (colors[i]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = Utils::metric1(a, palette_a[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					palette_alpha = palette_a[j];
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			toterr += besterr;
-			if (toterr > current_besterr)
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-		else
-		{
-			// do RGB index
-			besterr = FLT_MAX;
-			int bestindex;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					bestindex = j;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
-			toterr += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-			if (toterr > current_besterr)
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector3 rgb;
-	float a;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		rgb.x = (tile.data[y][x]).x;
-		rgb.y = (tile.data[y][x]).y;
-		rgb.z = (tile.data[y][x]).z;
-		a = (tile.data[y][x]).w;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = Utils::metric1(a, palette_a[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-					palette_alpha = palette_a[region][i];
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;
-		}
-		else
-		{
-			// do RGB index first as it has the alpha
-			besterr = FLT_MAX;
-			int bestindex;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-					bestindex = i;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
-			toterr[region] += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-		}
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
-						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					indices[j][i] = temp_indices[j][i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		for (int j=0; j<NINDEXARRAYS; ++j)
-		for (int i=0; i<np; ++i)
-			indices[j][i] = good_indices[j][i];
-	}
-
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA new_a, new_b;
-	IntEndptsRGBA new_endpt;
-	int do_b;
-	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[j][i] = temp_indices0[j][i];
-					nvAssert (orig_indices[j][i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS], 
-							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA temp_in, temp_out;
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// make sure we have a valid error for temp_in
-		// we didn't change temp_in, so orig_err[region] is still valid
-		float temp_in_err = orig_err[region];
-
-		// now try to optimize these endpoints
-        float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-		// if we find an improvement, update the best so far and correct the output endpoints and errors
-		if (temp_out_err < best_err)
-		{
-			best_err = temp_out_err;
-			opt_err[region] = temp_out_err;
-			opt_endpts[region] = temp_out;
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-
-		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);
-
-		if (patterns[sp].transform_mode)
-			transform_forward(patterns[sp].transform_mode, orig_endpts);
-
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transform_mode)
-				transform_inverse(patterns[sp].transform_mode, orig_endpts);
-
-			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-
-			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
-
-			if (patterns[sp].transform_mode)
-				transform_forward(patterns[sp].transform_mode, opt_endpts);
-
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transform_mode)
-					transform_forward(patterns[sp].transform_mode, orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	if (v.w < 0.0f) v.w = 0.0f;
-	if (v.w > 255.0f) v.w = 255.0f;
-}
-
-// compute initial endpoints for the "RGB" portion and the "A" portion. 
-// Note these channels may have been rotated.
-static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		float mina = FLT_MAX, maxa = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-
-			dp = alphas[i] - mean.w;
-			if (dp < mina) mina = dp;
-			if (dp > maxa) maxa = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + Vector4(minp*direction, mina);
-		endpts[region].B = mean + Vector4(maxp*direction, maxa);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-}
-
-float AVPCL::compress_mode5(const Tile &t, char *block)
-{
-	FltEndpts endpts[NREGIONS];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-	int shape = 0;
-	Tile t1;
-
-	// try all rotations. refine tries the 2 different indexings.
-	for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
-	{
-		rotate_tile(t, r, t1);
-		rough(t1, shape, endpts);
-//		for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
-		for (int i = 0; i < 1 && msebest > 0; ++i)
-		{
-			float mse = refine(t1, shape, r, i, endpts, tempblock);
-			if (mse < msebest)
-			{
-				memcpy(block, tempblock, sizeof(tempblock));
-				msebest = mse;
-			}
-		}
-	}
-	return msebest;
-}
diff --git a/3rdparty/nvtt/bc7/avpcl_mode6.cpp b/3rdparty/nvtt/bc7/avpcl_mode6.cpp
deleted file mode 100644
index 38e3a259c..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode6.cpp
+++ /dev/null
@@ -1,1055 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x1000000 7777.1x2 4bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	16
-#define	INDEXBITS	4
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-#define	NSHAPES	1
-
-static int shapes[NSHAPES] =
-{
-	0x0000,
-};
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
-
-#define	NREGIONS	1
-
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red	green	blue	alpha	mode  mb verilog
-	7,7,	7,7,	7,7,	7,7,	0x40, 7, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];
-	int endpt_b_prec[NCHANNELS_RGBA];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	7,7,7,7,	7,7,7,7,
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-/*
-we're using this table to assign lsbs
-abgr	>=2	correct
-0000	0	0
-0001	0	0
-0010	0	0
-0011	1	x1
-0100	0	0
-0101	1	x1
-0110	1	x1
-0111	1	1
-1000	0	0
-1001	1	x0
-1010	1	x0
-1011	1	1
-1100	1	x0
-1101	1	1
-1110	1	1
-1111	1	1
-
-we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8.
-I choose to assign the lsbs so that the rgb channels are as good as possible.
-*/
-
-// 8888 ->7777.1, use the "correct" column above to assign the lsb
-static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts)
-{
-	int onescnt;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-	{
-		// ignore the alpha channel in the count
-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1);
-		compr_endpts.A[j] = endpts.A[j] >> 1;
-		nvAssert (compr_endpts.A[j] < 128);
-	}
-	compr_endpts.a_lsb = onescnt >= 2;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-	{
-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1);
-		compr_endpts.B[j] = endpts.B[j] >> 1;
-		nvAssert (compr_endpts.B[j] < 128);
-	}
-	compr_endpts.b_lsb = onescnt >= 2;
-}
-
-static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts)
-{
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-	{
-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
-	}
-}
-
-static void uncompress_endpoints(const IntEndptsRGBA_2 compr_endpts[NREGIONS], IntEndptsRGBA endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		uncompress_one(compr_endpts[i], endpts[i]);
-}
-
-static void compress_endpoints(const IntEndptsRGBA endpts[NREGIONS], IntEndptsRGBA_2 compr_endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		compress_one(endpts[i], compr_endpts[i]);
-}
-
-
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA_2 q_endpts[NREGIONS])
-{
-	IntEndptsRGBA full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
-
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
-
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	int index_positions[NREGIONS];
-
-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int x = index_positions[region] & 3;
-		int y = (index_positions[region] >> 2) & 3;
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGBA; ++i) 
-			{
-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
-			}
-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGBA_2 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		out.write(endpts[i].a_lsb, 1);
-		out.write(endpts[i].b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 65);
-}
-
-static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	p = patterns[pat_index];
-
-	shapeindex = 0;		// we don't have any
-
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-	
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		endpts[i].a_lsb  = in.read(1);
-		endpts[i].b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 65);
-}
-
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	nvAssert ((indices[0][0] & HIGH_INDEXBIT) == 0);
-
-	// the index we shorten is always index 0
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-	{
-		if (i==0)
-			out.write(indices[i>>2][i&3], INDEXBITS-1);	// write i..[2:0]
-		else
-			out.write(indices[i>>2][i&3], INDEXBITS);	// write i..[3:0]
-	}
-
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	// the index we shorten is always index 0
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-	{
-		if (i==0)
-			indices[i>>2][i&3] = in.read(INDEXBITS-1);	// read i..[1:0]
-		else
-			indices[i>>2][i&3] = in.read(INDEXBITS);	// read i..[2:0]
-	}
-}
-
-static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGBA endpts;
-
-	uncompress_one(endpts_2, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
-}
-
-void AVPCL::decompress_mode6(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-	Vector4 err;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) :
-									     Utils::metric4premult(colors[i], palette[j]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) :
-										 Utils::metric4premult(tile.data[y][x], palette[region][i]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA_2 new_a, new_b;
-	IntEndptsRGBA_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// try all lsb modes as we search for better endpoints
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.a_lsb = lsbmode & 1;
-			temp_in.b_lsb = (lsbmode >> 1) & 1;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-
-     simplify the above given that there is no transform now and that endpoints will always fit
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-
-		optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-
-		assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-		// (nreed) Commented out asserts because they go off all the time...not sure why
-		//for (int i=0; i<NREGIONS; ++i)
-		//	nvAssert(expected_opt_err[i] == opt_err[i]);
-		swap_indices(opt_endpts, opt_indices, shapeindex_best);
-
-		orig_toterr = opt_toterr = 0;
-		for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-		//nvAssert(opt_toterr <= orig_toterr);
-
-		if (opt_toterr < orig_toterr)
-		{
-			emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-			return opt_toterr;
-		}
-		else
-		{
-			emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-			return orig_toterr;
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 6).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	if (v.w < 0.0f) v.w = 0.0f;
-	if (v.w > 255.0f) v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr;
-
-		besterr = Utils::metric4(tile.data[y][x], palette[region][0]);
-
-		for (int i = 1; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector4 colors[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x];
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = colors[0];
-			endpts[region].B = colors[0];
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = colors[0];
-			endpts[region].B = colors[1];
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean, direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*direction;
-		endpts[region].B = mean + maxp*direction;
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode6(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=1;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl_mode7.cpp b/3rdparty/nvtt/bc7/avpcl_mode7.cpp
deleted file mode 100644
index 441c4ac8b..000000000
--- a/3rdparty/nvtt/bc7/avpcl_mode7.cpp
+++ /dev/null
@@ -1,1094 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x10000000 5555.1x4 64p 2bi (30b)
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_two.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	4
-#define	INDEXBITS	2
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-#define	NREGIONS  2
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		alpha		xfm	mode  mb
-	5,5,5,5,	5,5,5,5,	5,5,5,5,	5,5,5,5,	0,	0x80, 8, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];
-	int endpt_b_prec[NCHANNELS_RGBA];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	5,5,5,5,  5,5,5,5,  5,5,5,5,  5,5,5,5,
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-/*
-we're using this table to assign lsbs
-abgr	>=2	correct
-0000	0	0
-0001	0	0
-0010	0	0
-0011	1	x1
-0100	0	0
-0101	1	x1
-0110	1	x1
-0111	1	1
-1000	0	0
-1001	1	x0
-1010	1	x0
-1011	1	1
-1100	1	x0
-1101	1	1
-1110	1	1
-1111	1	1
-
-we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8.
-I choose to assign the lsbs so that the rgb channels are as good as possible.
-*/
-
-// 6666 ->5555.1, use the "correct" column above to assign the lsb
-static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts)
-{
-	int onescnt;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-	{
-		// ignore the alpha channel in the count
-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1);
-		compr_endpts.A[j] = endpts.A[j] >> 1;
-		nvAssert (compr_endpts.A[j] < 32);
-	}
-	compr_endpts.a_lsb = onescnt >= 2;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-	{
-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1);
-		compr_endpts.B[j] = endpts.B[j] >> 1;
-		nvAssert (compr_endpts.B[j] < 32);
-	}
-	compr_endpts.b_lsb = onescnt >= 2;
-}
-
-static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts)
-{
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-	{
-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
-	}
-}
-static void uncompress_endpoints(const IntEndptsRGBA_2 compr_endpts[NREGIONS], IntEndptsRGBA endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		uncompress_one(compr_endpts[i], endpts[i]);
-}
-
-static void compress_endpoints(const IntEndptsRGBA endpts[NREGIONS], IntEndptsRGBA_2 compr_endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		compress_one(endpts[i], compr_endpts[i]);
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA_2 q_endpts[NREGIONS])
-{
-	IntEndptsRGBA full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
-
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
-
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-		int x = POS_TO_X(position);
-		int y = POS_TO_Y(position);
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGBA; ++i) 
-			{
-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
-			}
-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGBA_2 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		out.write(endpts[i].a_lsb, 1);
-		out.write(endpts[i].b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 98);
-}
-
-static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-	
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		endpts[i].a_lsb  = in.read(1);
-		endpts[i].b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 98);
-}
-
-// WORK PLACEHOLDER -- keep it simple for now
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGBA endpts;
-
-	uncompress_one(endpts_2, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
-}
-
-// sign extend but only if it was transformed
-static void sign_extend(Pattern &p, IntEndptsRGBA_2 endpts[NREGIONS])
-{
-	nvUnreachable();
-}
-
-void AVPCL::decompress_mode7(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	if (p.transformed)
-	{
-		sign_extend(p, endpts);
-		transform_inverse(endpts);
-	}
-
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-	Vector4 err;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) :
-									     Utils::metric4premult(colors[i], palette[j]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) :
-										 Utils::metric4premult(tile.data[y][x], palette[region][i]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA_2 new_a, new_b;
-	IntEndptsRGBA_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// try all lsb modes as we search for better endpoints
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.a_lsb = lsbmode & 1;
-			temp_in.b_lsb = (lsbmode >> 1) & 1;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 7).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	if (v.w < 0.0f) v.w = 0.0f;
-	if (v.w > 255.0f) v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector4 colors[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x];
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = colors[0];
-			endpts[region].B = colors[0];
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = colors[0];
-			endpts[region].B = colors[1];
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean, direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*direction;
-		endpts[region].B = mean + maxp*direction;
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode7(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-
diff --git a/3rdparty/nvtt/bc7/avpcl_utils.cpp b/3rdparty/nvtt/bc7/avpcl_utils.cpp
deleted file mode 100644
index af9971108..000000000
--- a/3rdparty/nvtt/bc7/avpcl_utils.cpp
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Utility and common routines
-
-#include "avpcl_utils.h"
-#include "avpcl.h"
-#include "nvmath/vector.inl"
-#include <math.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64};										// divided by 64
-static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};		// divided by 64
-
-int Utils::lerp(int a, int b, int i, int bias, int denom)
-{
-#ifdef	USE_ZOH_INTERP
-	nvAssert (denom == 3 || denom == 7 || denom == 15);
-	nvAssert (i >= 0 && i <= denom);
-	nvAssert (bias >= 0 && bias <= denom/2);
-	nvAssert (a >= 0 && b >= 0);
-
-	int round = 0;
-#ifdef	USE_ZOH_INTERP_ROUNDED
-	round = 32;
-#endif
-
-	switch (denom)
-	{
-	case 3:	denom *= 5; i *= 5;	// fall through to case 15
-	case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
-	case 7:	return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
-	default: nvUnreachable(); return 0;
-	}
-#else
-	return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom));		// simple exact interpolation
-#endif
-}
-
-Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
-{
-#ifdef	USE_ZOH_INTERP
-	nvAssert (denom == 3 || denom == 7 || denom == 15);
-	nvAssert (i >= 0 && i <= denom);
-	nvAssert (bias >= 0 && bias <= denom/2);
-//	nvAssert (a >= 0 && b >= 0);
-
-	// no need to bias these as this is an exact division
-
-	switch (denom)
-	{
-	case 3:	denom *= 5; i *= 5;	// fall through to case 15
-	case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
-	case 7:	return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
-	default: nvUnreachable(); return Vector4(0);
-	}
-#else
-	return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom));		// simple exact interpolation
-#endif
-}
-
-
-int Utils::unquantize(int q, int prec)
-{
-	int unq;
-
-	nvAssert (prec > 3);	// we only want to do one replicate
-
-#ifdef USE_ZOH_QUANT
-	if (prec >= 8)
-		unq = q;
-	else if (q == 0) 
-		unq = 0;
-	else if (q == ((1<<prec)-1)) 
-		unq = 255;
-	else
-		unq = (q * 256 + 128) >> prec;
-#else
-	// avpcl unquantizer -- bit replicate
-	unq = (q << (8-prec)) | (q >> (2*prec-8));
-#endif
-
-	return unq;
-}
-
-// quantize to the best value -- i.e., minimize unquantize error
-int Utils::quantize(float value, int prec)
-{
-	int q, unq;
-
-	nvAssert (prec > 3);	// we only want to do one replicate
-
-	unq = (int)floor(value + 0.5f);
-	nvAssert (unq <= 255);
-
-#ifdef USE_ZOH_QUANT
-	q = (prec >= 8) ? unq : (unq << prec) / 256;
-#else
-	// avpcl quantizer -- scale properly for best possible bit-replicated result
-	q = (unq * ((1<<prec)-1) + 127)/255;
-#endif
-
-	nvAssert (q >= 0 && q < (1 << prec));
-
-	return q;
-}
-
-float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
-{
-	Vector4 err = a - b;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// weigh the components
-		err.x *= rwt;
-		err.y *= gwt;
-		err.z *= bwt;
-	}
-
-	return lengthSquared(err);
-}
-
-// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go.
-float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
-{
-	Vector3 err = a - b;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else if (AVPCL::flag_nonuniform_ati)
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// adjust weights based on rotatemode
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: break;
-		case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
-		case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
-		case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
-		default: nvUnreachable();
-		}
-
-		// weigh the components
-		err.x *= rwt;
-		err.y *= gwt;
-		err.z *= bwt;
-	}
-
-	return lengthSquared(err);
-}
-
-float Utils::metric1(const float a, const float b, int rotatemode)
-{
-	float err = a - b;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt, awt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else if (AVPCL::flag_nonuniform_ati)
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// adjust weights based on rotatemode
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
-		case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
-		case ROTATEMODE_RGBA_RABG: awt = gwt; break;
-		case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
-		default: nvUnreachable();
-		}
-
-		// weigh the components
-		err *= awt;
-	}
-
-	return err * err;
-}
-
-float Utils::premult(float r, float a)
-{
-	// note that the args are really integers stored in floats
-	int R = int(r), A = int(a);
-
-	nvAssert ((R==r) && (A==a));
-
-	return float((R*A + 127)/255);
-}
-
-static void premult4(Vector4& rgba)
-{
-	rgba.x = Utils::premult(rgba.x, rgba.w);
-	rgba.y = Utils::premult(rgba.y, rgba.w);
-	rgba.z = Utils::premult(rgba.z, rgba.w);
-}
-
-static void premult3(Vector3& rgb, float a)
-{
-	rgb.x = Utils::premult(rgb.x, a);
-	rgb.y = Utils::premult(rgb.y, a);
-	rgb.z = Utils::premult(rgb.z, a);
-}
-
-float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
-{
-	Vector4 pma = a, pmb = b;
-
-	premult4(pma);
-	premult4(pmb);
-
-	Vector4 err = pma - pmb;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// weigh the components
-		err.x *= rwt;
-		err.y *= gwt;
-		err.z *= bwt;
-	}
-
-	return lengthSquared(err);
-}
-
-float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
-{
-	Vector3 pma = rgb0, pmb = rgb1;
-
-	premult3(pma, a0);
-	premult3(pmb, a1);
-
-	Vector3 err = pma - pmb;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// weigh the components
-		err.x *= rwt;
-		err.y *= gwt;
-		err.z *= bwt;
-	}
-
-	return lengthSquared(err);
-}
-
-float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
-{
-	Vector3 pma = rgb0, pmb = rgb1;
-
-	switch(rotatemode)
-	{
-	case ROTATEMODE_RGBA_RGBA:
-		// this function isn't supposed to be called for this rotatemode
-		nvUnreachable();
-		break;
-	case ROTATEMODE_RGBA_AGBR:
-		pma.y = premult(pma.y, pma.x);
-		pma.z = premult(pma.z, pma.x);
-		pmb.y = premult(pmb.y, pmb.x);
-		pmb.z = premult(pmb.z, pmb.x);
-		break;
-	case ROTATEMODE_RGBA_RABG:
-		pma.x = premult(pma.x, pma.y);
-		pma.z = premult(pma.z, pma.y);
-		pmb.x = premult(pmb.x, pmb.y);
-		pmb.z = premult(pmb.z, pmb.y);
-		break;
-	case ROTATEMODE_RGBA_RGAB:
-		pma.x = premult(pma.x, pma.z);
-		pma.y = premult(pma.y, pma.z);
-		pmb.x = premult(pmb.x, pmb.z);
-		pmb.y = premult(pmb.y, pmb.z);
-		break;
-	default: nvUnreachable();
-	}
-
-	Vector3 err = pma - pmb;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// weigh the components
-		err.x *= rwt;
-		err.y *= gwt;
-		err.z *= bwt;
-	}
-
-	return lengthSquared(err);
-}
-
-float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
-{
-	float err = premult(rgb0, a0) - premult(rgb1, a1);
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt, awt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else if (AVPCL::flag_nonuniform_ati)
-		{
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// adjust weights based on rotatemode
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
-		case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
-		case ROTATEMODE_RGBA_RABG: awt = gwt; break;
-		case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
-		default: nvUnreachable();
-		}
-
-		// weigh the components
-		err *= awt;
-	}
-
-	return err * err;
-}
diff --git a/3rdparty/nvtt/bc7/avpcl_utils.h b/3rdparty/nvtt/bc7/avpcl_utils.h
deleted file mode 100644
index cb546d547..000000000
--- a/3rdparty/nvtt/bc7/avpcl_utils.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// utility class holding common routines
-#ifndef _AVPCL_UTILS_H
-#define _AVPCL_UTILS_H
-
-#include "nvmath/vector.h"
-
-namespace AVPCL {
-
-inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); }
-
-static const int INDEXMODE_BITS				= 1;		// 2 different index modes
-static const int NINDEXMODES				= (1<<(INDEXMODE_BITS));
-static const int INDEXMODE_ALPHA_IS_3BITS	= 0;
-static const int INDEXMODE_ALPHA_IS_2BITS	= 1;
-
-static const int ROTATEMODE_BITS		= 2;		// 4 different rotate modes
-static const int NROTATEMODES			= (1<<(ROTATEMODE_BITS));
-static const int ROTATEMODE_RGBA_RGBA	= 0;
-static const int ROTATEMODE_RGBA_AGBR	= 1;
-static const int ROTATEMODE_RGBA_RABG	= 2;
-static const int ROTATEMODE_RGBA_RGAB	= 3;
-
-class Utils
-{
-public:
-	// error metrics
-	static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b);
-	static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode);
-	static float metric1(float a, float b, int rotatemode);
-
-	static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1);
-	static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1);
-	static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode);
-	static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode);
-
-	static float premult(float r, float a);
-
-	// quantization and unquantization
-	static int unquantize(int q, int prec);
-	static int quantize(float value, int prec);
-
-	// lerping
-	static int lerp(int a, int b, int i, int bias, int denom);
-	static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom);
-};
-
-}
-
-#endif
diff --git a/3rdparty/nvtt/bc7/bits.h b/3rdparty/nvtt/bc7/bits.h
deleted file mode 100644
index 782f65597..000000000
--- a/3rdparty/nvtt/bc7/bits.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_BITS_H
-#define _AVPCL_BITS_H
-
-// read/write a bitstream
-
-#include "nvcore/debug.h"
-
-namespace AVPCL {
-
-class Bits
-{
-public:
-
-	Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
-	Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
-
-	void write(int value, int nbits) {
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		for (int i=0; i<nbits; ++i)
-			writeone(value>>i);
-	}
-	int read(int nbits) { 
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		int out = 0;
-		for (int i=0; i<nbits; ++i)
-			out |= readone() << i;
-		return out;
-	}
-	int getptr() { return bptr; }
-	void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
-	int getsize() { return bend; }
-
-private:
-	int	bptr;		// next bit to read
-	int bend;		// last written bit + 1
-	char *bits;		// ptr to user bit stream
-	const char *cbits;	// ptr to const user bit stream
-	int maxbits;	// max size of user bit stream
-	char readonly;	// 1 if this is a read-only stream
-
-	int readone() {
-		nvAssert (bptr < bend);
-		if (bptr >= bend) return 0;
-		int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
-		++bptr;
-		return bit != 0;
-	}
-	void writeone(int bit) {
-		nvAssert (!readonly); // "Writing a read-only bit stream"
-		nvAssert (bptr < maxbits);
-		if (bptr >= maxbits) return;
-		if (bit&1)
-			bits[bptr>>3] |= 1 << (bptr & 7);
-		else
-			bits[bptr>>3] &= ~(1 << (bptr & 7));
-		if (bptr++ >= bend) bend = bptr;
-	}
-};
-
-}
-
-#endif
diff --git a/3rdparty/nvtt/bc7/endpts.h b/3rdparty/nvtt/bc7/endpts.h
deleted file mode 100644
index 4f42a166f..000000000
--- a/3rdparty/nvtt/bc7/endpts.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_ENDPTS_H
-#define _AVPCL_ENDPTS_H
-
-// endpoint definitions and routines to search through endpoint space
-
-#include "nvmath/vector.h"
-
-namespace AVPCL {
-
-static const int NCHANNELS_RGB	= 3;
-static const int NCHANNELS_RGBA	= 4;
-static const int CHANNEL_R		= 0;
-static const int CHANNEL_G		= 1;
-static const int CHANNEL_B		= 2;
-static const int CHANNEL_A		= 3;
-
-struct FltEndpts
-{
-	nv::Vector4	A;
-	nv::Vector4	B;
-};
-
-struct IntEndptsRGB
-{
-	int		A[NCHANNELS_RGB];
-	int		B[NCHANNELS_RGB];
-};
-
-struct IntEndptsRGB_1
-{
-	int		A[NCHANNELS_RGB];
-	int		B[NCHANNELS_RGB];
-	int		lsb;				// shared lsb for A and B
-};
-
-struct IntEndptsRGB_2
-{
-	int		A[NCHANNELS_RGB];
-	int		B[NCHANNELS_RGB];
-	int		a_lsb;				// lsb for A
-	int		b_lsb;				// lsb for B
-};
-
-
-struct IntEndptsRGBA
-{
-	int		A[NCHANNELS_RGBA];
-	int		B[NCHANNELS_RGBA];
-};
-
-struct IntEndptsRGBA_2
-{
-	int		A[NCHANNELS_RGBA];
-	int		B[NCHANNELS_RGBA];
-	int		a_lsb;				// lsb for A
-	int		b_lsb;				// lsb for B
-};
-
-struct IntEndptsRGBA_2a
-{
-	int		A[NCHANNELS_RGBA];
-	int		B[NCHANNELS_RGBA];
-	int		a_lsb;				// lsb for RGB channels of A
-	int		b_lsb;				// lsb for RGB channels of A
-};
-
-}
-
-#endif
diff --git a/3rdparty/nvtt/bc7/shapes_three.h b/3rdparty/nvtt/bc7/shapes_three.h
deleted file mode 100644
index dc95ba5f4..000000000
--- a/3rdparty/nvtt/bc7/shapes_three.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef	_AVPCL_SHAPES_THREE_H
-#define _AVPCL_SHAPES_THREE_H
-
-// shapes for 3 regions
-
-#define NREGIONS 3
-#define NSHAPES 64
-#define SHAPEBITS 6
-
-static int shapes[NSHAPES*16] = 
-{
-0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   0, 2, 2, 2,   
-0, 0, 1, 1,   0, 0, 1, 1,   2, 0, 0, 1,   0, 0, 2, 2,   
-0, 2, 2, 1,   2, 2, 1, 1,   2, 2, 1, 1,   0, 0, 1, 1,   
-2, 2, 2, 2,   2, 2, 2, 1,   2, 2, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 2, 2,   0, 0, 1, 1,   
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 2, 2,   0, 0, 1, 1,   
-1, 1, 2, 2,   0, 0, 2, 2,   1, 1, 1, 1,   2, 2, 1, 1,   
-1, 1, 2, 2,   0, 0, 2, 2,   1, 1, 1, 1,   2, 2, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 2,   
-0, 0, 0, 0,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 1, 2,   
-1, 1, 1, 1,   1, 1, 1, 1,   2, 2, 2, 2,   0, 0, 1, 2,   
-2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   0, 0, 1, 2,   
-
-0, 1, 1, 2,   0, 1, 2, 2,   0, 0, 1, 1,   0, 0, 1, 1,   
-0, 1, 1, 2,   0, 1, 2, 2,   0, 1, 1, 2,   2, 0, 0, 1,   
-0, 1, 1, 2,   0, 1, 2, 2,   1, 1, 2, 2,   2, 2, 0, 0,   
-0, 1, 1, 2,   0, 1, 2, 2,   1, 2, 2, 2,   2, 2, 2, 0,   
-
-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 0,   0, 0, 2, 2,   
-0, 0, 1, 1,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 2, 2,   
-0, 1, 1, 2,   2, 0, 0, 1,   1, 1, 2, 2,   0, 0, 2, 2,   
-1, 1, 2, 2,   2, 2, 0, 0,   1, 1, 2, 2,   1, 1, 1, 1,   
-
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   1, 1, 0, 0,   
-0, 2, 2, 2,   2, 2, 2, 1,   0, 1, 2, 2,   2, 2, 1, 0,   
-0, 2, 2, 2,   2, 2, 2, 1,   0, 1, 2, 2,   2, 2, 1, 0,   
-
-0, 1, 2, 2,   0, 0, 1, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
-0, 1, 2, 2,   0, 0, 1, 2,   1, 2, 2, 1,   0, 1, 1, 0,   
-0, 0, 1, 1,   1, 1, 2, 2,   1, 2, 2, 1,   1, 2, 2, 1,   
-0, 0, 0, 0,   2, 2, 2, 2,   0, 1, 1, 0,   1, 2, 2, 1,   
-
-0, 0, 2, 2,   0, 1, 1, 0,   0, 0, 1, 1,   0, 0, 0, 0,   
-1, 1, 0, 2,   0, 1, 1, 0,   0, 1, 2, 2,   2, 0, 0, 0,   
-1, 1, 0, 2,   2, 0, 0, 2,   0, 1, 2, 2,   2, 2, 1, 1,   
-0, 0, 2, 2,   2, 2, 2, 2,   0, 0, 1, 1,   2, 2, 2, 1,   
-
-0, 0, 0, 0,   0, 2, 2, 2,   0, 0, 1, 1,   0, 1, 2, 0,   
-0, 0, 0, 2,   0, 0, 2, 2,   0, 0, 1, 2,   0, 1, 2, 0,   
-1, 1, 2, 2,   0, 0, 1, 2,   0, 0, 2, 2,   0, 1, 2, 0,   
-1, 2, 2, 2,   0, 0, 1, 1,   0, 2, 2, 2,   0, 1, 2, 0,   
-
-0, 0, 0, 0,   0, 1, 2, 0,   0, 1, 2, 0,   0, 0, 1, 1,   
-1, 1, 1, 1,   1, 2, 0, 1,   2, 0, 1, 2,   2, 2, 0, 0,   
-2, 2, 2, 2,   2, 0, 1, 2,   1, 2, 0, 1,   1, 1, 2, 2,   
-0, 0, 0, 0,   0, 1, 2, 0,   0, 1, 2, 0,   0, 0, 1, 1,   
-
-0, 0, 1, 1,   0, 1, 0, 1,   0, 0, 0, 0,   0, 0, 2, 2,   
-1, 1, 2, 2,   0, 1, 0, 1,   0, 0, 0, 0,   1, 1, 2, 2,   
-2, 2, 0, 0,   2, 2, 2, 2,   2, 1, 2, 1,   0, 0, 2, 2,   
-0, 0, 1, 1,   2, 2, 2, 2,   2, 1, 2, 1,   1, 1, 2, 2,   
-
-0, 0, 2, 2,   0, 2, 2, 0,   0, 1, 0, 1,   0, 0, 0, 0,   
-0, 0, 1, 1,   1, 2, 2, 1,   2, 2, 2, 2,   2, 1, 2, 1,   
-0, 0, 2, 2,   0, 2, 2, 0,   2, 2, 2, 2,   2, 1, 2, 1,   
-0, 0, 1, 1,   1, 2, 2, 1,   0, 1, 0, 1,   2, 1, 2, 1,   
-
-0, 1, 0, 1,   0, 2, 2, 2,   0, 0, 0, 2,   0, 0, 0, 0,   
-0, 1, 0, 1,   0, 1, 1, 1,   1, 1, 1, 2,   2, 1, 1, 2,   
-0, 1, 0, 1,   0, 2, 2, 2,   0, 0, 0, 2,   2, 1, 1, 2,   
-2, 2, 2, 2,   0, 1, 1, 1,   1, 1, 1, 2,   2, 1, 1, 2,   
-
-0, 2, 2, 2,   0, 0, 0, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   1, 1, 1, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   1, 1, 1, 2,   0, 1, 1, 0,   2, 1, 1, 2,   
-0, 2, 2, 2,   0, 0, 0, 2,   2, 2, 2, 2,   2, 1, 1, 2,   
-
-0, 1, 1, 0,   0, 0, 2, 2,   0, 0, 2, 2,   0, 0, 0, 0,   
-0, 1, 1, 0,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 0, 0,   
-2, 2, 2, 2,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 0, 0,   
-2, 2, 2, 2,   0, 0, 2, 2,   0, 0, 2, 2,   2, 1, 1, 2,   
-
-0, 0, 0, 2,   0, 2, 2, 2,   0, 1, 0, 1,   0, 1, 1, 1,   
-0, 0, 0, 1,   1, 2, 2, 2,   2, 2, 2, 2,   2, 0, 1, 1,   
-0, 0, 0, 2,   0, 2, 2, 2,   2, 2, 2, 2,   2, 2, 0, 1,   
-0, 0, 0, 1,   1, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 0,
-};
-
-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
-
-static int shapeindex_to_compressed_indices[NSHAPES*3] = 
-{
-	0, 3,15,  0, 3, 8,  0,15, 8,  0,15, 3,
-	0, 8,15,  0, 3,15,  0,15, 3,  0,15, 8,
-	0, 8,15,  0, 8,15,  0, 6,15,  0, 6,15,
-	0, 6,15,  0, 5,15,  0, 3,15,  0, 3, 8,
-
-	0, 3,15,  0, 3, 8,  0, 8,15,  0,15, 3,
-	0, 3,15,  0, 3, 8,  0, 6,15,  0,10, 8,
-	0, 5, 3,  0, 8,15,  0, 8, 6,  0, 6,10,
-	0, 8,15,  0, 5,15,  0,15,10,  0,15, 8,
-
-	0, 8,15,  0,15, 3,  0, 3,15,  0, 5,10,
-	0, 6,10,  0,10, 8,  0, 8, 9,  0,15,10,
-	0,15, 6,  0, 3,15,  0,15, 8,  0, 5,15,
-	0,15, 3,  0,15, 6,  0,15, 6,  0,15, 8,
-
-	0, 3,15,  0,15, 3,  0, 5,15,  0, 5,15,
-	0, 5,15,  0, 8,15,  0, 5,15,  0,10,15,
-	0, 5,15,  0,10,15,  0, 8,15,  0,13,15,
-	0,15, 3,  0,12,15,  0, 3,15,  0, 3, 8
-
-};
-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*3+(region)]
-
-#endif
diff --git a/3rdparty/nvtt/bc7/shapes_two.h b/3rdparty/nvtt/bc7/shapes_two.h
deleted file mode 100644
index 853d557a6..000000000
--- a/3rdparty/nvtt/bc7/shapes_two.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_SHAPES_TWO_H
-#define _AVPCL_SHAPES_TWO_H
-
-// shapes for two regions
-
-#define NREGIONS 2
-#define NSHAPES 64
-#define SHAPEBITS 6
-
-static int shapes[NSHAPES*16] = 
-{
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   0, 0, 0, 1,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   1, 1, 1, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 0,   0, 1, 1, 1,   
-1, 0, 0, 0,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 1, 1,   
-1, 1, 1, 0,   0, 0, 0, 0,   1, 0, 0, 0,   0, 0, 0, 1,   
-1, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 0, 0, 0,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 1, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 0, 0,   1, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   0, 0, 0, 1,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 0,   1, 1, 0, 0,   0, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 1,   1, 0, 0, 1,   
-1, 1, 1, 0,   1, 1, 1, 1,   1, 0, 0, 0,   1, 0, 0, 1,   
-1, 0, 0, 0,   0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   0, 0, 1, 1,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   0, 0, 1, 1,   
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   1, 1, 0, 0,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 1, 1,   0, 1, 0, 1,   0, 1, 1, 0,   0, 1, 0, 1,   
-1, 1, 0, 0,   0, 1, 0, 1,   1, 0, 0, 1,   1, 0, 1, 0,   
-0, 0, 1, 1,   1, 0, 1, 0,   0, 1, 1, 0,   1, 0, 1, 0,   
-1, 1, 0, 0,   1, 0, 1, 0,   1, 0, 0, 1,   0, 1, 0, 1,   
-
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   1, 0, 1, 1,   
-1, 1, 0, 0,   1, 1, 0, 0,   0, 1, 0, 0,   1, 1, 0, 1,   
-1, 1, 1, 0,   1, 0, 0, 0,   1, 1, 0, 0,   1, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 0, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   0, 1, 1, 0,   
-0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   0, 0, 0, 0,   
-
-0, 1, 0, 0,   0, 0, 1, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-1, 1, 1, 0,   0, 1, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 1, 0, 0,   0, 0, 1, 0,   0, 1, 1, 1,   1, 1, 1, 0,   
-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 0,   0, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 1, 1,   
-1, 1, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   1, 1, 0, 0,   
-0, 0, 1, 1,   1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   
-
-0, 1, 1, 0,   0, 1, 1, 0,   0, 1, 1, 1,   0, 0, 0, 1,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 1, 1, 0,   1, 0, 0, 0,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 0, 0, 0,   1, 1, 1, 0,   
-1, 0, 0, 1,   1, 0, 0, 1,   0, 0, 0, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-1, 1, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 0,   0, 1, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 1, 1, 1,   
-
-};
-
-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
-
-static int shapeindex_to_compressed_indices[NSHAPES*2] = 
-{
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-
-	0,15,  0, 2,  0, 8,  0, 2,
-	0, 2,  0, 8,  0, 8,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 8,  0, 8,  0, 2,  0, 2,
-
-	0,15,  0,15,  0, 6,  0, 8,
-	0, 2,  0, 8,  0,15,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 2,  0,15,  0,15,  0, 6,
-
-	0, 6,  0, 2,  0, 6,  0, 8,
-	0,15,  0,15,  0, 2,  0, 2,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0, 2,  0, 2,  0,15
-
-};
-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*2+(region)]
-
-#endif
diff --git a/3rdparty/nvtt/bc7/tile.h b/3rdparty/nvtt/bc7/tile.h
deleted file mode 100644
index 730d9bac1..000000000
--- a/3rdparty/nvtt/bc7/tile.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_TILE_H
-#define _AVPCL_TILE_H
-
-#include "nvmath/vector.h"
-#include <math.h>
-#include "avpcl_utils.h"
-
-namespace AVPCL {
-
-// extract a tile of pixels from an array
-
-class Tile
-{
-public:
-	static const int TILE_H = 4;
-	static const int TILE_W = 4;
-	static const int TILE_TOTAL = TILE_H * TILE_W;
-	nv::Vector4 data[TILE_H][TILE_W];
-    float importance_map[TILE_H][TILE_W];
-	int	size_x, size_y;			// actual size of tile
-
-	Tile() {};
-	~Tile(){};
-	Tile(int xs, int ys) {size_x = xs; size_y = ys;}
-};
-
-}
-
-#endif
diff --git a/3rdparty/nvtt/nvcore/array.h b/3rdparty/nvtt/nvcore/array.h
deleted file mode 100644
index f4460f3b4..000000000
--- a/3rdparty/nvtt/nvcore/array.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_ARRAY_H
-#define NV_CORE_ARRAY_H
-
-/*
-This array class requires the elements to be relocable; it uses memmove and realloc. Ideally I should be 
-using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
-are not supported.
-
-Note also that push_back and resize does not support inserting arguments elements that are in the same 
-container. This is forbidden to prevent an extra copy.
-*/
-
-
-#include "memory.h"
-#include "debug.h"
-#include "foreach.h" // pseudoindex
-
-
-namespace nv 
-{
-    class Stream;
-
-    /**
-    * Replacement for std::vector that is easier to debug and provides
-    * some nice foreach enumerators. 
-    */
-    template<typename T>
-    class NVCORE_CLASS Array {
-    public:
-        typedef uint size_type;
-
-        // Default constructor.
-        NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
-
-        // Copy constructor.
-        NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
-            copy(a.m_buffer, a.m_size);
-        }
-
-        // Constructor that initializes the vector with the given elements.
-        NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
-            copy(ptr, num);
-        }
-
-        // Allocate array.
-        NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
-            setArrayCapacity(capacity);
-        }
-
-        // Destructor.
-        NV_FORCEINLINE ~Array() {
-            clear();
-            free<T>(m_buffer);
-        }
-
-
-        /// Const element access.
-        NV_FORCEINLINE const T & operator[]( uint index ) const
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-        NV_FORCEINLINE const T & at( uint index ) const
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-
-        /// Element access.
-        NV_FORCEINLINE T & operator[] ( uint index )
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-        NV_FORCEINLINE T & at( uint index )
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-
-        /// Get vector size.
-        NV_FORCEINLINE uint size() const { return m_size; }
-
-        /// Get vector size.
-        NV_FORCEINLINE uint count() const { return m_size; }
-
-        /// Get vector capacity.
-        NV_FORCEINLINE uint capacity() const { return m_capacity; }
-
-        /// Get const vector pointer.
-        NV_FORCEINLINE const T * buffer() const { return m_buffer; }
-
-        /// Get vector pointer.
-        NV_FORCEINLINE T * buffer() { return m_buffer; }
-
-        /// Provide begin/end pointers for C++11 range-based for loops.
-        NV_FORCEINLINE T * begin() { return m_buffer; }
-        NV_FORCEINLINE T * end() { return m_buffer + m_size; }
-        NV_FORCEINLINE const T * begin() const { return m_buffer; }
-        NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
-
-        /// Is vector empty.
-        NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
-
-        /// Is a null vector.
-        NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
-
-
-        T & append();
-        void push_back( const T & val );
-        void pushBack( const T & val );
-        Array<T> & append( const T & val );
-        Array<T> & operator<< ( T & t );
-        void pop_back();
-        void popBack(uint count = 1);
-        void popFront(uint count = 1);
-        const T & back() const;
-        T & back();
-        const T & front() const;
-        T & front();
-        bool contains(const T & e) const;
-        bool find(const T & element, uint * indexPtr) const;
-        bool find(const T & element, uint begin, uint end, uint * indexPtr) const;
-        void removeAt(uint index);
-        bool remove(const T & element);
-        void insertAt(uint index, const T & val = T());
-        void append(const Array<T> & other);
-        void append(const T other[], uint count);
-        void replaceWithLast(uint index);
-        void resize(uint new_size);
-        void resize(uint new_size, const T & elem);
-        void fill(const T & elem);
-        void clear();
-        void shrink();
-        void reserve(uint desired_size);
-        void copy(const T * data, uint count);
-        Array<T> & operator=( const Array<T> & a );
-        T * release();
-
-
-        // Array enumerator.
-        typedef uint PseudoIndex;
-
-        NV_FORCEINLINE PseudoIndex start() const { return 0; }
-        NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
-        NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
-
-#if NV_CC_MSVC
-        NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
-            return m_buffer[i(this)];
-        }
-        NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
-            return m_buffer[i(this)];
-        }
-#endif
-
-        // Friends.
-        template <typename Typ> 
-        friend Stream & operator<< ( Stream & s, Array<Typ> & p );
-
-        template <typename Typ>
-        friend void swap(Array<Typ> & a, Array<Typ> & b);
-
-
-    protected:
-
-        void setArraySize(uint new_size);
-        void setArrayCapacity(uint new_capacity);
-
-        T * m_buffer;
-        uint m_capacity;
-        uint m_size;
-
-    };
-
-
-} // nv namespace
-
-#endif // NV_CORE_ARRAY_H
diff --git a/3rdparty/nvtt/nvcore/array.inl b/3rdparty/nvtt/nvcore/array.inl
deleted file mode 100644
index 2138b3ab1..000000000
--- a/3rdparty/nvtt/nvcore/array.inl
+++ /dev/null
@@ -1,437 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_ARRAY_INL
-#define NV_CORE_ARRAY_INL
-
-#include "array.h"
-
-#include "stream.h"
-#include "utils.h" // swap
-
-#include <string.h>	// memmove
-#include <new> // for placement new
-
-
-
-namespace nv 
-{
-    template <typename T>
-    NV_FORCEINLINE T & Array<T>::append()
-    {
-        uint old_size = m_size;
-        uint new_size = m_size + 1;
-
-        setArraySize(new_size);
-
-        construct_range(m_buffer, new_size, old_size);
-
-        return m_buffer[old_size]; // Return reference to last element.
-    }
-
-    // Push an element at the end of the vector.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::push_back( const T & val )
-    {
-#if 1
-        nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size);
-
-        uint old_size = m_size;
-        uint new_size = m_size + 1;
-
-        setArraySize(new_size);
-
-        construct_range(m_buffer, new_size, old_size, val);
-#else
-        uint new_size = m_size + 1;
-
-        if (new_size > m_capacity)
-        {
-            // @@ Is there any way to avoid this copy?
-            // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?
-            // @@ Assert instead of copy?
-            const T copy(val);	// create a copy in case value is inside of this array.
-
-            setArraySize(new_size);
-
-            new (m_buffer+new_size-1) T(copy);
-        }
-        else
-        {
-            m_size = new_size;
-            new(m_buffer+new_size-1) T(val);
-        }
-#endif // 0/1
-    }
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::pushBack( const T & val )
-    {
-        push_back(val);
-    }
-    template <typename T>
-    NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )
-    {
-        push_back(val);
-        return *this;
-    }
-
-    // Qt like push operator.
-    template <typename T>
-    NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )
-    {
-        push_back(t);
-        return *this;
-    }
-
-    // Pop the element at the end of the vector.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::pop_back()
-    {
-        nvDebugCheck( m_size > 0 );
-        resize( m_size - 1 );
-    }
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::popBack(uint count)
-    {
-        nvDebugCheck(m_size >= count);
-        resize(m_size - count);
-    }
-
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::popFront(uint count)
-    {
-        nvDebugCheck(m_size >= count);
-        //resize(m_size - count);
-
-        if (m_size == count) {
-            clear();
-        }
-        else {
-            destroy_range(m_buffer, 0, count);
-
-            memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count));
-
-            m_size -= count;
-        }
-
-    }
-
-
-    // Get back element.
-    template <typename T>
-    NV_FORCEINLINE const T & Array<T>::back() const
-    {
-        nvDebugCheck( m_size > 0 );
-        return m_buffer[m_size-1];
-    }
-
-    // Get back element.
-    template <typename T>
-    NV_FORCEINLINE T & Array<T>::back()
-    {
-        nvDebugCheck( m_size > 0 );
-        return m_buffer[m_size-1];
-    }
-
-    // Get front element.
-    template <typename T>
-    NV_FORCEINLINE const T & Array<T>::front() const
-    {
-        nvDebugCheck( m_size > 0 );
-        return m_buffer[0];
-    }
-
-    // Get front element.
-    template <typename T>
-    NV_FORCEINLINE T & Array<T>::front()
-    {
-        nvDebugCheck( m_size > 0 );
-        return m_buffer[0];
-    }
-
-    // Check if the given element is contained in the array.
-    template <typename T>
-    NV_FORCEINLINE bool Array<T>::contains(const T & e) const
-    {
-        return find(e, NULL);
-    }
-
-    // Return true if element found.
-    template <typename T>
-    NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const
-    {
-        return find(element, 0, m_size, indexPtr);
-    }
-
-    // Return true if element found within the given range.
-    template <typename T>
-    NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const
-    {
-        return ::nv::find(element, m_buffer, begin, end, indexPtr);
-    }
-
-
-    // Remove the element at the given index. This is an expensive operation!
-    template <typename T>
-    void Array<T>::removeAt(uint index)
-    {
-        nvDebugCheck(index >= 0 && index < m_size);
-
-        if (m_size == 1) {
-            clear();
-        }
-        else {
-            m_buffer[index].~T();
-
-            memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index));
-            m_size--;
-        }
-    }
-
-    // Remove the first instance of the given element.
-    template <typename T>
-    bool Array<T>::remove(const T & element)
-    {
-        uint index;
-        if (find(element, &index)) {
-            removeAt(index);
-            return true;
-        }
-        return false;
-    }
-
-    // Insert the given element at the given index shifting all the elements up.
-    template <typename T>
-    void Array<T>::insertAt(uint index, const T & val/*=T()*/)
-    {
-        nvDebugCheck( index >= 0 && index <= m_size );
-
-        setArraySize(m_size + 1);
-
-        if (index < m_size - 1) {
-            memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index));
-        }
-
-        // Copy-construct into the newly opened slot.
-        new(m_buffer+index) T(val);
-    }
-
-    // Append the given data to our vector.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::append(const Array<T> & other)
-    {
-        append(other.m_buffer, other.m_size);
-    }
-
-    // Append the given data to our vector.
-    template <typename T>
-    void Array<T>::append(const T other[], uint count)
-    {
-        if (count > 0) {
-            const uint old_size = m_size;
-
-            setArraySize(m_size + count);
-
-            for (uint i = 0; i < count; i++ ) {
-                new(m_buffer + old_size + i) T(other[i]);
-            }
-        }
-    }
-
-
-    // Remove the given element by replacing it with the last one.
-    template <typename T> 
-    void Array<T>::replaceWithLast(uint index)
-    {
-        nvDebugCheck( index < m_size );
-        nv::swap(m_buffer[index], back());      // @@ Is this OK when index == size-1?
-        (m_buffer+m_size-1)->~T();
-        m_size--;
-    }
-
-    // Resize the vector preserving existing elements.
-    template <typename T> 
-    void Array<T>::resize(uint new_size)
-    {
-        uint old_size = m_size;
-
-        // Destruct old elements (if we're shrinking).
-        destroy_range(m_buffer, new_size, old_size);
-
-        setArraySize(new_size);
-
-        // Call default constructors
-        construct_range(m_buffer, new_size, old_size);
-    }
-
-
-    // Resize the vector preserving existing elements and initializing the
-    // new ones with the given value.
-    template <typename T> 
-    void Array<T>::resize(uint new_size, const T & elem)
-    {
-        nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size);
-
-        uint old_size = m_size;
-
-        // Destruct old elements (if we're shrinking).
-        destroy_range(m_buffer, new_size, old_size);
-
-        setArraySize(new_size);
-
-        // Call copy constructors
-        construct_range(m_buffer, new_size, old_size, elem);
-    }
-
-    // Fill array with the given value.
-    template <typename T>
-    void Array<T>::fill(const T & elem)
-    {
-        fill(m_buffer, m_size, elem);
-    }
-
-    // Clear the buffer.
-    template <typename T> 
-    NV_FORCEINLINE void Array<T>::clear()
-    {
-        nvDebugCheck(isValidPtr(m_buffer));
-
-        // Destruct old elements
-        destroy_range(m_buffer, 0, m_size);
-
-        m_size = 0;
-    }
-
-    // Shrink the allocated vector.
-    template <typename T> 
-    NV_FORCEINLINE void Array<T>::shrink()
-    {
-        if (m_size < m_capacity) {
-            setArrayCapacity(m_size);
-        }
-    }
-
-    // Preallocate space.
-    template <typename T> 
-    NV_FORCEINLINE void Array<T>::reserve(uint desired_size)
-    {
-        if (desired_size > m_capacity) {
-            setArrayCapacity(desired_size);
-        }
-    }
-
-    // Copy elements to this array. Resizes it if needed.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
-    {
-#if 1   // More simple, but maybe not be as efficient?
-        destroy_range(m_buffer, 0, m_size);
-
-        setArraySize(count);
-
-        construct_range(m_buffer, count, 0, data);
-#else
-        const uint old_size = m_size;
-
-        destroy_range(m_buffer, count, old_size);
-
-        setArraySize(count);
-
-        copy_range(m_buffer, data, old_size);
-
-        construct_range(m_buffer, count, old_size, data);
-#endif
-    }
-
-    // Assignment operator.
-    template <typename T>
-    NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )
-    {
-        copy(a.m_buffer, a.m_size);
-        return *this;
-    }
-
-    // Release ownership of allocated memory and returns pointer to it.
-    template <typename T>
-    T * Array<T>::release() {
-        T * tmp = m_buffer;
-        m_buffer = NULL;
-        m_capacity = 0;
-        m_size = 0;
-        return tmp;
-    }
-
-
-
-    // Change array size.
-    template <typename T> 
-    inline void Array<T>::setArraySize(uint new_size) {
-        m_size = new_size;
-
-        if (new_size > m_capacity) {
-            uint new_buffer_size;
-            if (m_capacity == 0) {
-                // first allocation is exact
-                new_buffer_size = new_size;
-            }
-            else {
-                // following allocations grow array by 25%
-                new_buffer_size = new_size + (new_size >> 2);
-            }
-
-            setArrayCapacity( new_buffer_size );
-        }
-    }
-
-    // Change array capacity.
-    template <typename T> 
-    inline void Array<T>::setArrayCapacity(uint new_capacity) {
-        nvDebugCheck(new_capacity >= m_size);
-
-        if (new_capacity == 0) {
-            // free the buffer.
-            if (m_buffer != NULL) {
-                free<T>(m_buffer);
-                m_buffer = NULL;
-            }
-        }
-        else {
-            // realloc the buffer
-            m_buffer = realloc<T>(m_buffer, new_capacity);
-        }
-
-        m_capacity = new_capacity;
-    }
-
-    // Array serialization.
-    template <typename Typ> 
-    inline Stream & operator<< ( Stream & s, Array<Typ> & p )
-    {
-        if (s.isLoading()) {
-            uint size;
-            s << size;
-            p.resize( size );
-        }
-        else {
-            s << p.m_size;
-        }
-
-        for (uint i = 0; i < p.m_size; i++) {
-            s << p.m_buffer[i];
-        }
-
-        return s;
-    }
-
-    // Swap the members of the two given vectors.
-    template <typename Typ>
-    inline void swap(Array<Typ> & a, Array<Typ> & b)
-    {
-        nv::swap(a.m_buffer, b.m_buffer);
-        nv::swap(a.m_capacity, b.m_capacity);
-        nv::swap(a.m_size, b.m_size);
-    }
-
-
-} // nv namespace
-
-#endif // NV_CORE_ARRAY_INL
diff --git a/3rdparty/nvtt/nvcore/debug.h b/3rdparty/nvtt/nvcore/debug.h
deleted file mode 100644
index 61fbd2fcf..000000000
--- a/3rdparty/nvtt/nvcore/debug.h
+++ /dev/null
@@ -1,216 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_DEBUG_H
-#define NV_CORE_DEBUG_H
-
-#include "nvcore.h"
-
-#include <stdarg.h> // va_list
-
-
-// Make sure we are using our assert.
-#undef assert
-
-#define NV_ABORT_DEBUG      1
-#define NV_ABORT_IGNORE     2
-#define NV_ABORT_EXIT       3
-
-#define nvNoAssert(exp) \
-    NV_MULTI_LINE_MACRO_BEGIN \
-    (void)sizeof(exp); \
-    NV_MULTI_LINE_MACRO_END
-
-#if NV_NO_ASSERT
-
-#   define nvAssert(exp) nvNoAssert(exp)
-#   define nvCheck(exp) nvNoAssert(exp)
-#   define nvDebugAssert(exp) nvNoAssert(exp)
-#   define nvDebugCheck(exp) nvNoAssert(exp)
-#   define nvDebugBreak() nvNoAssert(0)
-
-#else // NV_NO_ASSERT
-
-#   if NV_CC_MSVC
-        // @@ Does this work in msvc-6 and earlier?
-#       define nvDebugBreak()       __debugbreak()
-//#       define nvDebugBreak()        __asm { int 3 }
-#   elif NV_OS_ORBIS
-#       define nvDebugBreak()       __debugbreak()
-#   elif NV_CC_GNUC
-#       define nvDebugBreak()       __builtin_trap()
-#   else
-#       error "No nvDebugBreak()!"
-#   endif
-
-/*
-#   elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
-        // @@ Use __builtin_trap() on GCC
-#       define nvDebugBreak()       __asm__ volatile ("trap")
-#   elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
-#       define nvDebugBreak()       __asm__ volatile ("int3")
-#   elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
-#       define nvDebugBreak()       __asm__ ( "int %0" : :"I"(3) )
-#   else
-#       include <signal.h>
-#       define nvDebugBreak()       raise(SIGTRAP)
-#   endif
-*/
-
-#define nvDebugBreakOnce() \
-    NV_MULTI_LINE_MACRO_BEGIN \
-    static bool firstTime = true; \
-    if (firstTime) { firstTime = false; nvDebugBreak(); } \
-    NV_MULTI_LINE_MACRO_END
-
-#define nvAssertMacro(exp) \
-    NV_MULTI_LINE_MACRO_BEGIN \
-    if (!(exp)) { \
-        if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
-            nvDebugBreak(); \
-        } \
-    } \
-    NV_MULTI_LINE_MACRO_END
-
-// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
-#define nvAssertMacroWithIgnoreAll(exp,...) \
-    NV_MULTI_LINE_MACRO_BEGIN \
-        static bool ignoreAll = false; \
-        if (!ignoreAll && !(exp)) { \
-            int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
-            if (result == NV_ABORT_DEBUG) { \
-                nvDebugBreak(); \
-            } else if (result == NV_ABORT_IGNORE) { \
-                ignoreAll = true; \
-            } \
-        } \
-    NV_MULTI_LINE_MACRO_END
-
-// Interesting assert macro from Insomniac:
-// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
-// Used as follows:
-// if (nvCheck(i < count)) {
-//     normal path
-// } else {
-//     fixup code.
-// }
-// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
-#define nvCheckMacro(exp) \
-    (\
-        (exp) ? true : ( \
-            (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
-        ) \
-    )
-
-
-#define nvAssert(exp)    nvAssertMacro(exp)
-#define nvCheck(exp)     nvAssertMacro(exp)
-
-#if defined(_DEBUG)
-#   define nvDebugAssert(exp)   nvAssertMacro(exp)
-#   define nvDebugCheck(exp)    nvAssertMacro(exp)
-#else // _DEBUG
-#   define nvDebugAssert(exp)   nvNoAssert(exp)
-#   define nvDebugCheck(exp)    nvNoAssert(exp)
-#endif // _DEBUG
-
-#endif // NV_NO_ASSERT
-
-// Use nvAssume for very simple expresions only: nvAssume(0), nvAssume(value == true), etc.
-/*#if !defined(_DEBUG)
-#   if NV_CC_MSVC
-#       define nvAssume(exp)    __assume(exp)
-#   else
-#       define nvAssume(exp)    nvCheck(exp)
-#   endif
-#else
-#   define nvAssume(exp)    nvCheck(exp)
-#endif*/
-
-#if defined(_DEBUG)
-#  if NV_CC_MSVC
-#   define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0)
-#  else
-#   define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable()
-#  endif
-#else
-#  if NV_CC_MSVC
-#   define nvUnreachable() __assume(0)
-#  else
-#   define nvUnreachable() __builtin_unreachable()
-#  endif
-#endif
-
-
-#define nvError(x)      nvAbort(x, __FILE__, __LINE__, __FUNC__)
-#define nvWarning(x)    nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
-
-#ifndef NV_DEBUG_PRINT
-#define NV_DEBUG_PRINT 1 //defined(_DEBUG)
-#endif
-
-#if NV_DEBUG_PRINT
-#define nvDebug(...)    nvDebugPrint(__VA_ARGS__)
-#else
-#if NV_CC_MSVC
-#define nvDebug(...)    __noop(__VA_ARGS__)
-#else
-#define nvDebug(...)    ((void)0) // Non-msvc platforms do not evaluate arguments?
-#endif
-#endif
-
-
-NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
-NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
-
-namespace nv
-{
-    inline bool isValidPtr(const void * ptr) {
-    #if NV_CPU_X86_64
-        if (ptr == NULL) return true;
-        if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
-        if (reinterpret_cast<uint64>(ptr) >= 0x000007FFFFFEFFFFULL) return false;
-    #else
-	    if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
-	    if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
-	    if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
-	    if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
-    #endif
-        return true;
-    }
-
-    // Message handler interface.
-    struct MessageHandler {
-        virtual void log(const char * str, va_list arg) = 0;
-        virtual ~MessageHandler() {}
-    };
-
-    // Assert handler interface.
-    struct AssertHandler {
-        virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
-        virtual ~AssertHandler() {}
-    };
-
-
-    namespace debug
-    {
-        NVCORE_API void dumpInfo();
-        NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
-
-        NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
-        NVCORE_API void resetMessageHandler();
-
-        NVCORE_API void setAssertHandler( AssertHandler * assertHanlder );
-        NVCORE_API void resetAssertHandler();
-
-        NVCORE_API void enableSigHandler(bool interactive);
-        NVCORE_API void disableSigHandler();
-
-        NVCORE_API bool isDebuggerPresent();
-        NVCORE_API bool attachToDebugger();
-
-        NVCORE_API void terminate(int code);
-    }
-
-} // nv namespace
-
-#endif // NV_CORE_DEBUG_H
diff --git a/3rdparty/nvtt/nvcore/defsgnucdarwin.h b/3rdparty/nvtt/nvcore/defsgnucdarwin.h
deleted file mode 100644
index 968f4bc00..000000000
--- a/3rdparty/nvtt/nvcore/defsgnucdarwin.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
-#include <stddef.h> // operator new, size_t, NULL
-
-#ifndef __STDC_VERSION__
-#	define __STDC_VERSION__ 0
-#endif // __STDC_VERSION__
-
-// Function linkage
-#define DLL_IMPORT
-#if __GNUC__ >= 4
-#	define DLL_EXPORT __attribute__((visibility("default")))
-#	define DLL_EXPORT_CLASS DLL_EXPORT
-#else
-#	define DLL_EXPORT
-#	define DLL_EXPORT_CLASS
-#endif
-
-// Function calling modes
-#if NV_CPU_X86
-#	define NV_CDECL 	__attribute__((cdecl))
-#	define NV_STDCALL	__attribute__((stdcall))
-#else
-#	define NV_CDECL 
-#	define NV_STDCALL
-#endif
-
-#define NV_FASTCALL		__attribute__((fastcall))
-#define NV_FORCEINLINE	inline
-#define NV_DEPRECATED   __attribute__((deprecated))
-#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
-
-#if __GNUC__ > 2
-#define NV_PURE     __attribute__((pure))
-#define NV_CONST    __attribute__((const))
-#else
-#define NV_PURE
-#define NV_CONST
-#endif
-
-#define NV_NOINLINE __attribute__((noinline))
-
-// Define __FUNC__ properly.
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
-#	if __GNUC__ >= 2
-#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
-#	else
-#		define __FUNC__ "<unknown>"
-#	endif
-#else
-#	define __FUNC__ __PRETTY_FUNCTION__
-#endif
-
-#define restrict    __restrict__
diff --git a/3rdparty/nvtt/nvcore/defsgnuclinux.h b/3rdparty/nvtt/nvcore/defsgnuclinux.h
deleted file mode 100644
index 117d342ea..000000000
--- a/3rdparty/nvtt/nvcore/defsgnuclinux.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
-#include <stddef.h> // operator new, size_t, NULL
-
-#ifndef __STDC_VERSION__
-#	define __STDC_VERSION__ 0
-#endif
-
-// Function linkage
-#define DLL_IMPORT
-#if __GNUC__ >= 4
-#   define DLL_EXPORT   __attribute__((visibility("default")))
-#   define DLL_EXPORT_CLASS DLL_EXPORT
-#else
-#   define DLL_EXPORT
-#   define DLL_EXPORT_CLASS
-#endif
-
-// Function calling modes
-#if NV_CPU_X86
-#   define NV_CDECL     __attribute__((cdecl))
-#   define NV_STDCALL   __attribute__((stdcall))
-#else
-#   define NV_CDECL 
-#   define NV_STDCALL
-#endif
-
-#define NV_FASTCALL     __attribute__((fastcall))
-//#if __GNUC__ > 3
-// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
-#define NV_FORCEINLINE  inline
-//#else
-// Some compilers complain that inline and always_inline are redundant.
-//#define NV_FORCEINLINE  __attribute__((always_inline))
-//#endif
-#define NV_DEPRECATED   __attribute__((deprecated))
-#define NV_THREAD_LOCAL __thread 
-
-#if __GNUC__ > 2
-#define NV_PURE     __attribute__((pure))
-#define NV_CONST    __attribute__((const))
-#else
-#define NV_PURE
-#define NV_CONST
-#endif
-
-#define NV_NOINLINE __attribute__((noinline))
-
-// Define __FUNC__ properly.
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
-#   if __GNUC__ >= 2
-#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
-#   else
-#       define __FUNC__ "<unknown>"
-#   endif
-#else
-#   define __FUNC__ __PRETTY_FUNCTION__
-#endif
-
-#define restrict    __restrict__
diff --git a/3rdparty/nvtt/nvcore/defsgnucwin32.h b/3rdparty/nvtt/nvcore/defsgnucwin32.h
deleted file mode 100644
index 68465c824..000000000
--- a/3rdparty/nvtt/nvcore/defsgnucwin32.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-//#include <cstddef> // size_t, NULL
-
-// Function linkage
-#define DLL_IMPORT	__declspec(dllimport)
-#define DLL_EXPORT	__declspec(dllexport)
-#define DLL_EXPORT_CLASS DLL_EXPORT
-
-// Function calling modes
-#if NV_CPU_X86
-#	define NV_CDECL 	__attribute__((cdecl))
-#	define NV_STDCALL	__attribute__((stdcall))
-#else
-#	define NV_CDECL 
-#	define NV_STDCALL
-#endif
-
-#define NV_FASTCALL		__attribute__((fastcall))
-#define NV_FORCEINLINE	inline
-#define NV_DEPRECATED   __attribute__((deprecated))
-
-#if __GNUC__ > 2
-#define NV_PURE		__attribute__((pure))
-#define NV_CONST	__attribute__((const))
-#else
-#define NV_PURE
-#define NV_CONST
-#endif
-
-#define NV_NOINLINE __attribute__((noinline))
-
-// Define __FUNC__ properly.
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
-#	if __GNUC__ >= 2
-#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
-#	else
-#		define __FUNC__ "<unknown>"
-#	endif
-#else
-#	define __FUNC__ __PRETTY_FUNCTION__
-#endif
-
-#define restrict	__restrict__
-
-/*
-// Type definitions
-typedef unsigned char		uint8;
-typedef signed char			int8;
-
-typedef unsigned short		uint16;
-typedef signed short		int16;
-
-typedef unsigned int		uint32;
-typedef signed int			int32;
-
-typedef unsigned long long	uint64;
-typedef signed long long	int64;
-
-// Aliases
-typedef uint32				uint;
-*/
-
diff --git a/3rdparty/nvtt/nvcore/defsvcwin32.h b/3rdparty/nvtt/nvcore/defsvcwin32.h
deleted file mode 100644
index a6c6bf93b..000000000
--- a/3rdparty/nvtt/nvcore/defsvcwin32.h
+++ /dev/null
@@ -1,94 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-// Function linkage
-#define DLL_IMPORT __declspec(dllimport)
-#define DLL_EXPORT __declspec(dllexport)
-#define DLL_EXPORT_CLASS DLL_EXPORT
-
-// Function calling modes
-#define NV_CDECL        __cdecl
-#define NV_STDCALL      __stdcall
-#define NV_FASTCALL     __fastcall
-#define NV_DEPRECATED
-
-#define NV_PURE
-#define NV_CONST
-
-// Set standard function names.
-#if _MSC_VER < 1900
-#   define snprintf _snprintf
-#endif
-#if _MSC_VER < 1500
-#   define vsnprintf _vsnprintf
-#endif
-#if _MSC_VER < 1700
-#   define strtoll _strtoi64
-#   define strtoull _strtoui64
-#endif
-#define chdir _chdir
-#define getcwd _getcwd 
-
-#if _MSC_VER < 1800 // Not sure what version introduced this.
-#define va_copy(a, b) (a) = (b)
-#endif
-
-#if !defined restrict
-#define restrict
-#endif
-
-// Ignore gcc attributes.
-#define __attribute__(X)
-
-#if !defined __FUNC__
-#define __FUNC__ __FUNCTION__ 
-#endif
-
-#define NV_NOINLINE __declspec(noinline)
-#define NV_FORCEINLINE inline
-
-#define NV_THREAD_LOCAL __declspec(thread)
-
-/*
-// Type definitions
-typedef unsigned char       uint8;
-typedef signed char         int8;
-
-typedef unsigned short      uint16;
-typedef signed short        int16;
-
-typedef unsigned int        uint32;
-typedef signed int          int32;
-
-typedef unsigned __int64    uint64;
-typedef signed __int64      int64;
-
-// Aliases
-typedef uint32              uint;
-*/
-
-// Unwanted VC++ warnings to disable.
-/*
-#pragma warning(disable : 4244)     // conversion to float, possible loss of data
-#pragma warning(disable : 4245)     // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
-#pragma warning(disable : 4100)     // unreferenced formal parameter
-#pragma warning(disable : 4514)     // unreferenced inline function has been removed
-#pragma warning(disable : 4710)     // inline function not expanded
-#pragma warning(disable : 4127)     // Conditional expression is constant
-#pragma warning(disable : 4305)     // truncation from 'const double' to 'float'
-#pragma warning(disable : 4505)     // unreferenced local function has been removed
-
-#pragma warning(disable : 4702)     // unreachable code in inline expanded function
-#pragma warning(disable : 4711)     // function selected for automatic inlining
-#pragma warning(disable : 4725)     // Pentium fdiv bug
-
-#pragma warning(disable : 4786)     // Identifier was truncated and cannot be debugged.
-
-#pragma warning(disable : 4675)     // resolved overload was found by argument-dependent lookup
-*/
-
-#pragma warning(1 : 4705)     // Report unused local variables.
-#pragma warning(1 : 4555)     // Expression has no effect.
diff --git a/3rdparty/nvtt/nvcore/foreach.h b/3rdparty/nvtt/nvcore/foreach.h
deleted file mode 100644
index 71b19f778..000000000
--- a/3rdparty/nvtt/nvcore/foreach.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_CORE_FOREACH_H
-#define NV_CORE_FOREACH_H
-
-/*
-These foreach macros are very non-standard and somewhat confusing, but I like them.
-*/
-
-#include "nvcore.h"
-
-#if NV_CC_GNUC // If typeof or decltype is available:
-#if !NV_CC_CPP11
-#   define NV_DECLTYPE typeof // Using a non-standard extension over typeof that behaves as C++11 decltype
-#else
-#   define NV_DECLTYPE decltype
-#endif
-
-/*
-Ideally we would like to write this:
-
-#define NV_FOREACH(i, container) \
-    for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
-
-But gcc versions prior to 4.7 required an intermediate type. See:
-https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
-*/
-
-#define NV_FOREACH(i, container) \
-    typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
-    for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
-
-#else // If typeof not available:
-
-#include <new> // placement new
-
-struct PseudoIndexWrapper {
-    template <typename T>
-    PseudoIndexWrapper(const T & container) {
-        nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
-        new (memory) typename T::PseudoIndex(container.start());
-    }
-    // PseudoIndex cannot have a dtor!
-
-    template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
-        return *reinterpret_cast<typename T::PseudoIndex *>(memory);
-    }
-    template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
-        return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
-    }
-
-    uint8 memory[4];	// Increase the size if we have bigger enumerators.
-};
-
-#define NV_FOREACH(i, container) \
-    for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
-
-#endif
-
-// Declare foreach keyword.
-#if !defined NV_NO_USE_KEYWORDS
-#   define foreach NV_FOREACH
-#   define foreach_index NV_FOREACH
-#endif
-
-
-#endif // NV_CORE_FOREACH_H
diff --git a/3rdparty/nvtt/nvcore/hash.h b/3rdparty/nvtt/nvcore/hash.h
deleted file mode 100644
index a8b0b2c63..000000000
--- a/3rdparty/nvtt/nvcore/hash.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_CORE_HASH_H
-#define NV_CORE_HASH_H
-
-#include "nvcore.h"
-
-namespace nv
-{
-    inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
-    {
-        const uint8 * data = (const uint8 *) data_in;
-        uint i = 0;
-        while (i < size) {
-            h = (h << 16) + (h << 6) - h + (uint) data[i++];
-        }
-        return h;
-    }
-
-    // Note that this hash does not handle NaN properly.
-    inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
-    {
-        for (uint i = 0; i < count; i++) {
-            //nvDebugCheck(nv::isFinite(*f));
-            union { float f; uint32 i; } x = { f[i] };
-            if (x.i == 0x80000000) x.i = 0;
-            h = sdbmHash(&x, 4, h);
-        }
-        return h;
-    }
-
-
-    template <typename T>
-    inline uint hash(const T & t, uint h = 5381)
-    {
-        return sdbmHash(&t, sizeof(T), h);
-    }
-
-    template <>
-    inline uint hash(const float & f, uint h)
-    {
-        return sdbmFloatHash(&f, 1, h);
-    }
-
-
-    // Functors for hash table:
-    template <typename Key> struct Hash 
-    {
-        uint operator()(const Key & k) const {
-            return hash(k);
-        }
-    };
-
-    template <typename Key> struct Equal
-    {
-        bool operator()(const Key & k0, const Key & k1) const {
-            return k0 == k1;
-        }
-    };
-
-
-    // @@ Move to Utils.h?
-    template <typename T1, typename T2>
-    struct Pair {
-        T1 first;
-        T2 second;
-    };
-
-    template <typename T1, typename T2>
-    bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {
-        return p0.first == p1.first && p0.second == p1.second;
-    }
-
-    template <typename T1, typename T2>
-    uint hash(const Pair<T1,T2> & p, uint h = 5381) {
-        return hash(p.second, hash(p.first));
-    }
-
-
-} // nv namespace
-
-#endif // NV_CORE_HASH_H
diff --git a/3rdparty/nvtt/nvcore/memory.h b/3rdparty/nvtt/nvcore/memory.h
deleted file mode 100644
index b332fab8e..000000000
--- a/3rdparty/nvtt/nvcore/memory.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#ifndef NV_CORE_MEMORY_H
-#define NV_CORE_MEMORY_H
-
-#include "nvcore.h"
-#include <stdlib.h>
-
-namespace nv {
-
-    // C++ helpers.
-    template <typename T> inline T * malloc(size_t count) {
-        return (T *)::malloc(sizeof(T) * count);
-    }
-
-    template <typename T> inline T * realloc(T * ptr, size_t count) {
-        return (T *)::realloc(ptr, sizeof(T) * count);
-    }
-
-    template <typename T> inline void free(const T * ptr) {
-        ::free((void *)ptr);
-    }
-
-    template <typename T> inline void zero(T & data) {
-        memset(&data, 0, sizeof(T));
-    }
-
-} // nv namespace
-
-#endif // NV_CORE_MEMORY_H
diff --git a/3rdparty/nvtt/nvcore/nvcore.h b/3rdparty/nvtt/nvcore/nvcore.h
deleted file mode 100644
index 689feff4c..000000000
--- a/3rdparty/nvtt/nvcore/nvcore.h
+++ /dev/null
@@ -1,363 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_H
-#define NV_CORE_H
-
-#define NVCORE_SHARED 0
-#define NV_NO_ASSERT 0
-
-// Function linkage
-#if NVCORE_SHARED
-#ifdef NVCORE_EXPORTS
-#define NVCORE_API DLL_EXPORT
-#define NVCORE_CLASS DLL_EXPORT_CLASS
-#else
-#define NVCORE_API DLL_IMPORT
-#define NVCORE_CLASS DLL_IMPORT
-#endif
-#else // NVCORE_SHARED
-#define NVCORE_API
-#define NVCORE_CLASS
-#endif // NVCORE_SHARED
-
-// Platform definitions
-#include "posh.h"
-
-#define NV_OS_STRING POSH_OS_STRING
-
-#if defined POSH_OS_LINUX
-#   define NV_OS_LINUX 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_ORBIS
-#   define NV_OS_ORBIS 1
-#elif defined POSH_OS_FREEBSD
-#   define NV_OS_FREEBSD 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_OPENBSD
-#   define NV_OS_OPENBSD 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_CYGWIN32
-#   define NV_OS_CYGWIN 1
-#elif defined POSH_OS_MINGW
-#   define NV_OS_MINGW 1
-#   define NV_OS_WIN32 1
-#elif defined POSH_OS_OSX
-#   define NV_OS_DARWIN 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_IOS
-#   define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
-#   define NV_OS_UNIX 1
-#   define NV_OS_IOS 1
-#elif defined POSH_OS_UNIX
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_WIN64
-#   define NV_OS_WIN32 1
-#   define NV_OS_WIN64 1
-#elif defined POSH_OS_WIN32
-#   define NV_OS_WIN32 1
-#elif defined POSH_OS_XBOX
-#   define NV_OS_XBOX 1
-#else
-#   error "Unsupported OS"
-#endif
-
-#ifndef NV_OS_WIN32
-#	define NV_OS_WIN32  0
-#endif // NV_OS_WIN32
-
-#ifndef NV_OS_WIN64
-#	define NV_OS_WIN64  0
-#endif // NV_OS_WIN64
-
-#ifndef NV_OS_MINGW
-#	define NV_OS_MINGW  0
-#endif // NV_OS_MINGW
-
-#ifndef NV_OS_CYGWIN
-#	define NV_OS_CYGWIN 0
-#endif // NV_OS_CYGWIN
-
-#ifndef NV_OS_LINUX
-#	define NV_OS_LINUX  0
-#endif // NV_OS_LINUX
-
-#ifndef NV_OS_FREEBSD
-#	define NV_OS_FREEBSD 0
-#endif // NV_OS_FREEBSD
-
-#ifndef NV_OS_OPENBSD
-#	define NV_OS_OPENBSD 0
-#endif // NV_OS_OPENBSD
-
-#ifndef NV_OS_UNIX
-#	define NV_OS_UNIX   0
-#endif // NV_OS_UNIX
-
-#ifndef NV_OS_DARWIN
-#	define NV_OS_DARWIN 0
-#endif // NV_OS_DARWIN
-
-#ifndef NV_OS_XBOX
-#	define NV_OS_XBOX   0
-#endif // NV_OS_XBOX
-
-#ifndef NV_OS_ORBIS
-#	define NV_OS_ORBIS  0
-#endif // NV_OS_ORBIS
-
-#ifndef NV_OS_IOS
-#	define NV_OS_IOS    0
-#endif // NV_OS_IOS
-
-// Threading:
-// some platforms don't implement __thread or similar for thread-local-storage
-#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
-#   define NV_OS_USE_PTHREAD 1
-#   if NV_OS_DARWIN || NV_OS_IOS
-#       define NV_OS_HAS_TLS_QUALIFIER 0
-#   else
-#       define NV_OS_HAS_TLS_QUALIFIER 1
-#   endif
-#else
-#   define NV_OS_USE_PTHREAD 0
-#   define NV_OS_HAS_TLS_QUALIFIER 1
-#endif
-
-
-// CPUs:
-
-#define NV_CPU_STRING   POSH_CPU_STRING
-
-#if defined POSH_CPU_X86_64
-//#   define NV_CPU_X86 1
-#   define NV_CPU_X86_64 1
-#elif defined POSH_CPU_X86
-#   define NV_CPU_X86 1
-#elif defined POSH_CPU_PPC
-#   define NV_CPU_PPC 1
-#elif defined POSH_CPU_STRONGARM
-#   define NV_CPU_ARM 1
-#elif defined POSH_CPU_AARCH64
-#   define NV_CPU_AARCH64 1
-#else
-#   error "Unsupported CPU"
-#endif
-
-#ifndef NV_CPU_X86
-#	define NV_CPU_X86     0
-#endif // NV_CPU_X86
-
-#ifndef NV_CPU_X86_64
-#	define NV_CPU_X86_64  0
-#endif // NV_CPU_X86_64
-
-#ifndef NV_CPU_PPC
-#	define NV_CPU_PPC     0
-#endif // NV_CPU_PPC
-
-#ifndef NV_CPU_ARM
-#	define NV_CPU_ARM     0
-#endif // NV_CPU_ARM
-
-#ifndef NV_CPU_AARCH64
-#	define NV_CPU_AARCH64 0
-#endif // NV_CPU_AARCH64
-
-// Compiler:
-
-#if defined POSH_COMPILER_CLANG
-#   define NV_CC_CLANG  1
-#   define NV_CC_GNUC   1    // Clang is compatible with GCC.
-#   define NV_CC_STRING "clang"
-#	pragma clang diagnostic ignored "-Wmissing-braces"
-#	pragma clang diagnostic ignored "-Wshadow"
-#	pragma clang diagnostic ignored "-Wunused-local-typedef"
-#	pragma clang diagnostic ignored "-Wunused-function"
-#	pragma clang diagnostic ignored "-Wunused-variable"
-#	pragma clang diagnostic ignored "-Wunused-parameter"
-#	pragma clang diagnostic ignored "-Wsometimes-uninitialized"
-#elif defined POSH_COMPILER_GCC
-#   define NV_CC_GNUC   1
-#   define NV_CC_STRING "gcc"
-#	pragma GCC diagnostic ignored "-Wshadow"
-#	pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
-#	pragma GCC diagnostic ignored "-Wunused-function"
-#	pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#	pragma GCC diagnostic ignored "-Wunused-variable"
-#	pragma GCC diagnostic ignored "-Wunused-parameter"
-#	pragma GCC diagnostic ignored "-Warray-bounds"
-#elif defined POSH_COMPILER_MSVC
-#   define NV_CC_MSVC   1
-#   define NV_CC_STRING "msvc"
-#else
-#   error "Unsupported compiler"
-#endif
-
-#ifndef NV_CC_GNUC
-#	define NV_CC_GNUC  0
-#endif // NV_CC_GNUC
-
-#ifndef NV_CC_MSVC
-#	define NV_CC_MSVC  0
-#endif // NV_CC_MSVC
-
-#ifndef NV_CC_CLANG
-#	define NV_CC_CLANG 0
-#endif // NV_CC_CLANG
-
-#if NV_CC_MSVC
-#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
-#else
-// @@ IC: This works in CLANG, about GCC?
-// @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
-#ifdef __clang__
-#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
-#elif defined __GNUC__ 
-#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
-#endif
-#endif
-
-// Endiannes:
-#define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
-#define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
-#define NV_ENDIAN_STRING    POSH_ENDIAN_STRING
-
-
-// Type definitions:
-typedef posh_u8_t   uint8;
-typedef posh_i8_t   int8;
-
-typedef posh_u16_t  uint16;
-typedef posh_i16_t  int16;
-
-typedef posh_u32_t  uint32;
-typedef posh_i32_t  int32;
-
-typedef posh_u64_t  uint64;
-typedef posh_i64_t  int64;
-
-// Aliases
-typedef uint32      uint;
-
-
-// Version string:
-#define NV_VERSION_STRING \
-    NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
-    NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
-
-
-// Disable copy constructor and assignment operator. 
-#if NV_CC_CPP11
-#define NV_FORBID_COPY(C) \
-    C( const C & ) = delete; \
-    C &operator=( const C & ) = delete
-#else
-#define NV_FORBID_COPY(C) \
-    private: \
-    C( const C & ); \
-    C &operator=( const C & )
-#endif
-
-// Disable dynamic allocation on the heap. 
-// See Prohibiting Heap-Based Objects in More Effective C++.
-#define NV_FORBID_HEAPALLOC() \
-    private: \
-    void *operator new(size_t size); \
-    void *operator new[](size_t size)
-
-// String concatenation macros.
-#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
-#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
-#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
-#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
-#define NV_STRING2(x) #x
-#define NV_STRING(x) NV_STRING2(x)
-
-#if NV_CC_MSVC
-#define NV_MULTI_LINE_MACRO_BEGIN do {  
-#define NV_MULTI_LINE_MACRO_END \
-    __pragma(warning(push)) \
-    __pragma(warning(disable:4127)) \
-    } while(false) \
-    __pragma(warning(pop))  
-#else
-#define NV_MULTI_LINE_MACRO_BEGIN do {
-#define NV_MULTI_LINE_MACRO_END } while(false)
-#endif
-
-#if NV_CC_CPP11
-#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
-#else
-#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
-#endif
-#define NV_COMPILER_CHECK(x) nvStaticCheck(x)   // I like this name best.
-
-// Make sure type definitions are fine.
-NV_COMPILER_CHECK(sizeof(int8) == 1);
-NV_COMPILER_CHECK(sizeof(uint8) == 1);
-NV_COMPILER_CHECK(sizeof(int16) == 2);
-NV_COMPILER_CHECK(sizeof(uint16) == 2);
-NV_COMPILER_CHECK(sizeof(int32) == 4);
-NV_COMPILER_CHECK(sizeof(uint32) == 4);
-NV_COMPILER_CHECK(sizeof(int32) == 4);
-NV_COMPILER_CHECK(sizeof(uint32) == 4);
-
-
-#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
-
-#if 0 // Disabled in The Witness.
-#if NV_CC_MSVC
-#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
-#else
-#define NV_MESSAGE(x) message(x)
-#endif
-#else
-#define NV_MESSAGE(x) 
-#endif
-
-
-// Startup initialization macro.
-#define NV_AT_STARTUP(some_code) \
-    namespace { \
-        static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
-            NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
-        } \
-        NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
-    }
-
-// Indicate the compiler that the parameter is not used to suppress compier warnings.
-#define NV_UNUSED(a) ((a)=(a))
-
-// Null index. @@ Move this somewhere else... it's only used by nvmesh.
-//const unsigned int NIL = unsigned int(~0);
-//#define NIL uint(~0)
-
-// Null pointer.
-#ifndef NULL
-#define NULL 0
-#endif
-
-// Platform includes
-#if NV_CC_MSVC
-#   if NV_OS_WIN32
-#       include "defsvcwin32.h"
-#   elif NV_OS_XBOX
-#       include "defsvcxbox.h"
-#   else
-#       error "MSVC: Platform not supported"
-#   endif
-#elif NV_CC_GNUC
-#   if NV_OS_LINUX
-#       include "defsgnuclinux.h"
-#   elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
-#       include "defsgnucdarwin.h"
-#   elif NV_OS_MINGW
-#       include "defsgnucwin32.h"
-#   elif NV_OS_CYGWIN
-#       error "GCC: Cygwin not supported"
-#   else
-#       error "GCC: Platform not supported"
-#   endif
-#endif
-
-#endif // NV_CORE_H
diff --git a/3rdparty/nvtt/nvcore/posh.h b/3rdparty/nvtt/nvcore/posh.h
deleted file mode 100644
index 45d2d9e3c..000000000
--- a/3rdparty/nvtt/nvcore/posh.h
+++ /dev/null
@@ -1,1030 +0,0 @@
-/**
-@file posh.h
-@author Brian Hook
-@version 1.3.001
-
-Header file for POSH, the Portable Open Source Harness project.
-
-NOTE: Unlike most header files, this one is designed to be included
-multiple times, which is why it does not have the @#ifndef/@#define
-preamble.
-
-POSH relies on environment specified preprocessor symbols in order
-to infer as much as possible about the target OS/architecture and
-the host compiler capabilities.
-
-NOTE: POSH is simple and focused. It attempts to provide basic
-functionality and information, but it does NOT attempt to emulate
-missing functionality.  I am also not willing to make POSH dirty
-and hackish to support truly ancient and/or outmoded and/or bizarre
-technologies such as non-ANSI compilers, systems with non-IEEE
-floating point formats, segmented 16-bit operating systems, etc.
-
-Please refer to the accompanying HTML documentation or visit
-http://www.poshlib.org for more information on how to use POSH.
-
-LICENSE:
-
-Copyright (c) 2004, Brian Hook
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above
-      copyright notice, this list of conditions and the following
-      disclaimer in the documentation and/or other materials provided
-      with the distribution.
-
-    * The names of this package'ss contributors contributors may not
-      be used to endorse or promote products derived from this
-      software without specific prior written permission.
-
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-REVISION:
-
-I've been lax about revision histories, so this starts at, um, 1.3.001.
-Sorry for any inconveniences.
-
-1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary,
-                      where I was not detecting Visual Studio
-                      compilation on x86-64 systems.  Added check for
-                      _M_X64 which should fix that.
-
-*/
-/*
-I have yet to find an authoritative reference on preprocessor
-symbols, but so far this is what I've gleaned:
-
-GNU GCC/G++:
-   - __GNUC__: GNU C version
-   - __GNUG__: GNU C++ compiler
-   - __sun__ : on Sun platforms
-   - __svr4__: on Solaris and other SysV R4 platforms
-   - __mips__: on MIPS processor platforms
-   - __sparc_v9__: on Sparc 64-bit CPUs
-   - __sparcv9: 64-bit Solaris
-   - __MIPSEL__: mips processor, compiled for little endian
-   - __MIPSEB__: mips processor, compiled for big endian
-   - _R5900: MIPS/Sony/Toshiba R5900 (PS2)
-   - mc68000: 68K
-   - m68000: 68K
-   - m68k: 68K
-   - __palmos__: PalmOS
-
-Intel C/C++ Compiler:
-   - __ECC      : compiler version, IA64 only
-   - __EDG__
-   - __ELF__
-   - __GXX_ABI_VERSION
-   - __i386     : IA-32 only
-   - __i386__   : IA-32 only
-   - i386       : IA-32 only
-   - __ia64     : IA-64 only
-   - __ia64__   : IA-64 only
-   - ia64       : IA-64 only
-   - __ICC      : IA-32 only
-   - __INTEL_COMPILER : IA-32 or IA-64, newer versions only
-
-Apple's C/C++ Compiler for OS X:
-   - __APPLE_CC__
-   - __APPLE__
-   - __BIG_ENDIAN__
-   - __APPLE__
-   - __ppc__
-   - __MACH__
-
-DJGPP:
-   - __MSDOS__
-   - __unix__
-   - __unix
-   - __GNUC__
-   - __GO32
-   - DJGPP
-   - __i386, __i386, i386
-
-Cray's C compiler:
-   - _ADDR64: if 64-bit pointers
-   - _UNICOS: 
-   - __unix:
-
-SGI's CC compiler predefines the following (and more) with -ansi:
-   - __sgi
-   - __unix
-   - __host_mips
-   - _SYSTYPE_SVR4
-   - __mips
-   - _MIPSEB
-   - anyone know if there is a predefined symbol for the compiler?!
-
-MinGW:
-   - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others
-   - __MINGW32__
-
-Cygwin:
-   - as Gnu C, but also
-   - __unix__
-   - __CYGWIN32__
-
-Microsoft Visual Studio predefines the following:
-   - _MSC_VER
-   - _WIN32: on Win32
-   - _M_IX6 (on x86 systems)
-   - _M_X64: on x86-64 systems
-   - _M_ALPHA (on DEC AXP systems)
-   - _SH3: WinCE, Hitachi SH-3
-   - _MIPS: WinCE, MIPS
-   - _ARM: WinCE, ARM
-
-Sun's C Compiler:
-   - sun and _sun
-   - unix and _unix
-   - sparc and _sparc (SPARC systems only)
-   - i386 and _i386 (x86 systems only)
-   - __SVR4 (Solaris only)
-   - __sparcv9: 64-bit solaris
-   - __SUNPRO_C
-   - _LP64: defined in 64-bit LP64 mode, but only if <sys/types.h> is included
-
-Borland C/C++ predefines the following:
-   - __BORLANDC__:
-
-DEC/Compaq C/C++ on Alpha:
-   - __alpha
-   - __arch64__
-   - __unix__ (on Tru64 Unix)
-   - __osf__
-   - __DECC
-   - __DECCXX (C++ compilation)
-   - __DECC_VER
-   - __DECCXX_VER
-
-IBM's AIX compiler:
-   - __64BIT__ if 64-bit mode
-   - _AIX
-   - __IBMC__: C compiler version
-   - __IBMCPP__: C++ compiler version
-   - _LONG_LONG: compiler allows long long
-
-Watcom:
-   - __WATCOMC__
-   - __DOS__ : if targeting DOS
-   - __386__ : if 32-bit support
-   - __WIN32__ : if targetin 32-bit Windows
-
-HP-UX C/C++ Compiler:
-   - __hpux
-   - __unix
-   - __hppa (on PA-RISC)
-   - __LP64__: if compiled in 64-bit mode
-
-Metrowerks:
-   - __MWERKS__
-   - __powerpc__
-   - _powerc
-   - __MC68K__
-   - macintosh when compiling for MacOS
-   - __INTEL__ for x86 targets
-   - __POWERPC__
-
-LLVM:
-   - __llvm__
-   - __clang__
-*/
-
-/*
-** ----------------------------------------------------------------------------
-** Include <limits.h> optionally
-** ----------------------------------------------------------------------------
-*/
-#ifdef POSH_USE_LIMITS_H
-#  include <limits.h>
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Determine compilation environment
-** ----------------------------------------------------------------------------
-*/
-#if defined __ECC || defined __ICC || defined __INTEL_COMPILER
-#  define POSH_COMPILER_STRING "Intel C/C++"
-#  define POSH_COMPILER_INTEL 1
-#endif
-
-#if ( defined __host_mips || defined __sgi ) && !defined __GNUC__
-#  define POSH_COMPILER_STRING    "MIPSpro C/C++"
-#  define POSH_COMPILER_MIPSPRO 1 
-#endif
-
-#if defined __hpux && !defined __GNUC__
-#  define POSH_COMPILER_STRING "HP-UX CC"
-#  define POSH_COMPILER_HPCC 1 
-#endif
-
-#if defined __clang__
-#  define POSH_COMPILER_STRING "Clang"
-#  define POSH_COMPILER_CLANG 1
-#endif
-
-#if defined __GNUC__ && !defined __clang__
-#  define POSH_COMPILER_STRING "Gnu GCC"
-#  define POSH_COMPILER_GCC 1
-#endif
-
-#if defined __APPLE_CC__
-   /* we don't define the compiler string here, let it be GNU */
-#  define POSH_COMPILER_APPLECC 1
-#endif
-
-#if defined __IBMC__ || defined __IBMCPP__
-#  define POSH_COMPILER_STRING "IBM C/C++"
-#  define POSH_COMPILER_IBM 1
-#endif
-
-#if defined _MSC_VER
-#  define POSH_COMPILER_STRING "Microsoft Visual C++"
-#  define POSH_COMPILER_MSVC 1
-#endif
-
-#if defined __SUNPRO_C
-#  define POSH_COMPILER_STRING "Sun Pro" 
-#  define POSH_COMPILER_SUN 1
-#endif
-
-#if defined __BORLANDC__
-#  define POSH_COMPILER_STRING "Borland C/C++"
-#  define POSH_COMPILER_BORLAND 1
-#endif
-
-#if defined __MWERKS__
-#  define POSH_COMPILER_STRING     "MetroWerks CodeWarrior"
-#  define POSH_COMPILER_METROWERKS 1
-#endif
-
-#if defined __DECC || defined __DECCXX
-#  define POSH_COMPILER_STRING "Compaq/DEC C/C++"
-#  define POSH_COMPILER_DEC 1
-#endif
-
-#if defined __WATCOMC__
-#  define POSH_COMPILER_STRING "Watcom C/C++"
-#  define POSH_COMPILER_WATCOM 1
-#endif
-
-#if !defined POSH_COMPILER_STRING
-#  define POSH_COMPILER_STRING "Unknown compiler"
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Determine target operating system
-** ----------------------------------------------------------------------------
-*/
-#if defined linux || defined __linux__
-#  define POSH_OS_LINUX 1 
-#  define POSH_OS_STRING "Linux"
-#endif
-
-#if defined __FreeBSD__
-#  define POSH_OS_FREEBSD 1 
-#  define POSH_OS_STRING "FreeBSD"
-#endif
-
-#if defined __OpenBSD__
-#  define POSH_OS_OPENBSD 1
-#  define POSH_OS_STRING "OpenBSD"
-#endif
-
-#if defined __CYGWIN32__
-#  define POSH_OS_CYGWIN32 1
-#  define POSH_OS_STRING "Cygwin"
-#endif
-
-#if defined GEKKO
-#  define POSH_OS_GAMECUBE
-#  define __powerpc__
-#  define POSH_OS_STRING "GameCube"
-#endif
-
-#if defined __MINGW32__
-#  define POSH_OS_MINGW 1
-#  define POSH_OS_STRING "MinGW"
-#endif
-
-#if defined GO32 && defined DJGPP && defined __MSDOS__ 
-#  define POSH_OS_GO32 1
-#  define POSH_OS_STRING "GO32/MS-DOS"
-#endif
-
-/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS,
-   otherwise Watcom assumes host=target */
-#if defined __WATCOMC__  && defined __386__ && defined __DOS__
-#  define POSH_OS_DOS32 1
-#  define POSH_OS_STRING "DOS/32-bit"
-#endif
-
-#if defined _UNICOS
-#  define POSH_OS_UNICOS 1
-#  define POSH_OS_STRING "UNICOS"
-#endif
-
-#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
-#  define POSH_OS_OSX 1
-#  define POSH_OS_STRING "MacOS X"
-#endif
-
-#if defined __sun__ || defined sun || defined __sun || defined __solaris__
-#  if defined __SVR4 || defined __svr4__ || defined __solaris__
-#     define POSH_OS_STRING "Solaris"
-#     define POSH_OS_SOLARIS 1
-#  endif
-#  if !defined POSH_OS_STRING
-#     define POSH_OS_STRING "SunOS"
-#     define POSH_OS_SUNOS 1
-#  endif
-#endif
-
-#if defined __sgi__ || defined sgi || defined __sgi
-#  define POSH_OS_IRIX 1
-#  define POSH_OS_STRING "Irix"
-#endif
-
-#if defined __hpux__ || defined __hpux
-#  define POSH_OS_HPUX 1
-#  define POSH_OS_STRING "HP-UX"
-#endif
-
-#if defined _AIX
-#  define POSH_OS_AIX 1
-#  define POSH_OS_STRING "AIX"
-#endif
-
-#if ( defined __alpha && defined __osf__ )
-#  define POSH_OS_TRU64 1
-#  define POSH_OS_STRING "Tru64"
-#endif
-
-#if defined __BEOS__ || defined __beos__
-#  define POSH_OS_BEOS 1
-#  define POSH_OS_STRING "BeOS"
-#endif
-
-#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA
-#  define POSH_OS_AMIGA 1
-#  define POSH_OS_STRING "Amiga"
-#endif
-
-#if defined __unix__
-#  define POSH_OS_UNIX 1 
-#  if !defined POSH_OS_STRING
-#     define POSH_OS_STRING "Unix-like(generic)"
-#  endif
-#endif
-
-#if defined _WIN32_WCE
-#  define POSH_OS_WINCE 1
-#  define POSH_OS_STRING "Windows CE"
-#endif
-
-#if defined _XBOX || defined _XBOX_VER
-#  define POSH_OS_XBOX 1
-#  define POSH_OS_STRING "XBOX"
-#endif
-
-#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__
-#  define POSH_OS_WIN32 1
-#  if !defined POSH_OS_XBOX
-#     if defined _WIN64
-#        define POSH_OS_WIN64 1
-#        if !defined POSH_OS_STRING
-#           define POSH_OS_STRING "Win64"
-#        endif // !defined POSH_OS_STRING
-#     else
-#        if !defined POSH_OS_STRING
-#           define POSH_OS_STRING "Win32"
-#        endif
-#     endif
-#  endif
-#endif
-
-#if defined __palmos__
-#  define POSH_OS_PALM 1
-#  define POSH_OS_STRING "PalmOS"
-#endif
-
-#if defined THINK_C || defined macintosh
-#  define POSH_OS_MACOS 1
-#  define POSH_OS_STRING "MacOS"
-#endif
-
-/*
-** -----------------------------------------------------------------------------
-** Determine target CPU
-** -----------------------------------------------------------------------------
-*/
-
-#if defined GEKKO
-#  define POSH_CPU_PPC750 1
-#  define POSH_CPU_STRING "IBM PowerPC 750 (NGC)"
-#endif
-
-#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000
-#  define POSH_CPU_68K 1
-#  define POSH_CPU_STRING "MC68000"
-#endif
-
-#if defined __PPC__ || defined __POWERPC__  || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ || defined _M_PPC
-#  define POSH_CPU_PPC 1
-#  if !defined POSH_CPU_STRING
-#    if defined __powerpc64__
-#       define POSH_CPU_STRING "PowerPC64"
-#    else
-#       define POSH_CPU_STRING "PowerPC"
-#    endif
-#  endif
-#endif
-
-#if defined _CRAYT3E || defined _CRAYMPP
-#  define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/
-#  define POSH_CPU_STRING "Cray T3E (Alpha 21164)"
-#endif
-
-#if defined CRAY || defined _CRAY && !defined _CRAYT3E
-#  error Non-AXP Cray systems not supported
-#endif
-
-#if defined _SH3
-#  define POSH_CPU_SH3 1
-#  define POSH_CPU_STRING "Hitachi SH-3"
-#endif
-
-#if defined __sh4__ || defined __SH4__
-#  define POSH_CPU_SH3 1
-#  define POSH_CPU_SH4 1
-#  define POSH_CPU_STRING "Hitachi SH-4"
-#endif
-
-#if defined __sparc__ || defined __sparc
-#  if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__
-#     define POSH_CPU_SPARC64 1 
-#     define POSH_CPU_STRING "Sparc/64"
-#  else
-#     define POSH_CPU_STRING "Sparc/32"
-#  endif
-#  define POSH_CPU_SPARC 1
-#endif
-
-#if defined ARM || defined __arm__ || defined _ARM
-#  define POSH_CPU_STRONGARM 1
-#  define POSH_CPU_STRING "ARM"
-#endif
-
-#if defined __aarch64__
-#  define POSH_CPU_AARCH64 1
-#  define POSH_CPU_STRING "ARM64"
-#endif
-
-#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS
-#  define POSH_CPU_MIPS 1 
-#  if defined _R5900
-#    define POSH_CPU_STRING "MIPS R5900 (PS2)"
-#  else
-#    define POSH_CPU_STRING "MIPS"
-#  endif
-#endif
-
-#if defined __ia64 || defined _M_IA64 || defined __ia64__ 
-#  define POSH_CPU_IA64 1
-#  define POSH_CPU_STRING "IA64"
-#endif
-
-#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined _M_X64
-#  define POSH_CPU_X86 1
-#  if defined __x86_64__ || defined _M_X64
-#     define POSH_CPU_X86_64 1 
-#  endif
-#  if defined POSH_CPU_X86_64
-#     define POSH_CPU_STRING "AMD x86-64"
-#  else
-#     define POSH_CPU_STRING "Intel 386+"
-#  endif
-#endif
-
-#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__
-#  define POSH_CPU_AXP 1
-#  define POSH_CPU_STRING "AXP"
-#endif
-
-#if defined __hppa || defined hppa
-#  define POSH_CPU_HPPA 1
-#  define POSH_CPU_STRING "PA-RISC"
-#endif
-
-#if !defined POSH_CPU_STRING
-#  error POSH cannot determine target CPU
-#  define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */
-#endif
-
-/*
-** -----------------------------------------------------------------------------
-** Attempt to autodetect building for embedded on Sony PS2
-** -----------------------------------------------------------------------------
-*/
-#if !defined POSH_OS_STRING
-#  if !defined FORCE_DOXYGEN
-#    define POSH_OS_EMBEDDED 1 
-#  endif
-#  if defined _R5900
-#     define POSH_OS_STRING "Sony PS2(embedded)"
-#  else
-#     define POSH_OS_STRING "Embedded/Unknown"
-#  endif
-#endif
-
-/*
-** ---------------------------------------------------------------------------
-** Handle cdecl, stdcall, fastcall, etc.
-** ---------------------------------------------------------------------------
-*/
-#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64
-#  if defined __GNUC__
-#     define POSH_CDECL __attribute__((cdecl))
-#     define POSH_STDCALL __attribute__((stdcall))
-#     define POSH_FASTCALL __attribute__((fastcall))
-#  elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ )
-#     define POSH_CDECL    __cdecl
-#     define POSH_STDCALL  __stdcall
-#     define POSH_FASTCALL __fastcall
-#  endif
-#else
-#  define POSH_CDECL    
-#  define POSH_STDCALL  
-#  define POSH_FASTCALL 
-#endif
-
-/*
-** ---------------------------------------------------------------------------
-** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB
-** ---------------------------------------------------------------------------
-*/
-
-/*
-** We undefine this so that multiple inclusions will work
-*/
-#if defined POSH_IMPORTEXPORT
-#  undef POSH_IMPORTEXPORT
-#endif
-
-#if defined POSH_DLL
-#   if defined POSH_OS_WIN32
-#      if defined _MSC_VER 
-#         if ( _MSC_VER >= 800 )
-#            if defined POSH_BUILDING_LIB
-#               define POSH_IMPORTEXPORT __declspec( dllexport )
-#            else
-#               define POSH_IMPORTEXPORT __declspec( dllimport )
-#            endif
-#         else
-#            if defined POSH_BUILDING_LIB
-#               define POSH_IMPORTEXPORT __export
-#            else
-#               define POSH_IMPORTEXPORT 
-#            endif
-#         endif
-#      endif  /* defined _MSC_VER */
-#      if defined __BORLANDC__
-#         if ( __BORLANDC__ >= 0x500 )
-#            if defined POSH_BUILDING_LIB 
-#               define POSH_IMPORTEXPORT __declspec( dllexport )
-#            else
-#               define POSH_IMPORTEXPORT __declspec( dllimport )
-#            endif
-#         else
-#            if defined POSH_BUILDING_LIB
-#               define POSH_IMPORTEXPORT __export
-#            else
-#               define POSH_IMPORTEXPORT 
-#            endif
-#         endif
-#      endif /* defined __BORLANDC__ */
-       /* for all other compilers, we're just making a blanket assumption */
-#      if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__
-#         if defined POSH_BUILDING_LIB
-#            define POSH_IMPORTEXPORT __declspec( dllexport )
-#         else
-#            define POSH_IMPORTEXPORT __declspec( dllimport )
-#         endif
-#      endif /* all other compilers */
-#      if !defined POSH_IMPORTEXPORT
-#         error Building DLLs not supported on this compiler (poshlib@poshlib.org if you know how)
-#      endif
-#   endif /* defined POSH_OS_WIN32 */
-#endif
-
-/* On pretty much everything else, we can thankfully just ignore this */
-#if !defined POSH_IMPORTEXPORT
-#  define POSH_IMPORTEXPORT
-#endif
-
-#if defined FORCE_DOXYGEN
-#  define POSH_DLL    
-#  define POSH_BUILDING_LIB
-#  undef POSH_DLL
-#  undef POSH_BUILDING_LIB
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** (Re)define POSH_PUBLIC_API export signature 
-** ----------------------------------------------------------------------------
-*/
-#ifdef POSH_PUBLIC_API
-#  undef POSH_PUBLIC_API
-#endif
-
-#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) )
-#  define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT 
-#else
-#  define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Try to infer endianess.  Basically we just go through the CPUs we know are
-** little endian, and assume anything that isn't one of those is big endian.
-** As a sanity check, we also do this with operating systems we know are
-** little endian, such as Windows.  Some processors are bi-endian, such as 
-** the MIPS series, so we have to be careful about those.
-** ----------------------------------------------------------------------------
-*/
-#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__
-#  define POSH_ENDIAN_STRING "little"
-#  define POSH_LITTLE_ENDIAN 1
-#else
-#  define POSH_ENDIAN_STRING "big"
-#  define POSH_BIG_ENDIAN 1
-#endif
-
-#if defined FORCE_DOXYGEN
-#  define POSH_LITTLE_ENDIAN
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Cross-platform compile time assertion macro
-** ----------------------------------------------------------------------------
-*/
-#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ]
-
-/*
-** ----------------------------------------------------------------------------
-** 64-bit Integer
-**
-** We don't require 64-bit support, nor do we emulate its functionality, we
-** simply export it if it's available.  Since we can't count on <limits.h>
-** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive.
-** ----------------------------------------------------------------------------
-*/
-#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64
-#  define POSH_64BIT_INTEGER 1
-typedef long posh_i64_t; 
-typedef unsigned long posh_u64_t;
-#  define POSH_I64( x ) ((posh_i64_t)x)
-#  define POSH_U64( x ) ((posh_u64_t)x)
-#  define POSH_I64_PRINTF_PREFIX "l"
-#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC )
-#  define POSH_64BIT_INTEGER 1
-typedef __int64 posh_i64_t;
-typedef unsigned __int64 posh_u64_t;
-#  define POSH_I64( x ) ((posh_i64_t)(x##i64))
-#  define POSH_U64( x ) ((posh_u64_t)(x##ui64))
-#  define POSH_I64_PRINTF_PREFIX "I64"
-#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC
-#  define POSH_64BIT_INTEGER 1
-typedef long long posh_i64_t;
-typedef unsigned long long posh_u64_t;
-#  define POSH_U64( x ) ((posh_u64_t)(x##LL))
-#  define POSH_I64( x ) ((posh_i64_t)(x##LL))
-#  define POSH_I64_PRINTF_PREFIX "ll"
-#endif
-
-/* hack */
-/*#ifdef __MINGW32__
-#undef POSH_I64
-#undef POSH_U64
-#undef POSH_I64_PRINTF_PREFIX
-#define POSH_I64( x ) ((posh_i64_t)x)
-#define POSH_U64( x ) ((posh_u64_t)x)
-#define POSH_I64_PRINTF_PREFIX "I64"
-#endif*/
-
-#ifdef FORCE_DOXYGEN
-typedef long long posh_i64_t;
-typedef unsigned long posh_u64_t;
-#  define POSH_64BIT_INTEGER
-#  define POSH_I64_PRINTF_PREFIX
-#  define POSH_I64(x)
-#  define POSH_U64(x)
-#endif
-
-/** Minimum value for a 64-bit signed integer */
-#define POSH_I64_MIN  POSH_I64(0x8000000000000000)
-/** Maximum value for a 64-bit signed integer */
-#define POSH_I64_MAX  POSH_I64(0x7FFFFFFFFFFFFFFF)
-/** Minimum value for a 64-bit unsigned integer */
-#define POSH_U64_MIN  POSH_U64(0)
-/** Maximum value for a 64-bit unsigned integer */
-#define POSH_U64_MAX  POSH_U64(0xFFFFFFFFFFFFFFFF)
-
-/* ----------------------------------------------------------------------------
-** Basic Sized Types
-**
-** These types are expected to be EXACTLY sized so you can use them for
-** serialization.
-** ----------------------------------------------------------------------------
-*/
-#define POSH_FALSE 0 
-#define POSH_TRUE  1 
-
-typedef int            posh_bool_t;
-typedef unsigned char  posh_byte_t;
-
-/* NOTE: These assume that CHAR_BIT is 8!! */
-typedef unsigned char  posh_u8_t;
-typedef signed char    posh_i8_t;
-
-#if defined POSH_USE_LIMITS_H
-#  if CHAR_BITS > 8
-#    error This machine uses 9-bit characters.  This is a warning, you can comment this out now.
-#  endif /* CHAR_BITS > 8 */
-
-/* 16-bit */
-#  if ( USHRT_MAX == 65535 ) 
-   typedef unsigned short posh_u16_t;
-   typedef short          posh_i16_t;
-#  else
-   /* Yes, in theory there could still be a 16-bit character type and shorts are
-      32-bits in size...if you find such an architecture, let me know =P */
-#    error No 16-bit type found
-#  endif
-
-/* 32-bit */
-#  if ( INT_MAX == 2147483647 )
-  typedef unsigned       posh_u32_t;
-  typedef int            posh_i32_t;
-#  elif ( LONG_MAX == 2147483647 )
-  typedef unsigned long  posh_u32_t;
-  typedef long           posh_i32_t;
-#  else
-      error No 32-bit type found
-#  endif
-
-#else /* POSH_USE_LIMITS_H */
-
-  typedef unsigned short posh_u16_t;
-  typedef short          posh_i16_t;
-
-#  if !defined POSH_OS_PALM
-  typedef unsigned       posh_u32_t;
-  typedef int            posh_i32_t;
-#  else
-  typedef unsigned long  posh_u32_t;
-  typedef long           posh_i32_t;
-#  endif
-#endif
-
-/** Minimum value for a byte */
-#define POSH_BYTE_MIN    0
-/** Maximum value for an 8-bit unsigned value */
-#define POSH_BYTE_MAX    255
-/** Minimum value for a byte */
-#define POSH_I16_MIN     ( ( posh_i16_t ) 0x8000 )
-/** Maximum value for a 16-bit signed value */
-#define POSH_I16_MAX     ( ( posh_i16_t ) 0x7FFF ) 
-/** Minimum value for a 16-bit unsigned value */
-#define POSH_U16_MIN     0
-/** Maximum value for a 16-bit unsigned value */
-#define POSH_U16_MAX     ( ( posh_u16_t ) 0xFFFF )
-/** Minimum value for a 32-bit signed value */
-#define POSH_I32_MIN     ( ( posh_i32_t ) 0x80000000 )
-/** Maximum value for a 32-bit signed value */
-#define POSH_I32_MAX     ( ( posh_i32_t ) 0x7FFFFFFF )
-/** Minimum value for a 32-bit unsigned value */
-#define POSH_U32_MIN     0
-/** Maximum value for a 32-bit unsigned value */
-#define POSH_U32_MAX     ( ( posh_u32_t ) 0xFFFFFFFF )
-
-/*
-** ----------------------------------------------------------------------------
-** Sanity checks on expected sizes
-** ----------------------------------------------------------------------------
-*/
-#if !defined FORCE_DOXYGEN
-
-POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1);
-POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1);
-POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1);
-POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2);
-POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2);
-POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4);
-POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4);
-
-#if !defined POSH_NO_FLOAT
-   POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 );
-   POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8);
-#endif
-
-#if defined POSH_64BIT_INTEGER
-   POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8);
-   POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8);
-#endif
-
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** 64-bit pointer support
-** ----------------------------------------------------------------------------
-*/
-#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX )
-#  define POSH_64BIT_POINTER 1
-#endif
-
-#if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX
-#  define POSH_64BIT_POINTER 1
-#endif
-
-#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
-#   define POSH_64BIT_POINTER 1
-#endif
-
-#if defined POSH_64BIT_POINTER
-   POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 );
-#elif !defined FORCE_DOXYGEN
-/* if this assertion is hit then you're on a system that either has 64-bit
-   addressing and we didn't catch it, or you're on a system with 16-bit
-   pointers.  In the latter case, POSH doesn't actually care, we're just
-   triggering this assertion to make sure you're aware of the situation,
-   so feel free to delete it.
-
-   If this assertion is triggered on a known 32 or 64-bit platform, 
-   please let us know (poshlib@poshlib.org) */
-   POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 );
-#endif
-
-#if defined FORCE_DOXYGEN
-#  define POSH_64BIT_POINTER
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** POSH Utility Functions
-**
-** These are optional POSH utility functions that are not required if you don't
-** need anything except static checking of your host and target environment.
-** 
-** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want
-** to enforce their export if your own library is only using them internally.
-** ----------------------------------------------------------------------------
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const char *POSH_GetArchString( void );
-
-#if !defined POSH_NO_FLOAT
-
-posh_u32_t  POSH_LittleFloatBits( float f );
-posh_u32_t  POSH_BigFloatBits( float f );
-float       POSH_FloatFromLittleBits( posh_u32_t bits );
-float       POSH_FloatFromBigBits( posh_u32_t bits );
-
-void        POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] );
-double      POSH_DoubleFromBits( const posh_byte_t src[ 8 ] );
-
-/* unimplemented
-float      *POSH_WriteFloatToLittle( void *dst, float f );
-float      *POSH_WriteFloatToBig( void *dst, float f );
-float       POSH_ReadFloatFromLittle( const void *src );
-float       POSH_ReadFloatFromBig( const void *src );
-
-double     *POSH_WriteDoubleToLittle( void *dst, double d );
-double     *POSH_WriteDoubleToBig( void *dst, double d );
-double      POSH_ReadDoubleFromLittle( const void *src );
-double      POSH_ReadDoubleFromBig( const void *src );
-*/
-#endif /* !defined POSH_NO_FLOAT */
-
-#if defined FORCE_DOXYGEN
-#  define POSH_NO_FLOAT
-#  undef  POSH_NO_FLOAT
-#endif
-
-extern posh_u16_t  POSH_SwapU16( posh_u16_t u );
-extern posh_i16_t  POSH_SwapI16( posh_i16_t u );
-extern posh_u32_t  POSH_SwapU32( posh_u32_t u );
-extern posh_i32_t  POSH_SwapI32( posh_i32_t u );
-
-#if defined POSH_64BIT_INTEGER
-
-extern posh_u64_t  POSH_SwapU64( posh_u64_t u );
-extern posh_i64_t  POSH_SwapI64( posh_i64_t u );
-
-#endif /*POSH_64BIT_INTEGER */
-
-extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value );
-extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value );
-extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value );
-extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value );
-
-extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value );
-extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value );
-extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value );
-extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value );
-
-extern posh_u16_t  POSH_ReadU16FromLittle( const void *src );
-extern posh_i16_t  POSH_ReadI16FromLittle( const void *src );
-extern posh_u32_t  POSH_ReadU32FromLittle( const void *src );
-extern posh_i32_t  POSH_ReadI32FromLittle( const void *src );
-
-extern posh_u16_t  POSH_ReadU16FromBig( const void *src );
-extern posh_i16_t  POSH_ReadI16FromBig( const void *src );
-extern posh_u32_t  POSH_ReadU32FromBig( const void *src );
-extern posh_i32_t  POSH_ReadI32FromBig( const void *src );
-
-#if defined POSH_64BIT_INTEGER
-extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value );
-extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value );
-extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value );
-extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value );
-
-extern posh_u64_t  POSH_ReadU64FromLittle( const void *src );
-extern posh_i64_t  POSH_ReadI64FromLittle( const void *src );
-extern posh_u64_t  POSH_ReadU64FromBig( const void *src );
-extern posh_i64_t  POSH_ReadI64FromBig( const void *src );
-#endif /* POSH_64BIT_INTEGER */
-
-#if defined POSH_LITTLE_ENDIAN
-
-#  define POSH_LittleU16(x) (x)
-#  define POSH_LittleU32(x) (x)
-#  define POSH_LittleI16(x) (x)
-#  define POSH_LittleI32(x) (x)
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_LittleU64(x) (x)
-#    define POSH_LittleI64(x) (x)
-#  endif /* defined POSH_64BIT_INTEGER */
-
-#  define POSH_BigU16(x) POSH_SwapU16(x)
-#  define POSH_BigU32(x) POSH_SwapU32(x)
-#  define POSH_BigI16(x) POSH_SwapI16(x)
-#  define POSH_BigI32(x) POSH_SwapI32(x)
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_BigU64(x) POSH_SwapU64(x)
-#    define POSH_BigI64(x) POSH_SwapI64(x)
-#  endif /* defined POSH_64BIT_INTEGER */
-
-#else
-
-#  define POSH_BigU16(x) (x)
-#  define POSH_BigU32(x) (x)
-#  define POSH_BigI16(x) (x)
-#  define POSH_BigI32(x) (x)
-
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_BigU64(x) (x)
-#    define POSH_BigI64(x) (x)
-#  endif /* POSH_64BIT_INTEGER */
-
-#  define POSH_LittleU16(x) POSH_SwapU16(x)
-#  define POSH_LittleU32(x) POSH_SwapU32(x)
-#  define POSH_LittleI16(x) POSH_SwapI16(x)
-#  define POSH_LittleI32(x) POSH_SwapI32(x)
-
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_LittleU64(x) POSH_SwapU64(x)
-#    define POSH_LittleI64(x) POSH_SwapI64(x)
-#  endif /* POSH_64BIT_INTEGER */
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/3rdparty/nvtt/nvcore/stdstream.h b/3rdparty/nvtt/nvcore/stdstream.h
deleted file mode 100644
index 4f0a10a42..000000000
--- a/3rdparty/nvtt/nvcore/stdstream.h
+++ /dev/null
@@ -1,459 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#include "nvcore.h"
-#include "stream.h"
-#include "array.h"
-
-#include <stdio.h> // fopen
-#include <string.h> // memcpy
-
-namespace nv
-{
-
-    // Portable version of fopen.
-    inline FILE * fileOpen(const char * fileName, const char * mode)
-    {
-        nvCheck(fileName != NULL);
-#if NV_CC_MSVC && _MSC_VER >= 1400
-        FILE * fp;
-        if (fopen_s(&fp, fileName, mode) == 0) {
-            return fp;
-        }
-        return NULL;
-#else
-        return fopen(fileName, mode);
-#endif
-    }
-
-
-    /// Base stdio stream.
-    class NVCORE_CLASS StdStream : public Stream
-    {
-        NV_FORBID_COPY(StdStream);
-    public:
-
-        /// Ctor.
-        StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
-
-        /// Dtor. 
-        virtual ~StdStream()
-        {
-            if( m_fp != NULL && m_autoclose ) {
-#if NV_OS_WIN32
-                _fclose_nolock( m_fp );
-#else
-                fclose( m_fp );
-#endif
-            }
-        }
-
-
-        /** @name Stream implementation. */
-        //@{
-        virtual void seek( uint pos )
-        {
-            nvDebugCheck(m_fp != NULL);
-            nvDebugCheck(pos <= size());
-#if NV_OS_WIN32
-            _fseek_nolock(m_fp, pos, SEEK_SET);
-#else
-            fseek(m_fp, pos, SEEK_SET);
-#endif
-        }
-
-        virtual uint tell() const
-        {
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            return _ftell_nolock(m_fp);
-#else
-            return (uint)ftell(m_fp);
-#endif
-        }
-
-        virtual uint size() const
-        {
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            uint pos = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, 0, SEEK_END);
-            uint end = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, pos, SEEK_SET);
-#else
-            uint pos = (uint)ftell(m_fp);
-            fseek(m_fp, 0, SEEK_END);
-            uint end = (uint)ftell(m_fp);
-            fseek(m_fp, pos, SEEK_SET);
-#endif
-            return end;
-        }
-
-        virtual bool isError() const
-        {
-            return m_fp == NULL || ferror( m_fp ) != 0;
-        }
-
-        virtual void clearError()
-        {
-            nvDebugCheck(m_fp != NULL);
-            clearerr(m_fp);
-        }
-
-        // @@ The original implementation uses feof, which only returns true when we attempt to read *past* the end of the stream. 
-        // That is, if we read the last byte of a file, then isAtEnd would still return false, even though the stream pointer is at the file end. This is not the intent and was inconsistent with the implementation of the MemoryStream, a better 
-        // implementation uses use ftell and fseek to determine our location within the file.
-        virtual bool isAtEnd() const
-        {
-            if (m_fp == NULL) return true;
-            //nvDebugCheck(m_fp != NULL);
-            //return feof( m_fp ) != 0;
-#if NV_OS_WIN32
-            uint pos = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, 0, SEEK_END);
-            uint end = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, pos, SEEK_SET);
-#else
-            uint pos = (uint)ftell(m_fp);
-            fseek(m_fp, 0, SEEK_END);
-            uint end = (uint)ftell(m_fp);
-            fseek(m_fp, pos, SEEK_SET);
-#endif
-            return pos == end;
-        }
-
-        /// Always true.
-        virtual bool isSeekable() const { return true; }
-        //@}
-
-    protected:
-
-        FILE * m_fp;
-        bool m_autoclose;
-
-    };
-
-
-    /// Standard output stream.
-    class NVCORE_CLASS StdOutputStream : public StdStream
-    {
-        NV_FORBID_COPY(StdOutputStream);
-    public:
-
-        /// Construct stream by file name.
-        StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
-
-        /// Construct stream by file handle.
-        StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
-        {
-        }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Write data.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            return (uint)_fwrite_nolock(data, 1, len, m_fp);
-#elif NV_OS_LINUX
-            return (uint)fwrite_unlocked(data, 1, len, m_fp);
-#elif NV_OS_DARWIN
-            // @@ No error checking, always returns len.
-            for (uint i = 0; i < len; i++) {
-                putc_unlocked(((char *)data)[i], m_fp);
-            }
-            return len;
-#else
-            return (uint)fwrite(data, 1, len, m_fp);
-#endif
-        }
-
-        virtual bool isLoading() const
-        {
-            return false;
-        }
-
-        virtual bool isSaving() const
-        {
-            return true;
-        }
-        //@}
-
-    };
-
-
-    /// Standard input stream.
-    class NVCORE_CLASS StdInputStream : public StdStream
-    {
-        NV_FORBID_COPY(StdInputStream);
-    public:
-
-        /// Construct stream by file name.
-        StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
-
-        /// Construct stream by file handle.
-        StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
-        {
-        }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Read data.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            return (uint)_fread_nolock(data, 1, len, m_fp);
-#elif NV_OS_LINUX
-            return (uint)fread_unlocked(data, 1, len, m_fp);
-#elif NV_OS_DARWIN
-            // @@ No error checking, always returns len.
-            for (uint i = 0; i < len; i++) {
-                ((char *)data)[i] = getc_unlocked(m_fp);
-            }
-            return len;
-#else
-            return (uint)fread(data, 1, len, m_fp);
-#endif
-            
-        }
-
-        virtual bool isLoading() const
-        {
-            return true;
-        }
-
-        virtual bool isSaving() const
-        {
-            return false;
-        }
-        //@}
-    };
-
-
-
-    /// Memory input stream.
-    class NVCORE_CLASS MemoryInputStream : public Stream
-    {
-        NV_FORBID_COPY(MemoryInputStream);
-    public:
-
-        /// Ctor.
-        MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Read data.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            nvDebugCheck(!isError());
-
-            uint left = m_size - tell();
-            if (len > left) len = left;
-
-            memcpy( data, m_ptr, len );
-            m_ptr += len;
-
-            return len;
-        }
-
-        virtual void seek( uint pos )
-        {
-            nvDebugCheck(!isError());
-            m_ptr = m_mem + pos;
-            nvDebugCheck(!isError());
-        }
-
-        virtual uint tell() const
-        {
-            nvDebugCheck(m_ptr >= m_mem);
-            return uint(m_ptr - m_mem);
-        }
-
-        virtual uint size() const
-        {
-            return m_size;
-        }
-
-        virtual bool isError() const
-        {
-            return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
-        }
-
-        virtual void clearError()
-        {
-            // Nothing to do.
-        }
-
-        virtual bool isAtEnd() const
-        {
-            return m_ptr == m_mem + m_size;
-        }
-
-        /// Always true.
-        virtual bool isSeekable() const
-        {
-            return true;
-        }
-
-        virtual bool isLoading() const
-        {
-            return true;
-        }
-
-        virtual bool isSaving() const
-        {
-            return false;
-        }
-        //@}
-
-        const uint8 * ptr() const { return m_ptr; }
-
-
-    private:
-
-        const uint8 * m_mem;
-        const uint8 * m_ptr;
-        uint m_size;
-
-    };
-
-
-    /// Buffer output stream.
-    class NVCORE_CLASS BufferOutputStream : public Stream
-    {
-        NV_FORBID_COPY(BufferOutputStream);
-    public:
-
-        BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
-
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            m_buffer.append((uint8 *)data, len);
-            return len;
-        }
-
-        virtual void seek( uint /*pos*/ ) { /*Not implemented*/ }
-        virtual uint tell() const { return m_buffer.size(); }
-        virtual uint size() const { return m_buffer.size(); }
-
-        virtual bool isError() const { return false; }
-        virtual void clearError() {}
-
-        virtual bool isAtEnd() const { return true; }
-        virtual bool isSeekable() const { return false; }
-        virtual bool isLoading() const { return false; }
-        virtual bool isSaving() const { return true; }
-
-    private:
-        Array<uint8> & m_buffer;
-    };
-
-
-    /// Protected input stream.
-    class NVCORE_CLASS ProtectedStream : public Stream
-    {
-        NV_FORBID_COPY(ProtectedStream);
-    public:
-
-        /// Ctor.
-        ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false)
-        { 
-        }
-
-        /// Ctor.
-        ProtectedStream( Stream * s, bool autodelete = true ) : 
-        m_s(s), m_autodelete(autodelete) 
-        {
-            nvDebugCheck(m_s != NULL);
-        }
-
-        /// Dtor.
-        virtual ~ProtectedStream()
-        {
-            if( m_autodelete ) {
-                delete m_s;
-            }
-        }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Read data.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            len = m_s->serialize( data, len );
-
-            if( m_s->isError() ) {
-                throw;
-            }
-
-            return len;
-        }
-
-        virtual void seek( uint pos )
-        {
-            m_s->seek( pos );
-
-            if( m_s->isError() ) {
-                throw;
-            }
-        }
-
-        virtual uint tell() const
-        {
-            return m_s->tell();
-        }
-
-        virtual uint size() const
-        {
-            return m_s->size();
-        }
-
-        virtual bool isError() const
-        {
-            return m_s->isError();
-        }
-
-        virtual void clearError()
-        {
-            m_s->clearError();
-        }
-
-        virtual bool isAtEnd() const
-        {
-            return m_s->isAtEnd();
-        }
-
-        virtual bool isSeekable() const
-        {
-            return m_s->isSeekable();
-        }
-
-        virtual bool isLoading() const
-        {
-            return m_s->isLoading();
-        }
-
-        virtual bool isSaving() const
-        {
-            return m_s->isSaving();
-        }
-        //@}
-
-
-    private:
-
-        Stream * const m_s;
-        bool const m_autodelete;
-
-    };
-
-} // nv namespace
-
-
-//#endif // NV_CORE_STDSTREAM_H
diff --git a/3rdparty/nvtt/nvcore/stream.h b/3rdparty/nvtt/nvcore/stream.h
deleted file mode 100644
index 9252d9efe..000000000
--- a/3rdparty/nvtt/nvcore/stream.h
+++ /dev/null
@@ -1,163 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_STREAM_H
-#define NV_CORE_STREAM_H
-
-#include "nvcore.h"
-#include "debug.h"
-
-namespace nv
-{
-
-    /// Base stream class.
-    class NVCORE_CLASS Stream {
-    public:
-
-        enum ByteOrder {
-            LittleEndian = false,
-            BigEndian = true,
-        };
-
-        /// Get the byte order of the system.
-        static ByteOrder getSystemByteOrder() { 
-#if NV_LITTLE_ENDIAN
-            return LittleEndian;
-#else
-            return BigEndian;
-#endif
-        }
-
-
-        /// Ctor.
-        Stream() : m_byteOrder(LittleEndian) { }
-
-        /// Virtual destructor.
-        virtual ~Stream() {}
-
-        /// Set byte order.
-        void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
-
-        /// Get byte order.
-        ByteOrder byteOrder() const { return m_byteOrder; }
-
-
-        /// Serialize the given data.
-        virtual uint serialize( void * data, uint len ) = 0;
-
-        /// Move to the given position in the archive.
-        virtual void seek( uint pos ) = 0;
-
-        /// Return the current position in the archive.
-        virtual uint tell() const = 0;
-
-        /// Return the current size of the archive.
-        virtual uint size() const = 0;
-
-        /// Determine if there has been any error.
-        virtual bool isError() const = 0;
-
-        /// Clear errors.
-        virtual void clearError() = 0;
-
-        /// Return true if the stream is at the end.
-        virtual bool isAtEnd() const = 0;
-
-        /// Return true if the stream is seekable.
-        virtual bool isSeekable() const = 0;
-
-        /// Return true if this is an input stream.
-        virtual bool isLoading() const = 0;
-
-        /// Return true if this is an output stream.
-        virtual bool isSaving() const = 0;
-
-
-        void advance(uint offset) { seek(tell() + offset); }
-
-
-        // friends	
-        friend Stream & operator<<( Stream & s, bool & c ) {
-#if NV_OS_DARWIN && !NV_CC_CPP11
-            nvStaticCheck(sizeof(bool) == 4);
-            uint8 b = c ? 1 : 0;
-            s.serialize( &b, 1 );
-            c = (b == 1);
-#else
-            nvStaticCheck(sizeof(bool) == 1);
-            s.serialize( &c, 1 );
-#endif
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, char & c ) {
-            nvStaticCheck(sizeof(char) == 1);
-            s.serialize( &c, 1 );
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, uint8 & c ) {
-            nvStaticCheck(sizeof(uint8) == 1);
-            s.serialize( &c, 1 );
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, int8 & c ) {
-            nvStaticCheck(sizeof(int8) == 1);
-            s.serialize( &c, 1 );
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, uint16 & c ) {
-            nvStaticCheck(sizeof(uint16) == 2);
-            return s.byteOrderSerialize( &c, 2 );
-        }
-        friend Stream & operator<<( Stream & s, int16 & c ) {
-            nvStaticCheck(sizeof(int16) == 2);
-            return s.byteOrderSerialize( &c, 2 );
-        }
-        friend Stream & operator<<( Stream & s, uint32 & c ) {
-            nvStaticCheck(sizeof(uint32) == 4);
-            return s.byteOrderSerialize( &c, 4 );
-        }
-        friend Stream & operator<<( Stream & s, int32 & c ) {
-            nvStaticCheck(sizeof(int32) == 4);
-            return s.byteOrderSerialize( &c, 4 );
-        }
-        friend Stream & operator<<( Stream & s, uint64 & c ) {
-            nvStaticCheck(sizeof(uint64) == 8);
-            return s.byteOrderSerialize( &c, 8 );
-        }
-        friend Stream & operator<<( Stream & s, int64 & c ) {
-            nvStaticCheck(sizeof(int64) == 8);
-            return s.byteOrderSerialize( &c, 8 );
-        }
-        friend Stream & operator<<( Stream & s, float & c ) {
-            nvStaticCheck(sizeof(float) == 4);
-            return s.byteOrderSerialize( &c, 4 );
-        }
-        friend Stream & operator<<( Stream & s, double & c ) {
-            nvStaticCheck(sizeof(double) == 8);
-            return s.byteOrderSerialize( &c, 8 );
-        }
-
-    protected:
-
-        /// Serialize in the stream byte order.
-        Stream & byteOrderSerialize( void * v, uint len ) {
-            if( m_byteOrder == getSystemByteOrder() ) {
-                serialize( v, len );
-            }
-            else {
-                for( uint i = len; i > 0; i-- ) {
-                    serialize( (uint8 *)v + i - 1, 1 );
-                }
-            }
-            return *this;
-        }
-
-
-    private:
-
-        ByteOrder m_byteOrder;
-
-    };
-
-} // nv namespace
-
-#endif // NV_CORE_STREAM_H
diff --git a/3rdparty/nvtt/nvcore/strlib.h b/3rdparty/nvtt/nvcore/strlib.h
deleted file mode 100644
index 80a957cbb..000000000
--- a/3rdparty/nvtt/nvcore/strlib.h
+++ /dev/null
@@ -1,429 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_STRING_H
-#define NV_CORE_STRING_H
-
-#include "debug.h"
-#include "hash.h" // hash
-
-//#include <string.h> // strlen, etc.
-
-#if NV_OS_WIN32
-#define NV_PATH_SEPARATOR '\\'
-#else
-#define NV_PATH_SEPARATOR '/'
-#endif
-
-namespace nv
-{
-
-    NVCORE_API uint strHash(const char * str, uint h) NV_PURE;
-
-    /// String hash based on Bernstein's hash.
-    inline uint strHash(const char * data, uint h = 5381)
-    {
-        uint i = 0;
-        while(data[i] != 0) {
-            h = (33 * h) ^ uint(data[i]);
-            i++;
-        }
-        return h;
-    }
-
-    template <> struct Hash<const char *> {
-        uint operator()(const char * str) const { return strHash(str); }
-    };
-
-    NVCORE_API uint strLen(const char * str) NV_PURE;                       // Asserts on NULL strings.
-
-    NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE;       // Asserts on NULL strings.
-    NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE;   // Asserts on NULL strings.
-    NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE;     // Accepts NULL strings.
-    NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
-
-    template <> struct Equal<const char *> {
-        bool operator()(const char * a, const char * b) const { return strEqual(a, b); }
-    };
-
-    NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE;
-    NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE;
-
-
-    NVCORE_API void strCpy(char * dst, uint size, const char * src);
-    NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
-    NVCORE_API void strCat(char * dst, uint size, const char * src);
-
-    NVCORE_API const char * strSkipWhiteSpace(const char * str);
-    NVCORE_API char * strSkipWhiteSpace(char * str);
-
-    NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
-
-    NVCORE_API bool isNumber(const char * str) NV_PURE;
-
-    /* @@ Implement these two functions and modify StringBuilder to use them?
-    NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
-    NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
-
-    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
-        va_list args;
-        va_start(args, fmt);
-        strFormatList(buffer, count, fmt, args);
-        va_end(args);
-    }
-    template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
-        va_list tmp;
-        va_copy(tmp, args);
-        strFormatList(buffer, count, fmt, tmp);
-        va_end(tmp);
-    }*/
-
-    template <int count> void strCpySafe(char (&buffer)[count], const char *src) {
-        strCpy(buffer, count, src);
-    }
-
-    template <int count> void strCatSafe(char (&buffer)[count], const char * src) {
-        strCat(buffer, count, src);
-    }
-
-
-
-    /// String builder.
-    class NVCORE_CLASS StringBuilder
-    {
-    public:
-
-        StringBuilder();
-        explicit StringBuilder( uint size_hint );
-        StringBuilder(const char * str);
-        StringBuilder(const char * str, uint len);
-        StringBuilder(const StringBuilder & other);
-
-        ~StringBuilder();
-
-        StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
-        StringBuilder & formatList( const char * format, va_list arg );
-
-        StringBuilder & append(const char * str);
-		StringBuilder & append(const char * str, uint len);
-        StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
-        StringBuilder & appendFormatList(const char * format, va_list arg);
-
-        StringBuilder & appendSpace(uint n);
-
-        StringBuilder & number( int i, int base = 10 );
-        StringBuilder & number( uint i, int base = 10 );
-
-        StringBuilder & reserve(uint size_hint);
-        StringBuilder & copy(const char * str);
-        StringBuilder & copy(const char * str, uint len);
-        StringBuilder & copy(const StringBuilder & str);
-
-        StringBuilder & toLower();
-        StringBuilder & toUpper();
-
-        bool endsWith(const char * str) const;
-        bool beginsWith(const char * str) const;
-
-        char * reverseFind(char c);
-
-        void reset();
-        bool isNull() const { return m_size == 0; }
-
-        // const char * accessors
-        //operator const char * () const { return m_str; }
-        //operator char * () { return m_str; }
-        const char * str() const { return m_str; }
-        char * str() { return m_str; }
-
-        char * release();
-
-        /// Implement value semantics.
-        StringBuilder & operator=( const StringBuilder & s ) {
-            return copy(s);
-        }
-
-        /// Implement value semantics.
-        StringBuilder & operator=( const char * s ) {
-            return copy(s);
-        }
-
-        /// Equal operator.
-        bool operator==( const StringBuilder & s ) const {
-            return strMatch(s.m_str, m_str);
-        }
-
-        /// Return the exact length.
-        uint length() const { return isNull() ? 0 : strLen(m_str); }
-
-        /// Return the size of the string container.
-        uint capacity() const { return m_size; }
-
-        /// Return the hash of the string.
-        uint hash() const { return isNull() ? 0 : strHash(m_str); }
-
-        // Swap strings.
-        friend void swap(StringBuilder & a, StringBuilder & b);
-
-    protected:
-
-        /// Size of the string container.
-        uint m_size;
-
-        /// String.
-        char * m_str;
-
-    };
-
-
-    /// Path string. @@ This should be called PathBuilder.
-    class NVCORE_CLASS Path : public StringBuilder
-    {
-    public:
-        Path() : StringBuilder() {}
-        explicit Path(int size_hint) : StringBuilder(size_hint) {}
-        Path(const char * str) : StringBuilder(str) {}
-        Path(const Path & path) : StringBuilder(path) {}
-
-        const char * fileName() const;
-        const char * extension() const;
-
-        void translatePath(char pathSeparator = NV_PATH_SEPARATOR);
-
-        void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR);
-
-        void stripFileName();
-        void stripExtension();
-
-        // statics
-        NVCORE_API static char separator();
-        NVCORE_API static const char * fileName(const char *);
-        NVCORE_API static const char * extension(const char *);
-
-        NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
-    };
-
-
-    /// String class.
-    class NVCORE_CLASS String
-    {
-    public:
-
-        /// Constructs a null string. @sa isNull()
-        String()
-        {
-            data = NULL;
-        }
-
-        /// Constructs a shared copy of str.
-        String(const String & str)
-        {
-            data = str.data;
-            if (data != NULL) addRef();
-        }
-
-        /// Constructs a shared string from a standard string.
-        String(const char * str)
-        {
-            setString(str);
-        }
-
-        /// Constructs a shared string from a standard string.
-        String(const char * str, int length)
-        {
-            setString(str, length);
-        }
-
-        /// Constructs a shared string from a StringBuilder.
-        String(const StringBuilder & str)
-        {
-            setString(str);
-        }
-
-        /// Dtor.
-        ~String()
-        {
-            release();
-        }
-
-        String clone() const;
-
-        /// Release the current string and allocate a new one.
-        const String & operator=( const char * str )
-        {
-            release();
-            setString( str );
-            return *this;
-        }
-
-        /// Release the current string and allocate a new one.
-        const String & operator=( const StringBuilder & str )
-        {
-            release();
-            setString( str );
-            return *this;
-        }
-
-        /// Implement value semantics.
-        String & operator=( const String & str )
-        {
-            if (str.data != data)
-            {
-                release();
-                data = str.data;
-                addRef();
-            }
-            return *this;
-        }
-
-        /// Equal operator.
-        bool operator==( const String & str ) const
-        {
-            return strMatch(str.data, data);
-        }
-
-        /// Equal operator.
-        bool operator==( const char * str ) const
-        {
-            return strMatch(str, data);
-        }
-
-        /// Not equal operator.
-        bool operator!=( const String & str ) const
-        {
-            return !strMatch(str.data, data);
-        }
-
-        /// Not equal operator.
-        bool operator!=( const char * str ) const
-        {
-            return !strMatch(str, data);
-        }
-
-        /// Returns true if this string is the null string.
-        bool isNull() const { return data == NULL; }
-
-        /// Return the exact length.
-        uint length() const { nvDebugCheck(data != NULL); return strLen(data); }
-
-        /// Return the hash of the string.
-        uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
-
-        /// const char * cast operator.
-        operator const char * () const { return data; }
-
-        /// Get string pointer.
-        const char * str() const { return data; }
-
-
-    private:
-
-        // Add reference count.
-        void addRef();
-
-        // Decrease reference count.
-        void release();
-
-        uint16 getRefCount() const
-        {
-            nvDebugCheck(data != NULL);
-            return *reinterpret_cast<const uint16 *>(data - 2);
-        }
-
-        void setRefCount(uint16 count) {
-            nvDebugCheck(data != NULL);
-            nvCheck(count < 0xFFFF);
-            *reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
-        }
-
-        void setData(const char * str) {
-            data = str + 2;
-        }
-
-        void allocString(const char * str)
-        {
-            allocString(str, strLen(str));
-        }
-
-        void allocString(const char * str, uint length);
-
-        void setString(const char * str);
-        void setString(const char * str, uint length);
-        void setString(const StringBuilder & str);
-
-        // Swap strings.
-        friend void swap(String & a, String & b);
-
-    private:
-
-        const char * data;
-
-    };
-
-    template <> struct Hash<String> {
-        uint operator()(const String & str) const { return str.hash(); }
-    };
-
-
-    // Like AutoPtr, but for const char strings.
-    class AutoString
-    {
-        NV_FORBID_COPY(AutoString);
-        NV_FORBID_HEAPALLOC();
-    public:
-
-        // Ctor.
-        AutoString(const char * p = NULL) : m_ptr(p) { }
-
-#if NV_CC_CPP11
-        // Move ctor.
-        AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
-#endif
-        
-        // Dtor. Deletes owned pointer.
-        ~AutoString() {
-            delete [] m_ptr;
-            m_ptr = NULL;
-        }
-
-        // Delete owned pointer and assign new one.
-        void operator=(const char * p) {
-            if (p != m_ptr) 
-            {
-                delete [] m_ptr;
-                m_ptr = p;
-            }
-        }
-
-        // Get pointer.
-        const char * ptr() const { return m_ptr; }
-        operator const char *() const { return m_ptr; }
-
-        // Relinquish ownership of the underlying pointer and returns that pointer.
-        const char * release() {
-            const char * tmp = m_ptr;
-            m_ptr = NULL;
-            return tmp;
-        }
-
-        // comparison operators.
-        friend bool operator == (const AutoString & ap, const char * const p) {
-            return (ap.ptr() == p);
-        }
-        friend bool operator != (const AutoString & ap, const char * const p) {
-            return (ap.ptr() != p);
-        }
-        friend bool operator == (const char * const p, const AutoString & ap) {
-            return (ap.ptr() == p);
-        }
-        friend bool operator != (const char * const p, const AutoString & ap) {
-            return (ap.ptr() != p);
-        }
-
-    private:
-        const char * m_ptr;
-    };
-
-} // nv namespace
-
-#endif // NV_CORE_STRING_H
diff --git a/3rdparty/nvtt/nvcore/utils.h b/3rdparty/nvtt/nvcore/utils.h
deleted file mode 100644
index 364b62928..000000000
--- a/3rdparty/nvtt/nvcore/utils.h
+++ /dev/null
@@ -1,281 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_CORE_UTILS_H
-#define NV_CORE_UTILS_H
-
-#include "debug.h" // nvdebugcheck
-
-#include <new> // for placement new
-
-
-// Just in case. Grrr.
-#undef min
-#undef max
-
-#define NV_INT8_MIN    (-128)
-#define NV_INT8_MAX    127
-#define NV_UINT8_MAX    255
-#define NV_INT16_MIN    (-32767-1)
-#define NV_INT16_MAX    32767
-#define NV_UINT16_MAX   0xffff
-#define NV_INT32_MIN    (-2147483647-1)
-#define NV_INT32_MAX    2147483647
-#define NV_UINT32_MAX   0xffffffff
-#define NV_INT64_MAX    POSH_I64(9223372036854775807)
-#define NV_INT64_MIN    (-POSH_I64(9223372036854775807)-1)
-#define NV_UINT64_MAX   POSH_U64(0xffffffffffffffff)
-
-#define NV_HALF_MAX     65504.0F
-#define NV_FLOAT_MAX    3.402823466e+38F
-
-#define NV_INTEGER_TO_FLOAT_MAX  16777217     // Largest integer such that it and all smaller integers can be stored in a 32bit float.
-
-
-namespace nv
-{
-    // Less error prone than casting. From CB:
-    // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
-
-    // These intentionally look like casts.
-
-    // uint32 casts:
-    template <typename T> inline uint32 U32(T x) { return x; }
-    template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
-    template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
-    //template <> inline uint32 U32<uint32>(uint32 x) { return x; }
-    template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; }
-    //template <> inline uint32 U32<uint16>(uint16 x) { return x; }
-    template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; }
-    //template <> inline uint32 U32<uint8>(uint8 x) { return x; }
-    template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
-
-    // int32 casts:
-    template <typename T> inline int32 I32(T x) { return x; }
-    template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
-    template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_UINT32_MAX); return (int32)x; }
-    template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
-    //template <> inline int32 I32<int32>(int32 x) { return x; }
-    //template <> inline int32 I32<uint16>(uint16 x) { return x; }
-    //template <> inline int32 I32<int16>(int16 x) { return x; }
-    //template <> inline int32 I32<uint8>(uint8 x) { return x; }
-    //template <> inline int32 I32<int8>(int8 x) { return x; }
-
-    // uint16 casts:
-    template <typename T> inline uint16 U16(T x) { return x; }
-    template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
-    template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
-    template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
-    template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
-    //template <> inline uint16 U16<uint16>(uint16 x) { return x; }
-    template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; }
-    //template <> inline uint16 U16<uint8>(uint8 x) { return x; }
-    template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; }
-
-    // int16 casts:
-    template <typename T> inline int16 I16(T x) { return x; }
-    template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
-    template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; }
-    template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
-    template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; }
-    template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
-    //template <> inline int16 I16<int16>(int16 x) { return x; }
-    //template <> inline int16 I16<uint8>(uint8 x) { return x; }
-    //template <> inline int16 I16<int8>(int8 x) { return x; }
-
-    // uint8 casts:
-    template <typename T> inline uint8 U8(T x) { return x; }
-    template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
-    //template <> inline uint8 U8<uint8>(uint8 x) { return x; }
-    template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; }
-    //template <> inline uint8 U8<float>(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
-
-    // int8 casts:
-    template <typename T> inline int8 I8(T x) { return x; }
-    template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
-    template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
-    template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
-    template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    //template <> inline int8 I8<int8>(int8 x) { return x; }
-
-    // float casts:
-    template <typename T> inline float F32(T x) { return x; }
-    template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    // The compiler should not complain about these conversions:
-    //template <> inline float F32<uint16>(uint16 x) { nvDebugCheck(return (float)x; }
-    //template <> inline float F32<int16>(int16 x) { nvDebugCheck(return (float)x; }
-    //template <> inline float F32<uint8>(uint8 x) { nvDebugCheck(return (float)x; }
-    //template <> inline float F32<int8>(int8 x) { nvDebugCheck(return (float)x; }
-
-
-    /// Swap two values.
-    template <typename T> 
-    inline void swap(T & a, T & b)
-    {
-        T temp(a);
-        a = b; 
-        b = temp;
-    }
-
-    /// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN.
-    template <typename T> 
-    //inline const T & max(const T & a, const T & b)
-    inline T max(const T & a, const T & b)
-    {
-        return (b < a) ? a : b;
-    }
-
-	/// Return the maximum of the four arguments.
-	template <typename T> 
-	//inline const T & max4(const T & a, const T & b, const T & c)
-	inline T max4(const T & a, const T & b, const T & c, const T & d)
-	{
-		return max(max(a, b), max(c, d));
-	}
-
-    /// Return the maximum of the three arguments.
-    template <typename T> 
-    //inline const T & max3(const T & a, const T & b, const T & c)
-    inline T max3(const T & a, const T & b, const T & c)
-    {
-        return max(a, max(b, c));
-    }
-
-    /// Return the minimum of two values.
-    template <typename T> 
-    //inline const T & min(const T & a, const T & b)
-    inline T min(const T & a, const T & b)
-    {
-        return (a < b) ? a : b;
-    }
-
-    /// Return the maximum of the three arguments.
-    template <typename T> 
-    //inline const T & min3(const T & a, const T & b, const T & c)
-    inline T min3(const T & a, const T & b, const T & c)
-    {
-        return min(a, min(b, c));
-    }
-
-    /// Clamp between two values.
-    template <typename T> 
-    //inline const T & clamp(const T & x, const T & a, const T & b)
-    inline T clamp(const T & x, const T & a, const T & b)
-    {
-        return min(max(x, a), b);
-    }
-
-    /** Return the next power of two. 
-    * @see http://graphics.stanford.edu/~seander/bithacks.html
-    * @warning Behaviour for 0 is undefined.
-    * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
-    * @note nextPowerOfTwo(x) = 2 << log2(x-1)
-    */
-    inline uint nextPowerOfTwo( uint x )
-    {
-        nvDebugCheck( x != 0 );
-#if 1	// On modern CPUs this is supposed to be as fast as using the bsr instruction.
-        x--;
-        x |= x >> 1;
-        x |= x >> 2;
-        x |= x >> 4;
-        x |= x >> 8;
-        x |= x >> 16;
-        return x+1;	
-#else
-        uint p = 1;
-        while( x > p ) {
-            p += p;
-        }
-        return p;
-#endif
-    }
-
-    /// Return true if @a n is a power of two.
-    inline bool isPowerOfTwo( uint n )
-    {
-        return (n & (n-1)) == 0;
-    }
-
-
-    // @@ Move this to utils?
-    /// Delete all the elements of a container.
-    template <typename T>
-    void deleteAll(T & container)
-    {
-        for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
-        {
-            delete container[i];
-        }
-    }
-
-
-
-    // @@ Specialize these methods for numeric, pointer, and pod types.
-
-    template <typename T>
-    void construct_range(T * restrict ptr, uint new_size, uint old_size) {
-        for (uint i = old_size; i < new_size; i++) {
-            new(ptr+i) T; // placement new
-        }
-    }
-
-    template <typename T>
-    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) {
-        for (uint i = old_size; i < new_size; i++) {
-            new(ptr+i) T(elem); // placement new
-        }
-    }
-
-    template <typename T>
-    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
-        for (uint i = old_size; i < new_size; i++) {
-            new(ptr+i) T(src[i]); // placement new
-        }
-    }
-
-    template <typename T>
-    void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
-        for (uint i = new_size; i < old_size; i++) {
-            (ptr+i)->~T(); // Explicit call to the destructor
-        }
-    }
-
-    template <typename T>
-    void fill(T * restrict dst, uint count, const T & value) {
-        for (uint i = 0; i < count; i++) {
-            dst[i] = value;
-        }
-    }
-
-    template <typename T>
-    void copy_range(T * restrict dst, const T * restrict src, uint count) {
-        for (uint i = 0; i < count; i++) {
-            dst[i] = src[i];
-        }
-    }
-
-    template <typename T>
-    bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
-        for (uint i = begin; i < end; i++) {
-            if (ptr[i] == element) {
-                if (index != NULL) *index = i;
-                return true;
-            }
-        }
-        return false;
-    }
-
-} // nv namespace
-
-#endif // NV_CORE_UTILS_H
diff --git a/3rdparty/nvtt/nvmath/fitting.cpp b/3rdparty/nvtt/nvmath/fitting.cpp
deleted file mode 100644
index ba01b1fc1..000000000
--- a/3rdparty/nvtt/nvmath/fitting.cpp
+++ /dev/null
@@ -1,1200 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#include "fitting.h"
-#include "vector.inl"
-#include "plane.inl"
-#include "matrix.inl"
-
-#include "nvcore/array.inl"
-#include "nvcore/utils.h" // max, swap
-
-using namespace nv;
-
-// @@ Move to EigenSolver.h
-
-// @@ We should be able to do something cheaper...
-static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
-{
-	const Vector3 row0(matrix[0], matrix[1], matrix[2]);
-	const Vector3 row1(matrix[1], matrix[3], matrix[4]);
-	const Vector3 row2(matrix[2], matrix[4], matrix[5]);
-
-	float r0 = lengthSquared(row0);
-	float r1 = lengthSquared(row1);
-	float r2 = lengthSquared(row2);
-
-	if (r0 > r1 && r0 > r2) return row0;
-	if (r1 > r2) return row1;
-	return row2;
-}
-
-
-static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
-{
-    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-    {
-        return Vector3(0.0f);
-    }
-
-    Vector3 v = estimatePrincipalComponent(matrix);
-
-    const int NUM = 8;
-    for (int i = 0; i < NUM; i++)
-    {
-        float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
-        float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
-        float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
-
-        float norm = max(max(x, y), z);
-
-        v = Vector3(x, y, z) / norm;
-    }
-
-    return v;
-}
-
-
-Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
-{
-    Vector3 centroid(0.0f);
-
-    for (int i = 0; i < n; i++)
-    {
-        centroid += points[i];
-    }
-    centroid /= float(n);
-
-    return centroid;
-}
-
-Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
-{
-    Vector3 centroid(0.0f);
-    float total = 0.0f;
-
-    for (int i = 0; i < n; i++)
-    {
-        total += weights[i];
-        centroid += weights[i]*points[i];
-    }
-    centroid /= total;
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points)
-{
-    Vector4 centroid(0.0f);
-
-    for (int i = 0; i < n; i++)
-    {
-        centroid += points[i];
-    }
-    centroid /= float(n);
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
-{
-    Vector4 centroid(0.0f);
-    float total = 0.0f;
-
-    for (int i = 0; i < n; i++)
-    {
-        total += weights[i];
-        centroid += weights[i]*points[i];
-    }
-    centroid /= total;
-
-    return centroid;
-}
-
-
-
-Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector3 centroid = computeCentroid(n, points);
-
-    // compute covariance matrix
-    for (int i = 0; i < 6; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector3 v = points[i] - centroid;
-
-        covariance[0] += v.x * v.x;
-        covariance[1] += v.x * v.y;
-        covariance[2] += v.x * v.z;
-        covariance[3] += v.y * v.y;
-        covariance[4] += v.y * v.z;
-        covariance[5] += v.z * v.z;
-    }
-
-    return centroid;
-}
-
-Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector3 centroid = computeCentroid(n, points, weights, metric);
-
-    // compute covariance matrix
-    for (int i = 0; i < 6; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector3 a = (points[i] - centroid) * metric;
-        Vector3 b = weights[i]*a;
-
-        covariance[0] += a.x * b.x;
-        covariance[1] += a.x * b.y;
-        covariance[2] += a.x * b.z;
-        covariance[3] += a.y * b.y;
-        covariance[4] += a.y * b.z;
-        covariance[5] += a.z * b.z;
-    }
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector4 centroid = computeCentroid(n, points);
-
-    // compute covariance matrix
-    for (int i = 0; i < 10; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector4 v = points[i] - centroid;
-
-        covariance[0] += v.x * v.x;
-        covariance[1] += v.x * v.y;
-        covariance[2] += v.x * v.z;
-        covariance[3] += v.x * v.w;
-
-		covariance[4] += v.y * v.y;
-        covariance[5] += v.y * v.z;
-        covariance[6] += v.y * v.w;
-
-		covariance[7] += v.z * v.z;
-		covariance[8] += v.z * v.w;
-
-		covariance[9] += v.w * v.w;
-	}
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector4 centroid = computeCentroid(n, points, weights, metric);
-
-    // compute covariance matrix
-    for (int i = 0; i < 10; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector4 a = (points[i] - centroid) * metric;
-        Vector4 b = weights[i]*a;
-
-        covariance[0] += a.x * b.x;
-        covariance[1] += a.x * b.y;
-        covariance[2] += a.x * b.z;
-        covariance[3] += a.x * b.w;
-
-		covariance[4] += a.y * b.y;
-        covariance[5] += a.y * b.z;
-        covariance[6] += a.y * b.w;
-
-		covariance[7] += a.z * b.z;
-		covariance[8] += a.z * b.w;
-
-		covariance[9] += a.w * b.w;
-    }
-
-    return centroid;
-}
-
-
-
-Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
-{
-    float matrix[6];
-    computeCovariance(n, points, matrix);
-
-    return firstEigenVector_PowerMethod(matrix);
-}
-
-Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
-{
-    float matrix[6];
-    computeCovariance(n, points, weights, metric, matrix);
-
-    return firstEigenVector_PowerMethod(matrix);
-}
-
-
-
-static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix)
-{
-    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-    {
-        return Vector3(0.0f);
-    }
-
-    float eigenValues[3];
-    Vector3 eigenVectors[3];
-	if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
-	{
-		return Vector3(0.0f);
-	}
-
-	return eigenVectors[0];
-}
-
-Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
-{
-    float matrix[6];
-    computeCovariance(n, points, matrix);
-
-    return firstEigenVector_EigenSolver3(matrix);
-}
-
-Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
-{
-    float matrix[6];
-    computeCovariance(n, points, weights, metric, matrix);
-
-    return firstEigenVector_EigenSolver3(matrix);
-}
-
-
-
-static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix)
-{
-    if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0)
-    {
-        return Vector4(0.0f);
-    }
-
-    float eigenValues[4];
-    Vector4 eigenVectors[4];
-	if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors))
-	{
-		return Vector4(0.0f);
-	}
-
-	return eigenVectors[0];
-}
-
-Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points)
-{
-    float matrix[10];
-    computeCovariance(n, points, matrix);
-
-    return firstEigenVector_EigenSolver4(matrix);
-}
-
-Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
-{
-    float matrix[10];
-    computeCovariance(n, points, weights, metric, matrix);
-
-    return firstEigenVector_EigenSolver4(matrix);
-}
-
-
-
-void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
-
-Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
-{
-	// Store the points in an n x n matrix
-    Array<float> Q; Q.resize(n*n, 0.0f);
-	for (int i = 0; i < n; ++i)
-	{
-		Q[i*n+0] = points[i].x;
-		Q[i*n+1] = points[i].y;
-		Q[i*n+2] = points[i].z;
-	}
-
-	// Alloc space for the SVD outputs
-    Array<float> diag; diag.resize(n, 0.0f);
-    Array<float> R; R.resize(n*n, 0.0f);
-
-	ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
-
-	// Get the principal component
-	return Vector3(R[0], R[1], R[2]);
-}
-
-Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
-{
-	// Store the points in an n x n matrix
-    Array<float> Q; Q.resize(n*n, 0.0f);
-	for (int i = 0; i < n; ++i)
-	{
-		Q[i*n+0] = points[i].x;
-		Q[i*n+1] = points[i].y;
-		Q[i*n+2] = points[i].z;
-		Q[i*n+3] = points[i].w;
-	}
-
-	// Alloc space for the SVD outputs
-    Array<float> diag; diag.resize(n, 0.0f);
-    Array<float> R; R.resize(n*n, 0.0f);
-
-	ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
-
-	// Get the principal component
-	return Vector4(R[0], R[1], R[2], R[3]);
-}
-
-
-
-Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
-{
-    // compute the centroid and covariance
-    float matrix[6];
-    Vector3 centroid = computeCovariance(n, points, matrix);
-
-    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-    {
-        // If no plane defined, then return a horizontal plane.
-        return Plane(Vector3(0, 0, 1), centroid);
-    }
-
-    float eigenValues[3];
-    Vector3 eigenVectors[3];
-    if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
-        // If no plane defined, then return a horizontal plane.
-        return Plane(Vector3(0, 0, 1), centroid);
-    }
-
-    return Plane(eigenVectors[2], centroid);
-}
-
-bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/)
-{
-    // compute the centroid and covariance
-    float matrix[6];
-    computeCovariance(n, points, matrix);
-
-    float eigenValues[3];
-    Vector3 eigenVectors[3];
-    if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
-        return false;
-    }
-
-    return eigenValues[2] < epsilon;
-}
-
-
-
-// Tridiagonal solver from Charles Bloom. 
-// Householder transforms followed by QL decomposition. 
-// Seems to be based on the code from Numerical Recipes in C.
-
-static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd);
-static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd);
-
-bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
-{
-    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
-
-    float subd[3];
-    float diag[3];
-    float work[3][3];
-
-    work[0][0] = matrix[0];
-    work[0][1] = work[1][0] = matrix[1];
-    work[0][2] = work[2][0] = matrix[2];
-    work[1][1] = matrix[3];
-    work[1][2] = work[2][1] = matrix[4];
-    work[2][2] = matrix[5];
-
-    EigenSolver3_Tridiagonal(work, diag, subd);
-    if (!EigenSolver3_QLAlgorithm(work, diag, subd))
-    {
-        for (int i = 0; i < 3; i++) {
-            eigenValues[i] = 0;
-            eigenVectors[i] = Vector3(0);
-        }
-        return false;
-    }
-
-    for (int i = 0; i < 3; i++) {
-        eigenValues[i] = (float)diag[i];
-    }
-
-    // eigenvectors are the columns; make them the rows :
-
-    for (int i=0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
-            eigenVectors[j].component[i] = (float) work[i][j];
-        }
-    }
-
-    // shuffle to sort by singular value :
-    if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1])
-    {
-        swap(eigenValues[0], eigenValues[2]);
-        swap(eigenVectors[0], eigenVectors[2]);
-    }
-    if (eigenValues[1] > eigenValues[0])
-    {
-        swap(eigenValues[0], eigenValues[1]);
-        swap(eigenVectors[0], eigenVectors[1]);
-    }
-    if (eigenValues[2] > eigenValues[1])
-    {
-        swap(eigenValues[1], eigenValues[2]);
-        swap(eigenVectors[1], eigenVectors[2]);
-    }
-
-    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]);
-    nvDebugCheck(eigenValues[1] >= eigenValues[2]);
-
-    return true;
-}
-
-static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd)
-{
-    // Householder reduction T = Q^t M Q
-    //   Input:   
-    //     mat, symmetric 3x3 matrix M
-    //   Output:  
-    //     mat, orthogonal matrix Q
-    //     diag, diagonal entries of T
-    //     subd, subdiagonal entries of T (T is symmetric)
-    const float epsilon = 1e-08f;
-
-    float a = mat[0][0];
-    float b = mat[0][1];
-    float c = mat[0][2];
-    float d = mat[1][1];
-    float e = mat[1][2];
-    float f = mat[2][2];
-
-    diag[0] = a;
-    subd[2] = 0.f;
-    if (fabsf(c) >= epsilon)
-    {
-        const float ell = sqrtf(b*b+c*c);
-        b /= ell;
-        c /= ell;
-        const float q = 2*b*e+c*(f-d);
-        diag[1] = d+c*q;
-        diag[2] = f-c*q;
-        subd[0] = ell;
-        subd[1] = e-b*q;
-        mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
-        mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c;
-        mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b;
-    }
-    else
-    {
-        diag[1] = d;
-        diag[2] = f;
-        subd[0] = b;
-        subd[1] = e;
-        mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
-        mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0;
-        mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1;
-    }
-}
-
-static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd)
-{
-    // QL iteration with implicit shifting to reduce matrix from tridiagonal
-    // to diagonal
-    const int maxiter = 32;
-
-    for (int ell = 0; ell < 3; ell++)
-    {
-        int iter;
-        for (iter = 0; iter < maxiter; iter++)
-        {
-            int m;
-            for (m = ell; m <= 1; m++)
-            {
-                float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
-                if ( fabsf(subd[m]) + dd == dd )
-                    break;
-            }
-            if ( m == ell )
-                break;
-
-            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
-            float r = sqrtf(g*g+1);
-            if ( g < 0 )
-                g = diag[m]-diag[ell]+subd[ell]/(g-r);
-            else
-                g = diag[m]-diag[ell]+subd[ell]/(g+r);
-            float s = 1, c = 1, p = 0;
-            for (int i = m-1; i >= ell; i--)
-            {
-                float f = s*subd[i], b = c*subd[i];
-                if ( fabsf(f) >= fabsf(g) )
-                {
-                    c = g/f;
-                    r = sqrtf(c*c+1);
-                    subd[i+1] = f*r;
-                    c *= (s = 1/r);
-                }
-                else
-                {
-                    s = f/g;
-                    r = sqrtf(s*s+1);
-                    subd[i+1] = g*r;
-                    s *= (c = 1/r);
-                }
-                g = diag[i+1]-p;
-                r = (diag[i]-g)*s+2*b*c;
-                p = s*r;
-                diag[i+1] = g+p;
-                g = c*r-b;
-
-                for (int k = 0; k < 3; k++)
-                {
-                    f = mat[k][i+1];
-                    mat[k][i+1] = s*mat[k][i]+c*f;
-                    mat[k][i] = c*mat[k][i]-s*f;
-                }
-            }
-            diag[ell] -= p;
-            subd[ell] = g;
-            subd[m] = 0;
-        }
-
-        if ( iter == maxiter )
-            // should not get here under normal circumstances
-            return false;
-    }
-
-    return true;
-}
-
-
-
-// Tridiagonal solver for 4x4 symmetric matrices.
-
-static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd);
-static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd);
-
-bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4])
-{
-    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
-
-    float subd[4];
-    float diag[4];
-    float work[4][4];
-
-    work[0][0] = matrix[0];
-    work[0][1] = work[1][0] = matrix[1];
-    work[0][2] = work[2][0] = matrix[2];
-    work[0][3] = work[3][0] = matrix[3];
-    work[1][1] = matrix[4];
-    work[1][2] = work[2][1] = matrix[5];
-    work[1][3] = work[3][1] = matrix[6];
-    work[2][2] = matrix[7];
-    work[2][3] = work[3][2] = matrix[8];
-    work[3][3] = matrix[9];
-
-    EigenSolver4_Tridiagonal(work, diag, subd);
-    if (!EigenSolver4_QLAlgorithm(work, diag, subd))
-    {
-        for (int i = 0; i < 4; i++) {
-            eigenValues[i] = 0;
-            eigenVectors[i] = Vector4(0);
-        }
-        return false;
-    }
-
-    for (int i = 0; i < 4; i++) {
-        eigenValues[i] = (float)diag[i];
-    }
-
-    // eigenvectors are the columns; make them the rows
-
-    for (int i = 0; i < 4; i++)
-    {
-        for (int j = 0; j < 4; j++)
-        {
-            eigenVectors[j].component[i] = (float) work[i][j];
-        }
-    }
-
-    // sort by singular value
-
-	for (int i = 0; i < 3; ++i)
-	{
-		for (int j = i+1; j < 4; ++j)
-		{
-			if (eigenValues[j] > eigenValues[i])
-			{
-				swap(eigenValues[i], eigenValues[j]);
-				swap(eigenVectors[i], eigenVectors[j]);
-			}
-		}
-	}
-
-    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]);
-    nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]);
-    nvDebugCheck(eigenValues[2] >= eigenValues[2]);
-
-    return true;
-}
-
-inline float signNonzero(float x)
-{
-	return (x >= 0.0f) ? 1.0f : -1.0f;
-}
-
-static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd)
-{
-    // Householder reduction T = Q^t M Q
-    //   Input:   
-    //     mat, symmetric 3x3 matrix M
-    //   Output:  
-    //     mat, orthogonal matrix Q
-    //     diag, diagonal entries of T
-    //     subd, subdiagonal entries of T (T is symmetric)
-
-	static const int n = 4;
-
-	// Set epsilon relative to size of elements in matrix
-	static const float relEpsilon = 1e-6f;
-	float maxElement = FLT_MAX;
-	for (int i = 0; i < n; ++i)
-		for (int j = 0; j < n; ++j)
-			maxElement = max(maxElement, fabsf(mat[i][j]));
-	float epsilon = relEpsilon * maxElement;
-
-	// Iterative algorithm, works for any size of matrix but might be slower than
-	// a closed-form solution for symmetric 4x4 matrices.  Based on this article:
-	// http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization
-
-	Matrix A, Q(identity);
-	memcpy(&A, mat, sizeof(float)*n*n);
-
-	// We proceed from left to right, making the off-tridiagonal entries zero in
-	// one column of the matrix at a time.
-	for (int k = 0; k < n - 2; ++k)
-	{
-		float sum = 0.0f;
-		for (int j = k+1; j < n; ++j)
-			sum += A(j,k)*A(j,k);
-		float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum);
-		float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha));
-
-		// If r is zero, skip this column - already in tridiagonal form
-		if (fabsf(r) < epsilon)
-			continue;
-
-		float v[n] = {};
-		v[k+1] = 0.5f * (A(k+1,k) - alpha) / r;
-		for (int j = k+2; j < n; ++j)
-			v[j] = 0.5f * A(j,k) / r;
-
-		Matrix P(identity);
-		for (int i = 0; i < n; ++i)
-			for (int j = 0; j < n; ++j)
-				P(i,j) -= 2.0f * v[i] * v[j];
-
-		A = mul(mul(P, A), P);
-		Q = mul(Q, P);
-	}
-
-	nvDebugCheck(fabsf(A(2,0)) < epsilon);
-	nvDebugCheck(fabsf(A(0,2)) < epsilon);
-	nvDebugCheck(fabsf(A(3,0)) < epsilon);
-	nvDebugCheck(fabsf(A(0,3)) < epsilon);
-	nvDebugCheck(fabsf(A(3,1)) < epsilon);
-	nvDebugCheck(fabsf(A(1,3)) < epsilon);
-
-	for (int i = 0; i < n; ++i)
-		diag[i] = A(i,i);
-	for (int i = 0; i < n - 1; ++i)
-		subd[i] = A(i+1,i);
-	subd[n-1] = 0.0f;
-
-	memcpy(mat, &Q, sizeof(float)*n*n);
-}
-
-static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd)
-{
-    // QL iteration with implicit shifting to reduce matrix from tridiagonal
-    // to diagonal
-    const int maxiter = 32;
-
-    for (int ell = 0; ell < 4; ell++)
-    {
-        int iter;
-        for (iter = 0; iter < maxiter; iter++)
-        {
-            int m;
-            for (m = ell; m < 3; m++)
-            {
-                float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
-                if ( fabsf(subd[m]) + dd == dd )
-                    break;
-            }
-            if ( m == ell )
-                break;
-
-            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
-            float r = sqrtf(g*g+1);
-            if ( g < 0 )
-                g = diag[m]-diag[ell]+subd[ell]/(g-r);
-            else
-                g = diag[m]-diag[ell]+subd[ell]/(g+r);
-            float s = 1, c = 1, p = 0;
-            for (int i = m-1; i >= ell; i--)
-            {
-                float f = s*subd[i], b = c*subd[i];
-                if ( fabsf(f) >= fabsf(g) )
-                {
-                    c = g/f;
-                    r = sqrtf(c*c+1);
-                    subd[i+1] = f*r;
-                    c *= (s = 1/r);
-                }
-                else
-                {
-                    s = f/g;
-                    r = sqrtf(s*s+1);
-                    subd[i+1] = g*r;
-                    s *= (c = 1/r);
-                }
-                g = diag[i+1]-p;
-                r = (diag[i]-g)*s+2*b*c;
-                p = s*r;
-                diag[i+1] = g+p;
-                g = c*r-b;
-
-                for (int k = 0; k < 4; k++)
-                {
-                    f = mat[k][i+1];
-                    mat[k][i+1] = s*mat[k][i]+c*f;
-                    mat[k][i] = c*mat[k][i]-s*f;
-                }
-            }
-            diag[ell] -= p;
-            subd[ell] = g;
-            subd[m] = 0;
-        }
-
-        if ( iter == maxiter )
-            // should not get here under normal circumstances
-            return false;
-    }
-
-    return true;
-}
-
-
-
-int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
-{
-    // Compute principal component.
-    float matrix[6];
-    Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
-    Vector3 principal = firstEigenVector_PowerMethod(matrix);
-
-    // Pick initial solution.
-    int mini, maxi;
-    mini = maxi = 0;
-
-    float mindps, maxdps;
-    mindps = maxdps = dot(points[0] - centroid, principal);
-
-    for (int i = 1; i < n; ++i)
-    {
-        float dps = dot(points[i] - centroid, principal);
-
-        if (dps < mindps) {
-            mindps = dps;
-            mini = i;
-        }
-        else {
-            maxdps = dps;
-            maxi = i;
-        }
-    }
-
-    cluster[0] = centroid + mindps * principal;
-    cluster[1] = centroid + maxdps * principal;
-    cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f;
-    cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f;
-
-    // Now we have to iteratively refine the clusters.
-    while (true)
-    {
-        Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) };
-        float total[4] = {0, 0, 0, 0};
-
-        for (int i = 0; i < n; ++i)
-        {
-            // Find nearest cluster.
-            int nearest = 0;
-            float mindist = FLT_MAX;
-            for (int j = 0; j < 4; j++)
-            {
-                float dist = lengthSquared((cluster[j] - points[i]) * metric);
-                if (dist < mindist)
-                {
-                    mindist = dist;
-                    nearest = j;
-                }
-            }
-
-            newCluster[nearest] += weights[i] * points[i];
-            total[nearest] += weights[i];
-        }
-
-        for (int j = 0; j < 4; j++)
-        {
-            if (total[j] != 0)
-                newCluster[j] /= total[j];
-        }
-
-        if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) && 
-            equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
-        {
-            return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
-        }
-
-        cluster[0] = newCluster[0];
-        cluster[1] = newCluster[1];
-        cluster[2] = newCluster[2];
-        cluster[3] = newCluster[3];
-
-        // Sort clusters by weight.
-        for (int i = 0; i < 4; i++)
-        {
-            for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
-            {
-                swap( total[j], total[j - 1] );
-                swap( cluster[j], cluster[j - 1] );
-            }
-        }
-    }
-}
-
-
-
-// Adaptation of James Arvo's SVD code, as found in ZOH.
-
-inline float Sqr(float x) { return x*x; }
-
-inline float svd_pythag( float a, float b )
-{
-	float at = fabsf(a);
-	float bt = fabsf(b);
-	if( at > bt )
-		return at * sqrtf( 1.0f + Sqr( bt / at ) );
-	else if( bt > 0.0f )
-		return bt * sqrtf( 1.0f + Sqr( at / bt ) );
-	else return 0.0f;
-}
-
-inline float SameSign( float a, float b ) 
-{
-	float t;
-	if( b >= 0.0f ) t = fabsf( a );
-	else t = -fabsf( a );
-	return t;
-}
-
-void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
-{
-	static const int MaxIterations = 30;
-
-	int    i, j, k, l, p, q, iter;
-	float  c, f, h, s, x, y, z;
-	float  norm  = 0.0f;
-	float  g     = 0.0f;
-	float  scale = 0.0f;
-
-    Array<float> temp; temp.resize(cols, 0.0f);
-
-	for( i = 0; i < cols; i++ ) 
-	{
-		temp[i] = scale * g;
-		scale   = 0.0f;
-		g       = 0.0f;
-		s       = 0.0f;
-		l       = i + 1;
-
-		if( i < rows )
-		{
-			for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] );
-			if( scale != 0.0f ) 
-			{
-				for( k = i; k < rows; k++ ) 
-				{
-					Q[k*cols+i] /= scale;
-					s += Sqr( Q[k*cols+i] );
-				}
-				f = Q[i*cols+i];
-				g = -SameSign( sqrtf(s), f );
-				h = f * g - s;
-				Q[i*cols+i] = f - g;
-				if( i != cols - 1 )
-				{
-					for( j = l; j < cols; j++ ) 
-					{
-						s = 0.0f;
-						for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
-						f = s / h;
-						for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
-					}
-				}
-				for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale;
-			}
-		}
-
-		diag[i] = scale * g;
-		g       = 0.0f;
-		s       = 0.0f;
-		scale   = 0.0f;
-
-		if( i < rows && i != cols - 1 ) 
-		{
-			for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] );
-			if( scale != 0.0f ) 
-			{
-				for( k = l; k < cols; k++ ) 
-				{
-					Q[i*cols+k] /= scale;
-					s += Sqr( Q[i*cols+k] );
-				}
-				f = Q[i*cols+l];
-				g = -SameSign( sqrtf(s), f );
-				h = f * g - s;
-				Q[i*cols+l] = f - g;
-				for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h;
-				if( i != rows - 1 ) 
-				{
-					for( j = l; j < rows; j++ ) 
-					{
-						s = 0.0f;
-						for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k];
-						for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k];
-					}
-				}
-				for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale;
-			}
-		}
-		norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) );
-	}
-
-
-	for( i = cols - 1; i >= 0; i-- ) 
-	{
-		if( i < cols - 1 ) 
-		{
-			if( g != 0.0f ) 
-			{
-				for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g;
-				for( j = l; j < cols; j++ ) 
-				{
-					s = 0.0f;
-					for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k];
-					for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k];
-				}
-			}
-			for( j = l; j < cols; j++ ) 
-			{
-				R[i*cols+j] = 0.0f;
-				R[j*cols+i] = 0.0f;
-			}
-		}
-		R[i*cols+i] = 1.0f;
-		g = temp[i];
-		l = i;
-	}
-
-
-	for( i = cols - 1; i >= 0; i-- ) 
-	{
-		l = i + 1;
-		g = diag[i];
-		if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f;
-		if( g != 0.0f ) 
-		{
-			g = 1.0f / g;
-			if( i != cols - 1 ) 
-			{
-				for( j = l; j < cols; j++ ) 
-				{
-					s = 0.0f;
-					for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
-					f = ( s / Q[i*cols+i] ) * g;
-					for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
-				}
-			}
-			for( j = i; j < rows; j++ ) Q[j*cols+i] *= g;
-		} 
-		else 
-		{
-			for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f;
-		}
-		Q[i*cols+i] += 1.0f;
-	}
-
-
-	for( k = cols - 1; k >= 0; k-- ) 
-	{
-		for( iter = 1; iter <= MaxIterations; iter++ ) 
-		{
-			int jump;
-
-			for( l = k; l >= 0; l-- )
-			{
-				q = l - 1;
-				if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; }
-				if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; }
-			}
-
-			if( !jump )
-			{
-				c = 0.0f;
-				s = 1.0f;
-				for( i = l; i <= k; i++ )
-				{
-					f = s * temp[i];
-					temp[i] *= c;
-					if( fabsf( f ) + norm == norm ) break;
-					g = diag[i];
-					h = svd_pythag( f, g );
-					diag[i] = h;
-					h = 1.0f / h;
-					c = g * h;
-					s = -f * h;
-					for( j = 0; j < rows; j++ ) 
-					{
-						y = Q[j*cols+q];
-						z = Q[j*cols+i];
-						Q[j*cols+q] = y * c + z * s;
-						Q[j*cols+i] = z * c - y * s;
-					}
-				}
-			}
-
-			z = diag[k];
-			if( l == k ) 
-			{
-				if( z < 0.0f ) 
-				{
-					diag[k] = -z;
-					for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f; 
-				}
-				break;
-			}
-			if( iter >= MaxIterations ) return;
-			x = diag[l];
-			q = k - 1;
-			y = diag[q];
-			g = temp[q];
-			h = temp[k];
-			f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y );
-			g = svd_pythag( f, 1.0f );
-			f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
-			c = 1.0f;
-			s = 1.0f;
-			for( j = l; j <= q; j++ ) 
-			{
-				i = j + 1;
-				g = temp[i];
-				y = diag[i];
-				h = s * g;
-				g = c * g;
-				z = svd_pythag( f, h );
-				temp[j] = z;
-				c = f / z;
-				s = h / z;
-				f = x * c + g * s;
-				g = g * c - x * s;
-				h = y * s;
-				y = y * c;
-				for( p = 0; p < cols; p++ ) 
-				{
-					x = R[j*cols+p];
-					z = R[i*cols+p];
-					R[j*cols+p] = x * c + z * s;
-					R[i*cols+p] = z * c - x * s;
-				}
-				z = svd_pythag( f, h );
-				diag[j] = z;
-				if( z != 0.0f ) 
-				{
-					z = 1.0f / z;
-					c = f * z;
-					s = h * z;
-				}
-				f = c * g + s * y;
-				x = c * y - s * g;
-				for( p = 0; p < rows; p++ ) 
-				{
-					y = Q[p*cols+j];
-					z = Q[p*cols+i];
-					Q[p*cols+j] = y * c + z * s;
-					Q[p*cols+i] = z * c - y * s;
-				}
-			}
-			temp[l] = 0.0f;
-			temp[k] = f;
-			diag[k] = x;
-		}
-	}
-
-	// Sort the singular values into descending order.
-
-	for( i = 0; i < cols - 1; i++ )
-	{
-		float biggest = diag[i];  // Biggest singular value so far.
-		int   bindex  = i;        // The row/col it occurred in.
-		for( j = i + 1; j < cols; j++ )
-		{
-			if( diag[j] > biggest ) 
-			{
-				biggest = diag[j];
-				bindex  = j;
-			}            
-		}
-		if( bindex != i )  // Need to swap rows and columns.
-		{
-			// Swap columns in Q.
-			for (int j = 0; j < rows; ++j)
-				swap(Q[j*cols+i], Q[j*cols+bindex]);
-
-			// Swap rows in R.
-			for (int j = 0; j < rows; ++j)
-				swap(R[i*cols+j], R[bindex*cols+j]);
-
-			// Swap elements in diag.
-			swap(diag[i], diag[bindex]);
-		}
-	}
-}
diff --git a/3rdparty/nvtt/nvmath/fitting.h b/3rdparty/nvtt/nvmath/fitting.h
deleted file mode 100644
index e83504580..000000000
--- a/3rdparty/nvtt/nvmath/fitting.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#ifndef NV_MATH_FITTING_H
-#define NV_MATH_FITTING_H
-
-#include "vector.h"
-#include "plane.h"
-
-namespace nv
-{
-    namespace Fit
-    {
-        Vector3 computeCentroid(int n, const Vector3 * points);
-        Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
-
-        Vector4 computeCentroid(int n, const Vector4 * points);
-        Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
-
-        Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
-        Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);
-
-        Vector4 computeCovariance(int n, const Vector4 * points, float * covariance);
-        Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance);
-
-        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
-        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
-
-        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
-        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
-
-		Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points);
-        Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
-
-        Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
-        Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points);
-
-        Plane bestPlane(int n, const Vector3 * points);
-        bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);
-
-        bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
-        bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]);
-
-        // Returns number of clusters [1-4].
-        int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster);
-    }
-
-} // nv namespace
-
-#endif // NV_MATH_FITTING_H
diff --git a/3rdparty/nvtt/nvmath/matrix.h b/3rdparty/nvtt/nvmath/matrix.h
deleted file mode 100644
index 901a98278..000000000
--- a/3rdparty/nvtt/nvmath/matrix.h
+++ /dev/null
@@ -1,112 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_MATRIX_H
-#define NV_MATH_MATRIX_H
-
-#include "vector.h"
-
-// - Matrices are stored in memory in *column major* order.
-// - Points are to be though of as column vectors.
-// - Transformation of a point p by a matrix M is: p' = M * p
-
-namespace nv
-{
-    enum identity_t { identity };
-
-    // 3x3 matrix.
-    class NVMATH_CLASS Matrix3
-    {
-    public:
-        Matrix3();
-        explicit Matrix3(float f);
-        explicit Matrix3(identity_t);
-        Matrix3(const Matrix3 & m);
-        Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
-
-        float data(uint idx) const;
-        float & data(uint idx);
-        float get(uint row, uint col) const;
-        float operator()(uint row, uint col) const;
-        float & operator()(uint row, uint col);
-
-        Vector3 row(uint i) const;
-        Vector3 column(uint i) const;
-
-        void operator*=(float s);
-        void operator/=(float s);
-        void operator+=(const Matrix3 & m);
-        void operator-=(const Matrix3 & m);
-
-        void scale(float s);
-        void scale(Vector3::Arg s);
-        float determinant() const;
-
-    private:
-        float m_data[9];
-    };
-
-    // Solve equation system using LU decomposition and back-substitution.
-    extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x);
-
-    // Solve equation system using Cramer's inverse.
-    extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
-
-
-    // 4x4 matrix.
-    class NVMATH_CLASS Matrix
-    {
-    public:
-        typedef Matrix const & Arg;
-
-        Matrix();
-        explicit Matrix(float f);
-        explicit Matrix(identity_t);
-        Matrix(const Matrix3 & m);
-        Matrix(const Matrix & m);
-        Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
-        //explicit Matrix(const float m[]);	// m is assumed to contain 16 elements
-
-        float data(uint idx) const;
-        float & data(uint idx);
-        float get(uint row, uint col) const;
-        float operator()(uint row, uint col) const;
-        float & operator()(uint row, uint col);
-        const float * ptr() const;
-
-        Vector4 row(uint i) const;
-        Vector4 column(uint i) const;
-
-        void zero();
-        void identity();
-
-        void scale(float s);
-        void scale(Vector3::Arg s);
-        void translate(Vector3::Arg t);
-        void rotate(float theta, float v0, float v1, float v2);
-        float determinant() const;
-
-        void operator+=(const Matrix & m);
-        void operator-=(const Matrix & m);
-
-        void apply(Matrix::Arg m);
-
-    private:
-        float m_data[16];
-    };
-
-    // Solve equation system using LU decomposition and back-substitution.
-    extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
-
-    // Solve equation system using Cramer's inverse.
-    extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
-
-    // Compute inverse using LU decomposition.
-    extern Matrix inverseLU(const Matrix & m);
-
-    // Compute inverse using Gaussian elimination and partial pivoting.
-    extern Matrix inverse(const Matrix & m);
-    extern Matrix3 inverse(const Matrix3 & m);
-
-} // nv namespace
-
-#endif // NV_MATH_MATRIX_H
diff --git a/3rdparty/nvtt/nvmath/matrix.inl b/3rdparty/nvtt/nvmath/matrix.inl
deleted file mode 100644
index 0996a4f88..000000000
--- a/3rdparty/nvtt/nvmath/matrix.inl
+++ /dev/null
@@ -1,1274 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_MATRIX_INL
-#define NV_MATH_MATRIX_INL
-
-#include "matrix.h"
-
-namespace nv
-{
-    inline Matrix3::Matrix3() {}
-    
-    inline Matrix3::Matrix3(float f)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] = f;
-        }
-    }
-
-    inline Matrix3::Matrix3(identity_t)
-    {
-        for(int i = 0; i < 3; i++) {
-            for(int j = 0; j < 3; j++) {
-                m_data[3*j+i] = (i == j) ? 1.0f : 0.0f;
-            }
-        }
-    }
-
-    inline Matrix3::Matrix3(const Matrix3 & m)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] = m.m_data[i];
-        }
-    }
-    
-    inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2)
-    {
-        m_data[0] = v0.x; m_data[1] = v0.y; m_data[2] = v0.z;
-        m_data[3] = v1.x; m_data[4] = v1.y; m_data[5] = v1.z;
-        m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z;
-    }
-
-    inline float Matrix3::data(uint idx) const
-    {
-        nvDebugCheck(idx < 9);
-        return m_data[idx];
-    }
-    inline float & Matrix3::data(uint idx)
-    {
-        nvDebugCheck(idx < 9);
-        return m_data[idx];
-    }
-    inline float Matrix3::get(uint row, uint col) const
-    {
-        nvDebugCheck(row < 3 && col < 3);
-        return m_data[col * 3 + row];
-    }
-    inline float Matrix3::operator()(uint row, uint col) const
-    {
-        nvDebugCheck(row < 3 && col < 3);
-        return m_data[col * 3 + row];
-    }
-    inline float & Matrix3::operator()(uint row, uint col)
-    {
-        nvDebugCheck(row < 3 && col < 3);
-        return m_data[col * 3 + row];
-    }
-
-    inline Vector3 Matrix3::row(uint i) const
-    {
-        nvDebugCheck(i < 3);
-        return Vector3(get(i, 0), get(i, 1), get(i, 2));
-    }
-    inline Vector3 Matrix3::column(uint i) const
-    {
-        nvDebugCheck(i < 3);
-        return Vector3(get(0, i), get(1, i), get(2, i));
-    }
-
-    inline void Matrix3::operator*=(float s)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] *= s;
-        }
-    }
-
-    inline void Matrix3::operator/=(float s)
-    {
-        float is = 1.0f /s;
-        for(int i = 0; i < 9; i++) {
-            m_data[i] *= is;
-        }
-    }
-
-    inline void Matrix3::operator+=(const Matrix3 & m)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] += m.m_data[i];
-        }
-    }
-
-    inline void Matrix3::operator-=(const Matrix3 & m)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] -= m.m_data[i];
-        }
-    }
-
-    inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b)
-    {
-        Matrix3 m = a;
-        m += b;
-        return m;
-    }
-
-    inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b)
-    {
-        Matrix3 m = a;
-        m -= b;
-        return m;
-    }
-
-    inline Matrix3 operator*(const Matrix3 & a, float s)
-    {
-        Matrix3 m = a;
-        m *= s;
-        return m;
-    }
-
-    inline Matrix3 operator*(float s, const Matrix3 & a)
-    {
-        Matrix3 m = a;
-        m *= s;
-        return m;
-    }
-
-    inline Matrix3 operator/(const Matrix3 & a, float s)
-    {
-        Matrix3 m = a;
-        m /= s;
-        return m;
-    }
-
-    inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b)
-    {
-        Matrix3 m;
-
-        for(int i = 0; i < 3; i++) {
-            const float ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2);
-            m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0);
-            m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1);
-            m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2);
-        }
-
-        return m;
-    }
-
-    inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b)
-    {
-        return mul(a, b);
-    }
-
-    // Transform the given 3d vector with the given matrix.
-    inline Vector3 transform(const Matrix3 & m, const Vector3 & p)
-    {
-        return Vector3(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2));
-    }
-
-    inline void Matrix3::scale(float s)
-    {
-        for (int i = 0; i < 9; i++) {
-            m_data[i] *= s;
-        }
-    }
-
-    inline void Matrix3::scale(Vector3::Arg s)
-    {
-        m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x;
-        m_data[3] *= s.y; m_data[4] *= s.y; m_data[5] *= s.y;
-        m_data[6] *= s.z; m_data[7] *= s.z; m_data[8] *= s.z;
-    }
-
-    inline float Matrix3::determinant() const
-    {
-        return 
-            get(0,0) * get(1,1) * get(2,2) + 
-            get(0,1) * get(1,2) * get(2,0) + 
-            get(0,2) * get(1,0) * get(2,1) -
-            get(0,2) * get(1,1) * get(2,0) - 
-            get(0,1) * get(1,0) * get(2,2) -
-            get(0,0) * get(1,2) * get(2,1);
-    }
-
-    // Inverse using Cramer's rule.
-    inline Matrix3 inverseCramer(const Matrix3 & m)
-    {
-        const float det = m.determinant();
-        if (equal(det, 0.0f, 0.0f)) {
-            return Matrix3(0);
-        }
-
-        Matrix3 r;
-
-        r.data(0) =  - m.data(5) * m.data(7) + m.data(4) * m.data(8);
-        r.data(1) =  + m.data(5) * m.data(6) - m.data(3) * m.data(8);
-        r.data(2) =  - m.data(4) * m.data(6) + m.data(3) * m.data(7);
-
-        r.data(3) =  + m.data(2) * m.data(7) - m.data(1) * m.data(8);
-        r.data(4) =  - m.data(2) * m.data(6) + m.data(0) * m.data(8);
-        r.data(5) =  + m.data(1) * m.data(6) - m.data(0) * m.data(7);
-
-        r.data(6) =  - m.data(2) * m.data(4) + m.data(1) * m.data(5);
-        r.data(7) =  + m.data(2) * m.data(3) - m.data(0) * m.data(5);
-        r.data(8) =  - m.data(1) * m.data(3) + m.data(0) * m.data(4);
-
-        r.scale(1.0f / det);
-
-        return r;
-    }
-
-
-
-    inline Matrix::Matrix()
-    {
-    }
-
-    inline Matrix::Matrix(float f)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] = 0.0f;
-        }
-    }
-
-    inline Matrix::Matrix(identity_t)
-    {
-        for(int i = 0; i < 4; i++) {
-            for(int j = 0; j < 4; j++) {
-                m_data[4*j+i] = (i == j) ? 1.0f : 0.0f;
-            }
-        }
-    }
-
-    inline Matrix::Matrix(const Matrix & m)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] = m.m_data[i];
-        }
-    }
-
-    inline Matrix::Matrix(const Matrix3 & m)
-    {
-        for(int i = 0; i < 3; i++) {
-            for(int j = 0; j < 3; j++) {
-                operator()(i, j) = m.get(i, j);
-            }
-        }
-        for(int i = 0; i < 4; i++) {
-            operator()(3, i) = 0;
-            operator()(i, 3) = 0;
-        }
-    }
-
-    inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
-    {
-        m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w;
-        m_data[ 4] = v1.x; m_data[ 5] = v1.y; m_data[ 6] = v1.z; m_data[ 7] = v1.w;
-        m_data[ 8] = v2.x; m_data[ 9] = v2.y; m_data[10] = v2.z; m_data[11] = v2.w;
-        m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
-    }
-
-    /*inline Matrix::Matrix(const float m[])
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] = m[i];
-        }
-    }*/
-
-
-    // Accessors
-    inline float Matrix::data(uint idx) const
-    {
-        nvDebugCheck(idx < 16);
-        return m_data[idx];
-    }
-    inline float & Matrix::data(uint idx)
-    {
-        nvDebugCheck(idx < 16);
-        return m_data[idx];
-    }
-    inline float Matrix::get(uint row, uint col) const
-    {
-        nvDebugCheck(row < 4 && col < 4);
-        return m_data[col * 4 + row];
-    }
-    inline float Matrix::operator()(uint row, uint col) const
-    {
-        nvDebugCheck(row < 4 && col < 4);
-        return m_data[col * 4 + row];
-    }
-    inline float & Matrix::operator()(uint row, uint col)
-    {
-        nvDebugCheck(row < 4 && col < 4);
-        return m_data[col * 4 + row];
-    }
-
-    inline const float * Matrix::ptr() const
-    {
-        return m_data;
-    }
-
-    inline Vector4 Matrix::row(uint i) const
-    {
-        nvDebugCheck(i < 4);
-        return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3));
-    }
-
-    inline Vector4 Matrix::column(uint i) const
-    {
-        nvDebugCheck(i < 4);
-        return Vector4(get(0, i), get(1, i), get(2, i), get(3, i));
-    }
-
-    inline void Matrix::zero()
-    {
-        m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0;
-        m_data[4] = 0; m_data[5] = 0; m_data[6] = 0; m_data[7] = 0;
-        m_data[8] = 0; m_data[9] = 0; m_data[10] = 0; m_data[11] = 0;
-        m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 0;
-    }
-
-    inline void Matrix::identity()
-    {
-        m_data[0] = 1; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0;
-        m_data[4] = 0; m_data[5] = 1; m_data[6] = 0; m_data[7] = 0;
-        m_data[8] = 0; m_data[9] = 0; m_data[10] = 1; m_data[11] = 0;
-        m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 1;
-    }
-
-    // Apply scale.
-    inline void Matrix::scale(float s)
-    {
-        m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s;
-        m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s;
-        m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s;
-        m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s;
-    }
-
-    // Apply scale.
-    inline void Matrix::scale(Vector3::Arg s)
-    {
-        m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; m_data[3] *= s.x;
-        m_data[4] *= s.y; m_data[5] *= s.y; m_data[6] *= s.y; m_data[7] *= s.y;
-        m_data[8] *= s.z; m_data[9] *= s.z; m_data[10] *= s.z; m_data[11] *= s.z;
-    }
-
-    // Apply translation.
-    inline void Matrix::translate(Vector3::Arg t)
-    {
-        m_data[12] = m_data[0] * t.x + m_data[4] * t.y + m_data[8]  * t.z + m_data[12];
-        m_data[13] = m_data[1] * t.x + m_data[5] * t.y + m_data[9]  * t.z + m_data[13];
-        m_data[14] = m_data[2] * t.x + m_data[6] * t.y + m_data[10] * t.z + m_data[14];
-        m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15];
-    }
-
-    Matrix rotation(float theta, float v0, float v1, float v2);
-
-    // Apply rotation.
-    inline void Matrix::rotate(float theta, float v0, float v1, float v2)
-    {
-        Matrix R(rotation(theta, v0, v1, v2));
-        apply(R);
-    }
-
-    // Apply transform.
-    inline void Matrix::apply(Matrix::Arg m)
-    {
-        nvDebugCheck(this != &m);
-
-        for(int i = 0; i < 4; i++) {
-            const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3);
-            m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0);
-            m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1);
-            m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2);
-            m_data[12+ i] = ai0 * m(0,3) + ai1 * m(1,3) + ai2 * m(2,3) + ai3 * m(3,3);
-        }
-    }
-
-    // Get scale matrix.
-    inline Matrix scale(Vector3::Arg s)
-    {
-        Matrix m(identity);
-        m(0,0) = s.x;
-        m(1,1) = s.y;
-        m(2,2) = s.z;
-        return m;
-    }
-
-    // Get scale matrix.
-    inline Matrix scale(float s)
-    {
-        Matrix m(identity);
-        m(0,0) = m(1,1) = m(2,2) = s;
-        return m;
-    }
-
-    // Get translation matrix.
-    inline Matrix translation(Vector3::Arg t)
-    {
-        Matrix m(identity);
-        m(0,3) = t.x;
-        m(1,3) = t.y;
-        m(2,3) = t.z;
-        return m;
-    }
-
-    // Get rotation matrix.
-    inline Matrix rotation(float theta, float v0, float v1, float v2)
-    {
-        float cost = cosf(theta);
-        float sint = sinf(theta);
-
-        Matrix m(identity);
-
-        if( 1 == v0 && 0 == v1 && 0 == v2 ) {
-            m(1,1) = cost; m(2,1) = -sint;
-            m(1,2) = sint; m(2,2) = cost;
-        }
-        else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
-            m(0,0) = cost; m(2,0) = sint;
-            m(1,2) = -sint; m(2,2) = cost;
-        }
-        else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
-            m(0,0) = cost; m(1,0) = -sint;
-            m(0,1) = sint; m(1,1) = cost;
-        } 
-        else {
-            float a2, b2, c2;
-            a2 = v0 * v0;
-            b2 = v1 * v1;
-            c2 = v2 * v2;
-
-            float iscale = 1.0f / sqrtf(a2 + b2 + c2);
-            v0 *= iscale;
-            v1 *= iscale;
-            v2 *= iscale;
-
-            float abm, acm, bcm;
-            float mcos, asin, bsin, csin;
-            mcos = 1.0f - cost;
-            abm = v0 * v1 * mcos;
-            acm = v0 * v2 * mcos;
-            bcm = v1 * v2 * mcos;
-            asin = v0 * sint;
-            bsin = v1 * sint;
-            csin = v2 * sint;
-            m(0,0) = a2 * mcos + cost;
-            m(1,0) = abm - csin;
-            m(2,0) = acm + bsin;
-            m(3,0) = abm + csin;
-            m(1,1) = b2 * mcos + cost;
-            m(2,1) = bcm - asin;
-            m(3,1) = acm - bsin;
-            m(1,2) = bcm + asin;
-            m(2,2) = c2 * mcos + cost;
-        }
-        return m;
-    }
-
-    //Matrix rotation(float yaw, float pitch, float roll);
-    //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2);
-
-    // Get frustum matrix.
-    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
-    {
-        Matrix m(0.0f);
-
-        float doubleznear = 2.0f * zNear;
-        float one_deltax = 1.0f / (xmax - xmin);
-        float one_deltay = 1.0f / (ymax - ymin);
-        float one_deltaz = 1.0f / (zFar - zNear);
-
-        m(0,0) = doubleznear * one_deltax;
-        m(1,1) = doubleznear * one_deltay;
-        m(0,2) = (xmax + xmin) * one_deltax;
-        m(1,2) = (ymax + ymin) * one_deltay;
-        m(2,2) = -(zFar + zNear) * one_deltaz;
-        m(3,2) = -1.0f;
-        m(2,3) = -(zFar * doubleznear) * one_deltaz;
-
-        return m;
-    }
-
-    // Get inverse frustum matrix.
-    inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
-    {
-        Matrix m(0.0f);
-
-        float one_doubleznear = 1.0f / (2.0f * zNear);
-        float one_doubleznearzfar = 1.0f / (2.0f * zNear * zFar);
-
-        m(0,0) = (xmax - xmin) * one_doubleznear;
-        m(0,3) = (xmax + xmin) * one_doubleznear;
-        m(1,1) = (ymax - ymin) * one_doubleznear;
-        m(1,3) = (ymax + ymin) * one_doubleznear;
-        m(2,3) = -1;
-        m(3,2) = -(zFar - zNear) * one_doubleznearzfar;
-        m(3,3) = (zFar + zNear) * one_doubleznearzfar;
-
-        return m;
-    }
-
-    // Get infinite frustum matrix.
-    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
-    {
-        Matrix m(0.0f);
-
-        float doubleznear = 2.0f * zNear;
-        float one_deltax = 1.0f / (xmax - xmin);
-        float one_deltay = 1.0f / (ymax - ymin);
-        float nudge = 1.0; // 0.999;
-
-        m(0,0) = doubleznear * one_deltax;
-        m(1,1) = doubleznear * one_deltay;
-        m(0,2) = (xmax + xmin) * one_deltax;
-        m(1,2) = (ymax + ymin) * one_deltay;
-        m(2,2) = -1.0f * nudge;
-        m(3,2) = -1.0f;
-        m(2,3) = -doubleznear * nudge;
-
-        return m;
-    }
-
-    // Get perspective matrix.
-    inline Matrix perspective(float fovy, float aspect, float zNear, float zFar)
-    {
-        float xmax = zNear * tanf(fovy / 2);
-        float xmin = -xmax;
-
-        float ymax = xmax / aspect;
-        float ymin = -ymax;
-
-        return frustum(xmin, xmax, ymin, ymax, zNear, zFar);	
-    }
-
-    // Get inverse perspective matrix.
-    inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
-    {
-        float xmax = zNear * tanf(fovy / 2);
-        float xmin = -xmax;
-
-        float ymax = xmax / aspect;
-        float ymin = -ymax;
-
-        return frustumInverse(xmin, xmax, ymin, ymax, zNear, zFar);	
-    }
-
-    // Get infinite perspective matrix.
-    inline Matrix perspective(float fovy, float aspect, float zNear)
-    {
-        float x = zNear * tanf(fovy / 2);
-        float y = x / aspect;
-        return frustum( -x, x, -y, y, zNear );	
-    }
-
-    // Get matrix determinant.
-    inline float Matrix::determinant() const
-    {
-        return 
-            m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] +
-            m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] +
-            m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] +
-            m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] +
-            m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] +
-            m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15];
-    }
-
-    inline Matrix transpose(Matrix::Arg m)
-    {
-        Matrix r;
-        for (int i = 0; i < 4; i++)
-        {
-            for (int j = 0; j < 4; j++)
-            {
-                r(i, j) = m(j, i);
-            }
-        }
-        return r;
-    }
-
-    // Inverse using Cramer's rule.
-    inline Matrix inverseCramer(Matrix::Arg m)
-    {
-        Matrix r;
-        r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
-        r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15);
-        r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15);
-        r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11);
-        r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15);
-        r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15);
-        r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15);
-        r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11);
-        r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15);
-        r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15);
-        r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15);
-        r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11);
-        r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14);
-        r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14);
-        r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14);
-        r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10);
-        r.scale(1.0f / m.determinant());
-        return r;
-    }
-
-    inline Matrix isometryInverse(Matrix::Arg m)
-    {
-        Matrix r(identity);
-
-        // transposed 3x3 upper left matrix
-        for (int i = 0; i < 3; i++)
-        {
-            for (int j = 0; j < 3; j++)
-            {
-                r(i, j) = m(j, i);
-            }
-        }
-
-        // translate by the negative offsets
-        r.translate(-Vector3(m.data(12), m.data(13), m.data(14)));
-
-        return r;
-    }
-
-    // Transform the given 3d point with the given matrix.
-    inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p)
-    {
-        return Vector3(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3));
-    }
-
-    // Transform the given 3d vector with the given matrix.
-    inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p)
-    {
-        return Vector3(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2));
-    }
-
-    // Transform the given 4d vector with the given matrix.
-    inline Vector4 transform(Matrix::Arg m, Vector4::Arg p)
-    {
-        return Vector4(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3),
-            p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3));
-    }
-
-    inline Matrix mul(Matrix::Arg a, Matrix::Arg b)
-    {
-        // @@ Is this the right order? mul(a, b) = b * a
-        Matrix m = a;
-        m.apply(b);
-        return m;
-    }
-
-    inline void Matrix::operator+=(const Matrix & m)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] += m.m_data[i];
-        }
-    }
-
-    inline void Matrix::operator-=(const Matrix & m)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] -= m.m_data[i];
-        }
-    }
-
-    inline Matrix operator+(const Matrix & a, const Matrix & b)
-    {
-        Matrix m = a;
-        m += b;
-        return m;
-    }
-
-    inline Matrix operator-(const Matrix & a, const Matrix & b)
-    {
-        Matrix m = a;
-        m -= b;
-        return m;
-    }
-
-
-} // nv namespace
-
-
-#if 0 // old code.
-/** @name Special matrices. */
-//@{
-/** Generate a translation matrix. */
-void TranslationMatrix(const Vec3 & v) {
-    data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0;
-    data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0;
-    data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0;
-    data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1;
-}
-
-/** Rotate theta degrees around v. */
-void RotationMatrix( float theta, float v0, float v1, float v2 ) {
-    float cost = cos(theta);
-    float sint = sin(theta);
-
-    if( 1 == v0 && 0 == v1 && 0 == v2 ) {
-        data[0] = 1.0f;	data[1] = 0.0f;	data[2] = 0.0f;	data[3] = 0.0f;
-        data[4] = 0.0f;	data[5] = cost;	data[6] = -sint;data[7] = 0.0f;
-        data[8] = 0.0f;	data[9] = sint;	data[10] = cost;data[11] = 0.0f;
-        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
-    }
-    else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
-        data[0] = cost;	data[1] = 0.0f;	data[2] = sint;	data[3] = 0.0f;
-        data[4] = 0.0f;	data[5] = 1.0f;	data[6] = 0.0f;	data[7] = 0.0f;
-        data[8] = -sint;data[9] = 0.0f;data[10] = cost;	data[11] = 0.0f;
-        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
-    }
-    else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
-        data[0] = cost;	data[1] = -sint;data[2] = 0.0f;	data[3] = 0.0f;
-        data[4] = sint; data[5] = cost;	data[6] = 0.0f;	data[7] = 0.0f;
-        data[8] = 0.0f;	data[9] = 0.0f;	data[10] = 1.0f;data[11] = 0.0f;
-        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
-    } 
-    else {
-        //we need scale a,b,c to unit length.
-        float a2, b2, c2;
-        a2 = v0 * v0;
-        b2 = v1 * v1;
-        c2 = v2 * v2;
-
-        float iscale = 1.0f / sqrtf(a2 + b2 + c2);
-        v0 *= iscale;
-        v1 *= iscale;
-        v2 *= iscale;
-
-        float abm, acm, bcm;
-        float mcos, asin, bsin, csin;
-        mcos = 1.0f - cost;
-        abm = v0 * v1 * mcos;
-        acm = v0 * v2 * mcos;
-        bcm = v1 * v2 * mcos;
-        asin = v0 * sint;
-        bsin = v1 * sint;
-        csin = v2 * sint;
-        data[0] = a2 * mcos + cost;
-        data[1] = abm - csin;
-        data[2] = acm + bsin;
-        data[3] = abm + csin;
-        data[4] = 0.0f;
-        data[5] = b2 * mcos + cost;
-        data[6] = bcm - asin;
-        data[7] = acm - bsin;
-        data[8] = 0.0f;
-        data[9] = bcm + asin;
-        data[10] = c2 * mcos + cost;
-        data[11] = 0.0f;
-        data[12] = 0.0f;
-        data[13] = 0.0f;
-        data[14] = 0.0f;
-        data[15] = 1.0f;
-    }
-}
-
-/*
-void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) {
-v1.Normalize();
-v2.Normalize();
-
-Vec3 v3;
-v3.Cross(v1, v2);
-v3.Normalize();
-
-// Get skew factor.
-float costheta = Vec3DotProduct(v1, v2);
-float sintheta = Real.Sqrt(1 - costheta * costheta);
-float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta;
-
-// Build orthonormal matrix.
-v1 = FXVector3.Cross(v3, v2);
-v1.Normalize();
-
-Matrix R = Matrix::Identity;
-R[0, 0] = v3.X;�// Not sure this is in the correct order...
-R[1, 0] = v3.Y;
-R[2, 0] = v3.Z;
-R[0, 1] = v1.X;
-R[1, 1] = v1.Y;
-R[2, 1] = v1.Z;
-R[0, 2] = v2.X;
-R[1, 2] = v2.Y;
-R[2, 2] = v2.Z;
-
-// Build skew matrix.
-Matrix S = Matrix::Identity;
-S[2, 1] = -skew;
-
-// Return skew transform.
-return R * S * R.Transpose;	// Not sure this is in the correct order...
-}
-*/
-
-/**
-* Generate rotation matrix for the euler angles. This is the same as computing
-* 3 rotation matrices and multiplying them together in our custom order.
-*
-* @todo Have to recompute this code for our new convention.
-**/
-void RotationMatrix( float yaw, float pitch, float roll ) {
-    float sy = sin(yaw+ToRadian(90));
-    float cy = cos(yaw+ToRadian(90));
-    float sp = sin(pitch-ToRadian(90));
-    float cp = cos(pitch-ToRadian(90));
-    float sr = sin(roll);
-    float cr = cos(roll);
-
-    data[0] = cr*cy + sr*sp*sy;
-    data[1] = cp*sy;
-    data[2] = -sr*cy + cr*sp*sy;
-    data[3] = 0;
-
-    data[4] = -cr*sy + sr*sp*cy;
-    data[5] = cp*cy;
-    data[6] = sr*sy + cr*sp*cy;
-    data[7] = 0;
-
-    data[8] = sr*cp;
-    data[9] = -sp;
-    data[10] = cr*cp;
-    data[11] = 0;
-
-    data[12] = 0;
-    data[13] = 0;
-    data[14] = 0;
-    data[15] = 1;
-}
-
-/** Create a frustum matrix with the far plane at the infinity. */
-void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) {
-    float one_deltax, one_deltay, one_deltaz, doubleznear;
-
-    doubleznear = 2.0f * zNear;
-    one_deltax = 1.0f / (xmax - xmin);
-    one_deltay = 1.0f / (ymax - ymin);
-    one_deltaz = 1.0f / (zFar - zNear);
-
-    data[0] = (float)(doubleznear * one_deltax);
-    data[1] = 0.0f;
-    data[2] = 0.0f;
-    data[3] = 0.0f;
-    data[4] = 0.0f;
-    data[5] = (float)(doubleznear * one_deltay);
-    data[6] = 0.f;
-    data[7] = 0.f;
-    data[8] = (float)((xmax + xmin) * one_deltax);
-    data[9] = (float)((ymax + ymin) * one_deltay);
-    data[10] = (float)(-(zFar + zNear) * one_deltaz);
-    data[11] = -1.f;
-    data[12] = 0.f;
-    data[13] = 0.f;
-    data[14] = (float)(-(zFar * doubleznear) * one_deltaz);
-    data[15] = 0.f;
-}
-
-/** Create a frustum matrix with the far plane at the infinity. */
-void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) {
-    float one_deltax, one_deltay, doubleznear, nudge;
-
-    doubleznear = 2.0f * zNear;
-    one_deltax = 1.0f / (xmax - xmin);
-    one_deltay = 1.0f / (ymax - ymin);
-    nudge = 1.0; // 0.999;
-
-    data[0] = doubleznear * one_deltax;
-    data[1] = 0.0f;
-    data[2] = 0.0f;
-    data[3] = 0.0f;
-
-    data[4] = 0.0f;
-    data[5] = doubleznear * one_deltay;
-    data[6] = 0.f;
-    data[7] = 0.f;
-
-    data[8] = (xmax + xmin) * one_deltax;
-    data[9] = (ymax + ymin) * one_deltay;
-    data[10] = -1.0f * nudge;
-    data[11] = -1.0f;
-
-    data[12] = 0.f;
-    data[13] = 0.f;
-    data[14] = -doubleznear * nudge;
-    data[15] = 0.f;
-}
-
-/** Create an inverse frustum matrix with the far plane at the infinity. */
-void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) {
-    // this matrix is wrong (not tested floatly) I think it should be transposed.
-    data[0] = (right - left) / (2 * zNear);
-    data[1] = 0;
-    data[2] = 0;
-    data[3] = (right + left) / (2 * zNear);
-    data[4] = 0;
-    data[5] = (top - bottom) / (2 * zNear);
-    data[6] = 0;
-    data[7] = (top + bottom) / (2 * zNear);
-    data[8] = 0;
-    data[9] = 0;
-    data[10] = 0;
-    data[11] = -1;
-    data[12] = 0;
-    data[13] = 0;
-    data[14] = -1 / (2 * zNear);
-    data[15] = 1 / (2 * zNear);
-}
-
-/** Create an homogeneous projection matrix. */
-void Perspective( float fov, float aspect, float zNear, float zFar ) {
-    float xmin, xmax, ymin, ymax;
-
-    xmax = zNear * tan( fov/2 );
-    xmin = -xmax;
-
-    ymax = xmax / aspect;
-    ymin = -ymax;
-
-    Frustum(xmin, xmax, ymin, ymax, zNear, zFar);
-}
-
-/** Create a projection matrix with the far plane at the infinity. */
-void PerspectiveInf( float fov, float aspect, float zNear ) {
-    float x = zNear * tan( fov/2 );
-    float y = x / aspect;
-    FrustumInf( -x, x, -y, y, zNear );
-}
-
-/** Create an inverse projection matrix with far plane at the infinity. */
-void PerspectiveInfInv( float fov, float aspect, float zNear ) {
-    float x = zNear * tan( fov/2 );
-    float y = x / aspect;
-    FrustumInfInv( -x, x, -y, y, zNear );
-}
-
-/** Build bone matrix from quatertion and offset. */
-void BoneMatrix(const Quat & q, const Vec3 & offset) {
-    float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz;
-
-    // calculate coefficients
-    x2 = q.x + q.x;
-    y2 = q.y + q.y;
-    z2 = q.z + q.z;
-
-    xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
-    yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
-    wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
-
-    data[0] = 1.0f - (yy + zz); 	
-    data[1] = xy - wz;
-    data[2] = xz + wy;		
-    data[3] = 0.0f;
-
-    data[4] = xy + wz;		
-    data[5] = 1.0f - (xx + zz);
-    data[6] = yz - wx;		
-    data[7] = 0.0f;
-
-    data[8] = xz - wy;		
-    data[9] = yz + wx;
-    data[10] = 1.0f - (xx + yy);		
-    data[11] = 0.0f;
-
-    data[12] = offset.x;
-    data[13] = offset.y;
-    data[14] = offset.z;			
-    data[15] = 1.0f;
-}
-
-//@}
-
-
-/** @name Transformations: */
-//@{
-
-/** Apply a general scale. */
-void Scale( float x, float y, float z ) {
-    data[0] *= x;	data[4] *= y;	data[8]  *= z;
-    data[1] *= x;	data[5] *= y;	data[9]  *= z;
-    data[2] *= x;	data[6] *= y;	data[10] *= z;
-    data[3] *= x;	data[7] *= y;	data[11] *= z;
-}
-
-/** Apply a rotation of theta degrees around the axis v*/
-void Rotate( float theta, const Vec3 & v ) {
-    Matrix b;
-    b.RotationMatrix( theta, v[0], v[1], v[2] );
-    Multiply4x3( b );
-}
-
-/** Apply a rotation of theta degrees around the axis v*/
-void Rotate( float theta, float v0, float v1, float v2 ) {
-    Matrix b;
-    b.RotationMatrix( theta, v0, v1, v2 );
-    Multiply4x3( b );
-}
-
-/**
-* Translate the matrix by t. This is the same as multiplying by a
-* translation matrix with the given offset.
-* this = T * this
-*/
-void Translate( const Vec3 &t ) {
-    data[12] = data[0] * t.x + data[4] * t.y + data[8]  * t.z + data[12];
-    data[13] = data[1] * t.x + data[5] * t.y + data[9]  * t.z + data[13];
-    data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14];
-    data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15];
-}
-
-/** 
-* Translate the matrix by x, y, z. This is the same as multiplying by a 
-* translation matrix with the given offsets.
-*/
-void Translate( float x, float y, float z ) {
-    data[12] = data[0] * x + data[4] * y + data[8]  * z + data[12];
-    data[13] = data[1] * x + data[5] * y + data[9]  * z + data[13];
-    data[14] = data[2] * x + data[6] * y + data[10] * z + data[14];
-    data[15] = data[3] * x + data[7] * y + data[11] * z + data[15];
-}
-
-/** Compute the transposed matrix. */
-void Transpose() {
-    piSwap(data[1], data[4]);
-    piSwap(data[2], data[8]);
-    piSwap(data[6], data[9]);
-    piSwap(data[3], data[12]);
-    piSwap(data[7], data[13]);
-    piSwap(data[11], data[14]);
-}
-
-/** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */
-void IsometryInverse() {
-    // transposed 3x3 upper left matrix
-    piSwap(data[1], data[4]);
-    piSwap(data[2], data[8]);
-    piSwap(data[6], data[9]);
-
-    // translate by the negative offsets
-    Vec3 v(-data[12], -data[13], -data[14]);
-    data[12] = data[13] = data[14] = 0;
-    Translate(v);
-}
-
-/** Compute the inverse of the affine portion of this matrix. */
-void AffineInverse() {
-    data[12] = data[13] = data[14] = 0;
-    Transpose();
-}
-//@}
-
-/** @name Matrix operations: */
-//@{
-
-/** Return the determinant of this matrix. */
-float Determinant() const {
-    return	data[0] * data[5] * data[10] * data[15] + 
-        data[1] * data[6] * data[11] * data[12] +
-        data[2] * data[7] * data[ 8] * data[13] +
-        data[3] * data[4] * data[ 9] * data[14] -
-        data[3] * data[6] * data[ 9] * data[12] -
-        data[2] * data[5] * data[ 8] * data[15] -
-        data[1] * data[4] * data[11] * data[14] -
-        data[0] * data[7] * data[10] * data[12];
-}
-
-
-/** Standard matrix product: this *= B. */
-void Multiply4x4( const Matrix & restrict B ) {
-    Multiply4x4(*this, B);
-}
-
-/** Standard matrix product: this = A * B. this != B*/
-void Multiply4x4( const Matrix & A, const Matrix & restrict B ) {
-    piDebugCheck(this != &B);
-
-    for(int i = 0; i < 4; i++) {
-        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
-        GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
-        GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
-        GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
-        GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
-    }
-
-    /* Unrolled but does not allow this == A
-    data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3];
-    data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3];
-    data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3];
-    data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3];
-    data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7];
-    data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7];
-    data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7];
-    data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7];
-    data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11];
-    data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11];
-    data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11];
-    data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11];
-    data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15];
-    data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15];
-    data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15];
-    data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15];
-    */
-}
-
-/** Standard matrix product: this *= B. */
-void Multiply4x3( const Matrix & restrict B ) {
-    Multiply4x3(*this, B);
-}
-
-/** Standard product of matrices, where the last row is [0 0 0 1]. */
-void Multiply4x3( const Matrix & A, const Matrix & restrict B ) {
-    piDebugCheck(this != &B);
-
-    for(int i = 0; i < 3; i++) {
-        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
-        GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
-        GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
-        GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
-        GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
-    }
-    data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f;
-
-    /* Unrolled but does not allow this == A
-    data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3];
-    data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3];
-    data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3];
-    data[3] = 0.0f;
-    data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7];
-    data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7];
-    data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7];
-    data[7] = 0.0f;
-    data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11];
-    data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11];
-    data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11];
-    data[11]= 0.0f;
-    data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15];
-    data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15];
-    data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15];
-    data[15]= 1.0f;
-    */
-}
-//@}
-
-
-/** @name Vector operations: */
-//@{
-
-/** Transform 3d vector (w=0). */
-void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10];
-}
-/** Transform 3d vector by the transpose (w=0). */
-void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2];
-    dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6];
-    dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10];
-}
-
-/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */
-void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-}
-
-/** Transform a point, normalize it, and return w. */
-float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    float w;
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-    w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]);
-    *dest *= w;
-    return w;
-}
-
-/** Transform a point and return w. */
-float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-    return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
-}
-
-/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */
-void TransformVec4(const Vec3 & orig, Vec4 * dest) const {
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-    dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
-}
-//@}
-
-/** @name Matrix analysis. */
-//@{
-
-/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */
-void GetEulerAnglesZYZ(float * s, float * t, float * r) const {
-    if( GetElem(2,2) < 1.0f ) {
-        if( GetElem(2,2) > -1.0f ) {
-            // 	cs*ct*cr-ss*sr 		-ss*ct*cr-cs*sr		st*cr
-            //	cs*ct*sr+ss*cr		-ss*ct*sr+cs*cr		st*sr
-            //	-cs*st				ss*st				ct
-            *s = atan2(GetElem(1,2), -GetElem(0,2));
-            *t = acos(GetElem(2,2));
-            *r = atan2(GetElem(2,1), GetElem(2,0));		
-        }
-        else {
-            // 	-c(s-r)	 	s(s-r)		0
-            //	s(s-r)		c(s-r)		0
-            //	0			0			-1
-            *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r
-            *t = PI;
-            *r = 0;
-        }
-    }
-    else {
-        // 	c(s+r)		-s(s+r)		0
-        //	s(s+r)		c(s+r)		0
-        //	0			0			1
-        *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r
-        *t = 0;
-        *r = 0;
-    }
-}
-
-//@}
-
-MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m );
-
-/** Print to debug output. */
-void Print() const {
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] );
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] );
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] );
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] );
-}
-
-
-public:
-
-    float data[16];
-
-};
-#endif
-
-
-#endif // NV_MATH_MATRIX_INL
diff --git a/3rdparty/nvtt/nvmath/nvmath.h b/3rdparty/nvtt/nvmath/nvmath.h
deleted file mode 100644
index 94f7ec794..000000000
--- a/3rdparty/nvtt/nvmath/nvmath.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_H
-#define NV_MATH_H
-
-#include <cmath>
-#include <float.h>  // finite, isnan
-
-#include "nvcore/utils.h"   // max, clamp
-
-#define NVMATH_API
-#define NVMATH_CLASS
-
-#define PI                  float(3.1415926535897932384626433833)
-#define NV_EPSILON          (0.0001f)
-#define NV_NORMAL_EPSILON   (0.001f)
-
-namespace nv
-{
-    inline float toRadian(float degree) { return degree * (PI / 180.0f); }
-    inline float toDegree(float radian) { return radian * (180.0f / PI); }
-
-    // Robust floating point comparisons:
-    // http://realtimecollisiondetection.net/blog/?p=89
-    inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
-    {
-        //return fabs(f0-f1) <= epsilon;
-        return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));
-    }
-
-    inline bool isZero(const float f, const float epsilon = NV_EPSILON)
-    {
-        return fabsf(f) <= epsilon;
-    }
-
-    inline bool isFinite(const float f)
-    {
-#if defined(_MSC_VER) && _MSC_VER <= 1800
-		(void)f;
-		return true;
-#else
-		return std::isfinite(f);
-#endif // defined(_MSC_VER) && _MSC_VER <= 1800
-    }
-
-    // Eliminates negative zeros from a float array.
-    inline void floatCleanup(float * fp, int n)
-    {
-        for (int i = 0; i < n; i++) {
-            //nvDebugCheck(isFinite(fp[i]));
-            union { float f; uint32 i; } x = { fp[i] };
-            if (x.i == 0x80000000) fp[i] = 0.0f;
-        }
-    }
-
-    inline float saturate(float f) {
-        return clamp(f, 0.0f, 1.0f);
-    }
-}
-
-#endif // NV_MATH_H
diff --git a/3rdparty/nvtt/nvmath/plane.h b/3rdparty/nvtt/nvmath/plane.h
deleted file mode 100644
index eb544b137..000000000
--- a/3rdparty/nvtt/nvmath/plane.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#ifndef NV_MATH_PLANE_H
-#define NV_MATH_PLANE_H
-
-#include "nvmath.h"
-#include "vector.h"
-
-namespace nv
-{
-    class Matrix;
-
-    class NVMATH_CLASS Plane
-    {
-    public:
-        Plane();
-        Plane(float x, float y, float z, float w);
-        Plane(const Vector4 & v);
-        Plane(const Vector3 & v, float d);
-        Plane(const Vector3 & normal, const Vector3 & point);
-        Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2);
-
-        const Plane & operator=(const Plane & v);
-
-        Vector3 vector() const;
-        float offset() const;
-
-        void operator*=(float s);
-
-        Vector4 v;
-    };
-
-    Plane transformPlane(const Matrix &, const Plane &);
-
-    Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c);
-
-
-} // nv namespace
-
-#endif // NV_MATH_PLANE_H
diff --git a/3rdparty/nvtt/nvmath/plane.inl b/3rdparty/nvtt/nvmath/plane.inl
deleted file mode 100644
index 7baf80485..000000000
--- a/3rdparty/nvtt/nvmath/plane.inl
+++ /dev/null
@@ -1,49 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_PLANE_INL
-#define NV_MATH_PLANE_INL
-
-#include "plane.h"
-#include "vector.inl"
-
-namespace nv
-{
-    inline Plane::Plane() {}
-    inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {}
-    inline Plane::Plane(const Vector4 & v) : v(v) {}
-    inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {}
-    inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {}
-    inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) {
-        Vector3 n = cross(v1-v0, v2-v0);
-        float d = -dot(n, v0);
-        v = Vector4(n, d);
-    }
-
-    inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; }
-
-    inline Vector3 Plane::vector() const { return v.xyz(); }
-    inline float Plane::offset() const { return v.w; }
-
-    // Normalize plane.
-    inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)
-    {
-        const float len = length(plane.vector());
-        const float inv = isZero(len, epsilon) ? 0 : 1.0f / len;
-        return Plane(plane.v * inv);
-    }
-
-    // Get the signed distance from the given point to this plane.
-    inline float distance(const Plane & plane, const Vector3 & point)
-    {
-        return dot(plane.vector(), point) + plane.offset();
-    }
-
-    inline void Plane::operator*=(float s)
-    {
-        v *= s;
-    }
-
-} // nv namespace
-
-#endif // NV_MATH_PLANE_H
diff --git a/3rdparty/nvtt/nvmath/vector.h b/3rdparty/nvtt/nvmath/vector.h
deleted file mode 100644
index 180cfab0e..000000000
--- a/3rdparty/nvtt/nvmath/vector.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_VECTOR_H
-#define NV_MATH_VECTOR_H
-
-#include "nvmath.h"
-
-namespace nv
-{
-    class NVMATH_CLASS Vector2
-    {
-    public:
-        typedef Vector2 const & Arg;
-
-        Vector2();
-        explicit Vector2(float f);
-        Vector2(float x, float y);
-        Vector2(Vector2::Arg v);
-
-        //template <typename T> explicit Vector2(const T & v) : x(v.x), y(v.y) {}
-        //template <typename T> operator T() const { return T(x, y); }
-
-        const Vector2 & operator=(Vector2::Arg v);
-
-        const float * ptr() const;
-
-        void set(float x, float y);
-
-        Vector2 operator-() const;
-        void operator+=(Vector2::Arg v);
-        void operator-=(Vector2::Arg v);
-        void operator*=(float s);
-        void operator*=(Vector2::Arg v);
-
-        friend bool operator==(Vector2::Arg a, Vector2::Arg b);
-        friend bool operator!=(Vector2::Arg a, Vector2::Arg b);
-
-        union {
-            struct {
-                float x, y;
-            };
-            float component[2];
-        };
-    };
-
-    class NVMATH_CLASS Vector3
-    {
-    public:
-        typedef Vector3 const & Arg;
-
-        Vector3();
-        explicit Vector3(float x);
-        //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {}
-        Vector3(float x, float y, float z);
-        Vector3(Vector2::Arg v, float z);
-        Vector3(Vector3::Arg v);
-
-        //template <typename T> explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {}
-        //template <typename T> operator T() const { return T(x, y, z); }
-
-        const Vector3 & operator=(Vector3::Arg v);
-
-        Vector2 xy() const;
-
-        const float * ptr() const;
-
-        void set(float x, float y, float z);
-
-        Vector3 operator-() const;
-        void operator+=(Vector3::Arg v);
-        void operator-=(Vector3::Arg v);
-        void operator*=(float s);
-        void operator/=(float s);
-        void operator*=(Vector3::Arg v);
-        void operator/=(Vector3::Arg v);
-
-        friend bool operator==(Vector3::Arg a, Vector3::Arg b);
-        friend bool operator!=(Vector3::Arg a, Vector3::Arg b);
-
-        union {
-            struct {
-                float x, y, z;
-            };
-            float component[3];
-        };
-    };
-
-    class NVMATH_CLASS Vector4
-    {
-    public:
-        typedef Vector4 const & Arg;
-
-        Vector4();
-        explicit Vector4(float x);
-        Vector4(float x, float y, float z, float w);
-        Vector4(Vector2::Arg v, float z, float w);
-        Vector4(Vector2::Arg v, Vector2::Arg u);
-        Vector4(Vector3::Arg v, float w);
-        Vector4(Vector4::Arg v);
-        //	Vector4(const Quaternion & v);
-
-        //template <typename T> explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
-        //template <typename T> operator T() const { return T(x, y, z, w); }
-
-        const Vector4 & operator=(Vector4::Arg v);
-
-        Vector2 xy() const;
-        Vector2 zw() const;
-        Vector3 xyz() const;
-
-        const float * ptr() const;
-
-        void set(float x, float y, float z, float w);
-
-        Vector4 operator-() const;
-        void operator+=(Vector4::Arg v);
-        void operator-=(Vector4::Arg v);
-        void operator*=(float s);
-        void operator/=(float s);
-        void operator*=(Vector4::Arg v);
-        void operator/=(Vector4::Arg v);
-
-        friend bool operator==(Vector4::Arg a, Vector4::Arg b);
-        friend bool operator!=(Vector4::Arg a, Vector4::Arg b);
-
-        union {
-            struct {
-                float x, y, z, w;
-            };
-            float component[4];
-        };
-    };
-
-} // nv namespace
-
-// If we had these functions, they would be ambiguous, the compiler would not know which one to pick:
-//template <typename T> Vector2 to(const T & v) { return Vector2(v.x, v.y); }
-//template <typename T> Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); }
-//template <typename T> Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.z); }
-
-// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages.
-
-// Instead we simply have explicit casts:
-template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
-template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
-template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
-
-#endif // NV_MATH_VECTOR_H
diff --git a/3rdparty/nvtt/nvmath/vector.inl b/3rdparty/nvtt/nvmath/vector.inl
deleted file mode 100644
index 8f1da1eca..000000000
--- a/3rdparty/nvtt/nvmath/vector.inl
+++ /dev/null
@@ -1,921 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_VECTOR_INL
-#define NV_MATH_VECTOR_INL
-
-#include "vector.h"
-#include "nvcore/utils.h" // min, max
-#include "nvcore/hash.h" // hash
-
-namespace nv
-{
-
-    // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor.
-    //template <typename T> T to(Vector2::Arg v) { return T(v.x, v.y); }
-
-    // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
-    //template <typename T> T to(Vector3::Arg v) { return T(v.x, v.y, v.z); }
-
-    // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
-    //template <typename T> T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); }
-
-
-    // Vector2
-    inline Vector2::Vector2() {}
-    inline Vector2::Vector2(float f) : x(f), y(f) {}
-    inline Vector2::Vector2(float x, float y) : x(x), y(y) {}
-    inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {}
-
-    inline const Vector2 & Vector2::operator=(Vector2::Arg v)
-    {
-        x = v.x;
-        y = v.y;
-        return *this;
-    }
-
-    inline const float * Vector2::ptr() const
-    {
-        return &x;
-    }
-
-    inline void Vector2::set(float x, float y)
-    {
-        this->x = x;
-        this->y = y;
-    }
-
-    inline Vector2 Vector2::operator-() const
-    {
-        return Vector2(-x, -y);
-    }
-
-    inline void Vector2::operator+=(Vector2::Arg v)
-    {
-        x += v.x;
-        y += v.y;
-    }
-
-    inline void Vector2::operator-=(Vector2::Arg v)
-    {
-        x -= v.x;
-        y -= v.y;
-    }
-
-    inline void Vector2::operator*=(float s)
-    {
-        x *= s;
-        y *= s;
-    }
-
-    inline void Vector2::operator*=(Vector2::Arg v)
-    {
-        x *= v.x;
-        y *= v.y;
-    }
-
-    inline bool operator==(Vector2::Arg a, Vector2::Arg b)
-    {
-        return a.x == b.x && a.y == b.y; 
-    }
-    inline bool operator!=(Vector2::Arg a, Vector2::Arg b)
-    {
-        return a.x != b.x || a.y != b.y; 
-    }
-
-
-    // Vector3
-    inline Vector3::Vector3() {}
-    inline Vector3::Vector3(float f) : x(f), y(f), z(f) {}
-    inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {}
-    inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {}
-    inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {}
-
-    inline const Vector3 & Vector3::operator=(Vector3::Arg v)
-    {
-        x = v.x;
-        y = v.y;
-        z = v.z;
-        return *this;
-    }
-
-
-    inline Vector2 Vector3::xy() const
-    {
-        return Vector2(x, y);
-    }
-
-    inline const float * Vector3::ptr() const
-    {
-        return &x;
-    }
-
-    inline void Vector3::set(float x, float y, float z)
-    {
-        this->x = x;
-        this->y = y;
-        this->z = z;
-    }
-
-    inline Vector3 Vector3::operator-() const
-    {
-        return Vector3(-x, -y, -z);
-    }
-
-    inline void Vector3::operator+=(Vector3::Arg v)
-    {
-        x += v.x;
-        y += v.y;
-        z += v.z;
-    }
-
-    inline void Vector3::operator-=(Vector3::Arg v)
-    {
-        x -= v.x;
-        y -= v.y;
-        z -= v.z;
-    }
-
-    inline void Vector3::operator*=(float s)
-    {
-        x *= s;
-        y *= s;
-        z *= s;
-    }
-
-    inline void Vector3::operator/=(float s)
-    {
-        float is = 1.0f / s;
-        x *= is;
-        y *= is;
-        z *= is;
-    }
-
-    inline void Vector3::operator*=(Vector3::Arg v)
-    {
-        x *= v.x;
-        y *= v.y;
-        z *= v.z;
-    }
-
-    inline void Vector3::operator/=(Vector3::Arg v)
-    {
-        x /= v.x;
-        y /= v.y;
-        z /= v.z;
-    }
-
-    inline bool operator==(Vector3::Arg a, Vector3::Arg b)
-    {
-        return a.x == b.x && a.y == b.y && a.z == b.z; 
-    }
-    inline bool operator!=(Vector3::Arg a, Vector3::Arg b)
-    {
-        return a.x != b.x || a.y != b.y || a.z != b.z; 
-    }
-
-
-    // Vector4
-    inline Vector4::Vector4() {}
-    inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {}
-    inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
-    inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {}
-    inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
-    inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {}
-    inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
-
-    inline const Vector4 & Vector4::operator=(const Vector4 & v)
-    {
-        x = v.x;
-        y = v.y;
-        z = v.z;
-        w = v.w;
-        return *this;
-    }
-
-    inline Vector2 Vector4::xy() const
-    {
-        return Vector2(x, y);
-    }
-
-    inline Vector2 Vector4::zw() const
-    {
-        return Vector2(z, w);
-    }
-
-    inline Vector3 Vector4::xyz() const
-    {
-        return Vector3(x, y, z);
-    }
-
-    inline const float * Vector4::ptr() const
-    {
-        return &x;
-    }
-
-    inline void Vector4::set(float x, float y, float z, float w)
-    {
-        this->x = x;
-        this->y = y;
-        this->z = z;
-        this->w = w;
-    }
-
-    inline Vector4 Vector4::operator-() const
-    {
-        return Vector4(-x, -y, -z, -w);
-    }
-
-    inline void Vector4::operator+=(Vector4::Arg v)
-    {
-        x += v.x;
-        y += v.y;
-        z += v.z;
-        w += v.w;
-    }
-
-    inline void Vector4::operator-=(Vector4::Arg v)
-    {
-        x -= v.x;
-        y -= v.y;
-        z -= v.z;
-        w -= v.w;
-    }
-
-    inline void Vector4::operator*=(float s)
-    {
-        x *= s;
-        y *= s;
-        z *= s;
-        w *= s;
-    }
-
-    inline void Vector4::operator/=(float s)
-    {
-        x /= s;
-        y /= s;
-        z /= s;
-        w /= s;
-    }
-
-    inline void Vector4::operator*=(Vector4::Arg v)
-    {
-        x *= v.x;
-        y *= v.y;
-        z *= v.z;
-        w *= v.w;
-    }
-
-    inline void Vector4::operator/=(Vector4::Arg v)
-    {
-        x /= v.x;
-        y /= v.y;
-        z /= v.z;
-        w /= v.w;
-    }
-
-    inline bool operator==(Vector4::Arg a, Vector4::Arg b)
-    {
-        return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; 
-    }
-    inline bool operator!=(Vector4::Arg a, Vector4::Arg b)
-    {
-        return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; 
-    }
-
-
-
-    // Functions
-
-
-    // Vector2
-
-    inline Vector2 add(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(a.x + b.x, a.y + b.y);
-    }
-    inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b)
-    {
-        return add(a, b);
-    }
-
-    inline Vector2 sub(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(a.x - b.x, a.y - b.y);
-    }
-    inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b)
-    {
-        return sub(a, b);
-    }
-
-    inline Vector2 scale(Vector2::Arg v, float s)
-    {
-        return Vector2(v.x * s, v.y * s);
-    }
-
-    inline Vector2 scale(Vector2::Arg v, Vector2::Arg s)
-    {
-        return Vector2(v.x * s.x, v.y * s.y);
-    }
-
-    inline Vector2 operator*(Vector2::Arg v, float s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2)
-    {
-        return Vector2(v1.x*v2.x, v1.y*v2.y);
-    }
-
-    inline Vector2 operator*(float s, Vector2::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator/(Vector2::Arg v, float s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t)
-    {
-        const float s = 1.0f - t;
-        return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y);
-    }
-
-    inline float dot(Vector2::Arg a, Vector2::Arg b)
-    {
-        return a.x * b.x + a.y * b.y;
-    }
-
-    inline float lengthSquared(Vector2::Arg v)
-    {
-        return v.x * v.x + v.y * v.y;
-    }
-
-    inline float length(Vector2::Arg v)
-    {
-        return sqrtf(lengthSquared(v));
-    }
-
-    inline float distance(Vector2::Arg a, Vector2::Arg b)
-    {
-        return length(a - b);
-    }
-
-    inline float inverseLength(Vector2::Arg v)
-    {
-        return 1.0f / sqrtf(lengthSquared(v));
-    }
-
-    inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(v), 1, epsilon);
-    }
-
-    inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        NV_UNUSED(epsilon);
-        nvDebugCheck(!isZero(l, epsilon));
-        Vector2 n = scale(v, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        if (isZero(l, epsilon)) {
-            return fallback;
-        }
-        return scale(v, 1.0f / l);
-    }
-
-    // Safe, branchless normalization from Andy Firth. All error checking ommitted.
-    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
-    inline Vector2 normalizeFast(Vector2::Arg v)
-    {
-        const float very_small_float = 1.0e-037f;
-        float l = very_small_float + length(v);
-        return scale(v, 1.0f / l);
-    }
-
-    inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
-    {
-        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon);
-    }
-
-    inline Vector2 min(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(min(a.x, b.x), min(a.y, b.y));
-    }
-
-    inline Vector2 max(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(max(a.x, b.x), max(a.y, b.y));
-    }
-
-    inline Vector2 clamp(Vector2::Arg v, float min, float max)
-    {
-        return Vector2(clamp(v.x, min, max), clamp(v.y, min, max));
-    }
-
-    inline Vector2 saturate(Vector2::Arg v)
-    {
-        return Vector2(saturate(v.x), saturate(v.y));
-    }
-
-    inline bool isFinite(Vector2::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y);
-    }
-
-    inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f))
-    {
-        if (!isFinite(v)) return fallback;
-        Vector2 vf = v;
-        nv::floatCleanup(vf.component, 2);
-        return vf;
-    }
-
-    // Note, this is the area scaled by 2!
-    inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1)
-    {
-	    return (v0.x * v1.y - v0.y * v1.x); // * 0.5f;
-    }
-    inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
-    {
-        // IC: While it may be appealing to use the following expression:
-        //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
-
-        // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point 
-        // numbers and the results becomes very unstable and dependent on the order of the factors.
-
-        // Instead, it's preferable to substract the vertices first, and multiply the resulting small values together. The result
-        // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of 
-        // the triangle.
-
-        //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f;
-        return triangleArea(a-c, b-c);
-    }
-
-
-    template <>
-    inline uint hash(const Vector2 & v, uint h)
-    {
-        return sdbmFloatHash(v.component, 2, h);
-    }
-
-
-
-    // Vector3
-
-    inline Vector3 add(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(a.x + b.x, a.y + b.y, a.z + b.z);
-    }
-    inline Vector3 add(Vector3::Arg a, float b)
-    {
-        return Vector3(a.x + b, a.y + b, a.z + b);
-    }
-    inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b)
-    {
-        return add(a, b);
-    }
-    inline Vector3 operator+(Vector3::Arg a, float b)
-    {
-        return add(a, b);
-    }
-
-    inline Vector3 sub(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
-    }
-    inline Vector3 sub(Vector3::Arg a, float b)
-    {
-        return Vector3(a.x - b, a.y - b, a.z - b);
-    }
-    inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b)
-    {
-        return sub(a, b);
-    }
-    inline Vector3 operator-(Vector3::Arg a, float b)
-    {
-        return sub(a, b);
-    }
-
-    inline Vector3 cross(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
-    }
-
-    inline Vector3 scale(Vector3::Arg v, float s)
-    {
-        return Vector3(v.x * s, v.y * s, v.z * s);
-    }
-
-    inline Vector3 scale(Vector3::Arg v, Vector3::Arg s)
-    {
-        return Vector3(v.x * s.x, v.y * s.y, v.z * s.z);
-    }
-
-    inline Vector3 operator*(Vector3::Arg v, float s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator*(float s, Vector3::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator/(Vector3::Arg v, float s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s)
-    {
-        return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
-    }*/
-
-    inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t)
-    {
-        const float s = 1.0f - t;
-        return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z);
-    }
-
-    inline float dot(Vector3::Arg a, Vector3::Arg b)
-    {
-        return a.x * b.x + a.y * b.y + a.z * b.z;
-    }
-
-    inline float lengthSquared(Vector3::Arg v)
-    {
-        return v.x * v.x + v.y * v.y + v.z * v.z;
-    }
-
-    inline float length(Vector3::Arg v)
-    {
-        return sqrtf(lengthSquared(v));
-    }
-
-    inline float distance(Vector3::Arg a, Vector3::Arg b)
-    {
-        return length(a - b);
-    }
-
-    inline float distanceSquared(Vector3::Arg a, Vector3::Arg b)
-    {
-        return lengthSquared(a - b);
-    }
-
-    inline float inverseLength(Vector3::Arg v)
-    {
-        return 1.0f / sqrtf(lengthSquared(v));
-    }
-
-    inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(v), 1, epsilon);
-    }
-
-    inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        NV_UNUSED(epsilon);
-        nvDebugCheck(!isZero(l, epsilon));
-        Vector3 n = scale(v, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        if (isZero(l, epsilon)) {
-            return fallback;
-        }
-        return scale(v, 1.0f / l);
-    }
-
-    // Safe, branchless normalization from Andy Firth. All error checking ommitted.
-    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
-    inline Vector3 normalizeFast(Vector3::Arg v)
-    {
-        const float very_small_float = 1.0e-037f;
-        float l = very_small_float + length(v);
-        return scale(v, 1.0f / l);
-    }
-
-    inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
-    {
-        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
-    }
-
-    inline Vector3 min(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-    }
-
-    inline Vector3 max(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-    }
-
-    inline Vector3 clamp(Vector3::Arg v, float min, float max)
-    {
-        return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max));
-    }
-
-    inline Vector3 saturate(Vector3::Arg v)
-    {
-        return Vector3(saturate(v.x), saturate(v.y), saturate(v.z));
-    }
-
-    inline Vector3 floor(Vector3::Arg v)
-    {
-        return Vector3(floorf(v.x), floorf(v.y), floorf(v.z));
-    }
-
-    inline Vector3 ceil(Vector3::Arg v)
-    {
-        return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z));
-    }
-
-    inline bool isFinite(Vector3::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
-    }
-
-    inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f))
-    {
-        if (!isFinite(v)) return fallback;
-        Vector3 vf = v;
-        nv::floatCleanup(vf.component, 3);
-        return vf;
-    }
-
-    inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n)
-    {
-	    return v - (2 * dot(v, n)) * n;
-    }
-
-    template <>
-    inline uint hash(const Vector3 & v, uint h)
-    {
-        return sdbmFloatHash(v.component, 3, h);
-    }
-
-
-    // Vector4
-
-    inline Vector4 add(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-    }
-    inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b)
-    {
-        return add(a, b);
-    }
-
-    inline Vector4 sub(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-    }
-    inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b)
-    {
-        return sub(a, b);
-    }
-
-    inline Vector4 scale(Vector4::Arg v, float s)
-    {
-        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
-    }
-
-    inline Vector4 scale(Vector4::Arg v, Vector4::Arg s)
-    {
-        return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w);
-    }
-
-    inline Vector4 operator*(Vector4::Arg v, float s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator*(float s, Vector4::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator/(Vector4::Arg v, float s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s)
-    {
-        return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s);
-    }*/
-
-    inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t)
-    {
-        const float s = 1.0f - t;
-        return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w);
-    }
-
-    inline float dot(Vector4::Arg a, Vector4::Arg b)
-    {
-        return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-    }
-
-    inline float lengthSquared(Vector4::Arg v)
-    {
-        return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
-    }
-
-    inline float length(Vector4::Arg v)
-    {
-        return sqrtf(lengthSquared(v));
-    }
-
-    inline float inverseLength(Vector4::Arg v)
-    {
-        return 1.0f / sqrtf(lengthSquared(v));
-    }
-
-    inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(v), 1, epsilon);
-    }
-
-    inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        NV_UNUSED(epsilon);
-        nvDebugCheck(!isZero(l, epsilon));
-        Vector4 n = scale(v, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        if (isZero(l, epsilon)) {
-            return fallback;
-        }
-        return scale(v, 1.0f / l);
-    }
-
-    // Safe, branchless normalization from Andy Firth. All error checking ommitted.
-    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
-    inline Vector4 normalizeFast(Vector4::Arg v)
-    {
-        const float very_small_float = 1.0e-037f;
-        float l = very_small_float + length(v);
-        return scale(v, 1.0f / l);
-    }
-
-    inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
-    {
-        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);
-    }
-
-    inline Vector4 min(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-    }
-
-    inline Vector4 max(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-    }
-
-    inline Vector4 clamp(Vector4::Arg v, float min, float max)
-    {
-        return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max));
-    }
-
-    inline Vector4 saturate(Vector4::Arg v)
-    {
-        return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w));
-    }
-
-    inline bool isFinite(Vector4::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w);
-    }
-
-    inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f))
-    {
-        if (!isFinite(v)) return fallback;
-        Vector4 vf = v;
-        nv::floatCleanup(vf.component, 4);
-        return vf;
-    }
-
-    template <>
-    inline uint hash(const Vector4 & v, uint h)
-    {
-        return sdbmFloatHash(v.component, 4, h);
-    }
-
-
-#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float
-
-    //int:
-
-    inline Vector2 scale(Vector2::Arg v, int s)
-    {
-        return Vector2(v.x * s, v.y * s);
-    }
-
-    inline Vector2 operator*(Vector2::Arg v, int s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator*(int s, Vector2::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator/(Vector2::Arg v, int s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    inline Vector3 scale(Vector3::Arg v, int s)
-    {
-        return Vector3(v.x * s, v.y * s, v.z * s);
-    }
-
-    inline Vector3 operator*(Vector3::Arg v, int s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator*(int s, Vector3::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator/(Vector3::Arg v, int s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    inline Vector4 scale(Vector4::Arg v, int s)
-    {
-        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
-    }
-
-    inline Vector4 operator*(Vector4::Arg v, int s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator*(int s, Vector4::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator/(Vector4::Arg v, int s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    //double:
-
-    inline Vector3 operator*(Vector3::Arg v, double s)
-    {
-        return scale(v, (float)s);
-    }
-
-    inline Vector3 operator*(double s, Vector3::Arg v)
-    {
-        return scale(v, (float)s);
-    }
-
-    inline Vector3 operator/(Vector3::Arg v, double s)
-    {
-        return scale(v, 1.f/((float)s));
-    }    
-        
-#endif //NV_OS_IOS
-
-} // nv namespace
-
-#endif // NV_MATH_VECTOR_INL
diff --git a/3rdparty/nvtt/nvtt.cpp b/3rdparty/nvtt/nvtt.cpp
deleted file mode 100644
index 51a2bce3f..000000000
--- a/3rdparty/nvtt/nvtt.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
- * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
- */
-
-#include "nvtt.h"
-
-#include <string.h>
-#include <bx/uint32_t.h>
-
-#include "bc6h/zoh.h"
-#include "bc7/avpcl.h"
-#include "nvmath/vector.inl"
-
-NVCORE_API int nvAbort(const char *, const char *, int , const char *, const char *, ...)
-{
-	abort();
-	return 0;
-}
-
-namespace nvtt
-{
-	using namespace nv;
-
-	void compressBC6H(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output)
-	{
-		const uint8_t* src = (const uint8_t*)_input;
-		char* dst = (char*)_output;
-
-		for (uint32_t yy = 0; yy < _height; yy += 4)
-		{
-			for (uint32_t xx = 0; xx < _width; xx += 4)
-			{
-				const Vector4* rgba = (const Vector4*)&src[yy*_stride + xx*sizeof(float)*4];
-
-				ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16;
-				ZOH::Tile zohTile(4, 4);
-
-				memset(zohTile.data, 0, sizeof(zohTile.data) );
-				memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map) );
-
-				for (uint32_t blockY = 0; blockY < 4; ++blockY)
-				{
-					for (uint32_t blockX = 0; blockX < 4; ++blockX)
-					{
-						Vector4 color = rgba[blockY*4 + blockX];
-						uint16 rHalf = bx::halfFromFloat(color.x);
-						uint16 gHalf = bx::halfFromFloat(color.y);
-						uint16 bHalf = bx::halfFromFloat(color.z);
-						zohTile.data[blockY][blockX].x = ZOH::Tile::half2float(rHalf);
-						zohTile.data[blockY][blockX].y = ZOH::Tile::half2float(gHalf);
-						zohTile.data[blockY][blockX].z = ZOH::Tile::half2float(bHalf);
-						zohTile.importance_map[blockY][blockX] = 1.0f;
-					}
-				}
-
-				ZOH::compress(zohTile, &dst[( (yy*_width) + xx)/4 * 16]);
-			}
-		}
-	}
-
-	void compressBC7(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output)
-	{
-		const uint8_t* src = (const uint8_t*)_input;
-		char* dst = (char*)_output;
-
-		for (uint32_t yy = 0; yy < _height; yy += 4)
-		{
-			for (uint32_t xx = 0; xx < _width; xx += 4)
-			{
-				const Vector4* rgba = (const Vector4*)&src[yy*_stride + xx*sizeof(float)*4];
-
-				AVPCL::mode_rgb     = false;
-				AVPCL::flag_premult = false;
-				AVPCL::flag_nonuniform     = false;
-				AVPCL::flag_nonuniform_ati = false;
-
-				AVPCL::Tile avpclTile(4, 4);
-				memset(avpclTile.data, 0, sizeof(avpclTile.data) );
-				for (uint32_t blockY = 0; blockY < 4; ++blockY)
-				{
-					for (uint32_t blockX = 0; blockX < 4; ++blockX)
-					{
-						Vector4 color = rgba[blockY*4 + blockX];
-						avpclTile.data[blockY][blockX] = color * 255.0f;
-						avpclTile.importance_map[blockY][blockX] = 1.0f;
-					}
-				}
-
-				AVPCL::compress(avpclTile, &dst[( (yy*_width) + xx)/4 * 16]);
-			}
-		}
-	}
-
-} //namespace nvtt
diff --git a/3rdparty/nvtt/nvtt.h b/3rdparty/nvtt/nvtt.h
deleted file mode 100644
index a37c7cfb2..000000000
--- a/3rdparty/nvtt/nvtt.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef NVTT_H
-#define NVTT_H
-
-#include <stdint.h>
-
-namespace nvtt
-{
-void compressBC6H(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output);
-void compressBC7(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output);
-
-} // namespace nvtt
-
-#endif // NVTT_H
diff --git a/3rdparty/pvrtc/AlphaBitmap.h b/3rdparty/pvrtc/AlphaBitmap.h
deleted file mode 100644
index 419733206..000000000
--- a/3rdparty/pvrtc/AlphaBitmap.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#pragma once
-
-#include "Bitmap.h"
-
-namespace Javelin {
-
-class AlphaBitmap : public Bitmap {
-public:
-    AlphaBitmap() {}
-
-    AlphaBitmap(int w, int h)
-        : Bitmap(w, h, 1) {
-    }
-
-    const unsigned char *GetData() const { return data; }
-
-    unsigned char *GetData() { return data; }
-};
-
-}
diff --git a/3rdparty/pvrtc/BitScale.cpp b/3rdparty/pvrtc/BitScale.cpp
deleted file mode 100644
index 3e7419343..000000000
--- a/3rdparty/pvrtc/BitScale.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-#include "BitScale.h"
-
-const uint8_t Javelin::Data::BITSCALE_5_TO_8[32] = {
- 0, 8, 16, 24, 32, 41, 49, 57, 65, 74,
- 82, 90, 98, 106, 115, 123, 131, 139, 148, 156,
- 164, 172, 180, 189, 197, 205, 213, 222, 230, 238,
- 246, 255};
-
-const uint8_t Javelin::Data::BITSCALE_4_TO_8[16] = {
- 0, 17, 34, 51, 68, 85, 102, 119, 136, 153,
- 170, 187, 204, 221, 238, 255};
-
-const uint8_t Javelin::Data::BITSCALE_3_TO_8[8] = {
- 0, 36, 72, 109, 145, 182, 218, 255};
-
-const uint8_t Javelin::Data::BITSCALE_8_TO_5_FLOOR[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
- 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
- 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
- 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 9, 9, 9, 9, 9,
- 9, 9, 9, 10, 10, 10, 10, 10, 10, 10,
- 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
- 12, 12, 12, 12, 12, 12, 12, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 14, 14, 14, 14,
- 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
- 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
- 17, 17, 17, 17, 17, 17, 17, 17, 17, 18,
- 18, 18, 18, 18, 18, 18, 18, 19, 19, 19,
- 19, 19, 19, 19, 19, 20, 20, 20, 20, 20,
- 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
- 24, 24, 24, 24, 24, 24, 25, 25, 25, 25,
- 25, 25, 25, 25, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 27, 27, 27, 27, 27, 27, 27,
- 27, 28, 28, 28, 28, 28, 28, 28, 28, 29,
- 29, 29, 29, 29, 29, 29, 29, 30, 30, 30,
- 30, 30, 30, 30, 30, 31};
-
-const uint8_t Javelin::Data::BITSCALE_8_TO_4_FLOOR[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 15};
-
-const uint8_t Javelin::Data::BITSCALE_8_TO_3_FLOOR[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 7};
-
-const uint8_t Javelin::Data::BITSCALE_8_TO_5_CEIL[256] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 2,
- 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
- 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
- 8, 8, 8, 8, 8, 8, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 10, 10, 10, 10, 10,
- 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,
- 11, 12, 12, 12, 12, 12, 12, 12, 12, 13,
- 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
- 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
- 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
- 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
- 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
- 20, 20, 20, 20, 20, 21, 21, 21, 21, 21,
- 21, 21, 21, 22, 22, 22, 22, 22, 22, 22,
- 22, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
- 25, 25, 25, 25, 25, 25, 26, 26, 26, 26,
- 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 28, 28, 28, 28, 28, 28, 28,
- 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
- 30, 30, 30, 30, 30, 30, 30, 31, 31, 31,
- 31, 31, 31, 31, 31, 31};
-
-const uint8_t Javelin::Data::BITSCALE_8_TO_4_CEIL[256] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15};
-
-const uint8_t Javelin::Data::BITSCALE_8_TO_3_CEIL[256] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7};
-
diff --git a/3rdparty/pvrtc/BitScale.h b/3rdparty/pvrtc/BitScale.h
deleted file mode 100644
index b600fe935..000000000
--- a/3rdparty/pvrtc/BitScale.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//============================================================================
-
-#pragma once
-
-#include <stdint.h>
-
-//============================================================================
-
-namespace Javelin
-{
-  namespace Data
-  {
-//============================================================================
-    
-    extern const uint8_t BITSCALE_5_TO_8[32];
-    extern const uint8_t BITSCALE_4_TO_8[16];
-    extern const uint8_t BITSCALE_3_TO_8[8];
-    extern const uint8_t BITSCALE_8_TO_5_FLOOR[256];
-    extern const uint8_t BITSCALE_8_TO_4_FLOOR[256];
-    extern const uint8_t BITSCALE_8_TO_3_FLOOR[256];
-    extern const uint8_t BITSCALE_8_TO_5_CEIL[256];
-    extern const uint8_t BITSCALE_8_TO_4_CEIL[256];
-    extern const uint8_t BITSCALE_8_TO_3_CEIL[256];
-    
-//============================================================================
-  } // namespace Data
-} // namespace Javelin
-//============================================================================
diff --git a/3rdparty/pvrtc/BitUtility.h b/3rdparty/pvrtc/BitUtility.h
deleted file mode 100644
index 588ff3e89..000000000
--- a/3rdparty/pvrtc/BitUtility.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-namespace Javelin {
-
-class BitUtility {
-public:
-    static bool IsPowerOf2(unsigned int x) {
-        return (x & (x - 1)) == 0;
-    }
-
-    static unsigned int RotateRight(unsigned int value, unsigned int shift) {
-        if ((shift &= sizeof(value) * 8 - 1) == 0) {
-            return value;
-        }
-        return (value >> shift) | (value << (sizeof(value) * 8 - shift));
-    }
-};
-
-}
diff --git a/3rdparty/pvrtc/Bitmap.h b/3rdparty/pvrtc/Bitmap.h
deleted file mode 100644
index 409ef1e4d..000000000
--- a/3rdparty/pvrtc/Bitmap.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-#include "Point2.h"
-
-namespace Javelin {
-
-class Bitmap {
-public:
-    int width;
-    int height;
-    unsigned char *data;
-
-    Bitmap() {}
-
-    Bitmap(int w, int h, int bytesPerPixel)
-        : width(w)
-        , height(h)
-        , data(new unsigned char[width * height * bytesPerPixel]) {
-    }
-
-    virtual ~Bitmap() {
-        delete [] data;
-    }
-
-    Point2<int> GetSize() const { return Point2<int>(width, height); }
-
-    int GetArea() const { return width * height; }
-
-    int GetBitmapWidth() const { return width; }
-
-    int GetBitmapHeight() const { return height; }
-
-    const unsigned char *GetRawData() const { return data; }
-};
-
-}
diff --git a/3rdparty/pvrtc/ColorRgba.h b/3rdparty/pvrtc/ColorRgba.h
deleted file mode 100644
index e3ec6aa7c..000000000
--- a/3rdparty/pvrtc/ColorRgba.h
+++ /dev/null
@@ -1,152 +0,0 @@
-#pragma once
-
-namespace Javelin {
-
-template<typename T>
-class ColorRgb {
-public:
-    T r;
-    T g;
-    T b;
-    
-
-    ColorRgb()
-        : r(0)
-        , g(0)
-        , b(0) {
-    }
-
-    ColorRgb(T red, T green, T blue)
-        : r(red)
-        , g(green)
-        , b(blue) {
-    }
-
-    ColorRgb(const ColorRgb<T> &x)
-        : r(x.r)
-        , g(x.g)
-        , b(x.b) {
-    }
-
-    ColorRgb<int> operator *(int x) {
-        return ColorRgb<int>(r * x, g * x, b * x);
-    }
-
-    ColorRgb<int> operator +(const ColorRgb<T> &x) const {
-        return ColorRgb<int>(r + (int)x.r, g + (int)x.g, b + (int)x.b);
-    }
-
-    ColorRgb<int> operator -(const ColorRgb<T> &x) const {
-        return ColorRgb<int>(r - (int)x.r, g - (int)x.g, b - (int)x.b);
-    }
-
-    int operator %(const ColorRgb<T> &x) const {
-        return r * (int)x.r + g * (int)x.g + b * (int)x.b;
-    }
-
-    bool operator ==(const ColorRgb<T> &x) const {
-        return r == x.r && g == x.g && b == x.b;
-    }
-
-    bool operator !=(const ColorRgb<T> &x) const {
-        return r != x.r || g != x.g || b != x.b;
-    }
-
-    void SetMin(const ColorRgb<T> &x) {
-        if (x.r < r) {
-            r = x.r;
-        }
-        if (x.g < g) {
-            g = x.g;
-        }
-        if (x.b < b) {
-            b = x.b;
-        }
-    }
-
-    void SetMax(const ColorRgb<T> &x) {
-        if (x.r > r) {
-            r = x.r;
-        }
-        if (x.g > g) {
-            g = x.g;
-        }
-        if (x.b > b) {
-            b = x.b;
-        }
-    }
-};
-
-template<typename T>
-class ColorRgba : public ColorRgb<T> {
-public:
-    T a;
-
-    ColorRgba() :
-        a(0) {
-    }
-
-    ColorRgba(T red, T green, T blue, T alpha)
-        : ColorRgb<T>(red, green, blue)
-        , a(alpha) {
-    }
-
-    ColorRgba(const ColorRgba<T> &x)
-        : ColorRgb<T>(x.r, x.g, x.b)
-        , a(x.a) {
-    }
-
-    ColorRgba<int> operator *(int x) {
-        return ColorRgba<T>(ColorRgb<T>::r * x, 
-                            ColorRgb<T>::g * x, 
-                            ColorRgb<T>::b * x, 
-                            a * x);
-    }
-
-    ColorRgba<int> operator +(const ColorRgba<T> &x) {
-        return ColorRgba<T>(ColorRgb<T>::r + (int)x.r, 
-                            ColorRgb<T>::g + (int)x.g, 
-                            ColorRgb<T>::b + (int)x.b, 
-                            a + (int)x.a);
-    }
-
-    ColorRgba<int> operator -(const ColorRgba<T> &x) {
-        return ColorRgba<T>(ColorRgb<T>::r - (int)x.r, 
-                            ColorRgb<T>::g - (int)x.g, 
-                            ColorRgb<T>::b - (int)x.b, 
-                            a - (int)x.a);
-    }
-
-    int operator %(const ColorRgba<T> &x) {
-        return ColorRgb<T>::r * (int)x.r + 
-               ColorRgb<T>::g * (int)x.g + 
-               ColorRgb<T>::b * (int)x.b + 
-               a * (int)x.a;
-    }
-
-    bool operator ==(const ColorRgba<T> &x) {
-        return ColorRgb<T>::r == x.r && ColorRgb<T>::g == x.g && 
-               ColorRgb<T>::b == x.b && a == x.a;
-    }
-
-    bool operator !=(const ColorRgba<T> &x) {
-        return ColorRgb<T>::r != x.r || ColorRgb<T>::g != x.g || 
-               ColorRgb<T>::b != x.b || a != x.a;
-    }
-
-    void SetMin(const ColorRgba<T> &x) {
-        ColorRgb<T>::SetMin(x);
-        if (x.a < a) {
-            a = x.a;
-        }
-    }
-
-    void SetMax(const ColorRgba<T> &x) {
-        ColorRgb<T>::SetMax(x);
-        if (x.a > a) {
-            a = x.a;
-        }
-    }
-};
-
-}
diff --git a/3rdparty/pvrtc/Interval.h b/3rdparty/pvrtc/Interval.h
deleted file mode 100644
index a7252e837..000000000
--- a/3rdparty/pvrtc/Interval.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-
-namespace Javelin {
-
-template<typename T>
-class Interval {
-public:
-    T min;
-    T max;
-
-    Interval() {
-    }
-
-    Interval<T> &operator|=(const T &x) {
-        min.SetMin(x); 
-        max.SetMax(x);
-        return *this;
-    }
-};
-
-}
diff --git a/3rdparty/pvrtc/LICENSE.TXT b/3rdparty/pvrtc/LICENSE.TXT
deleted file mode 100644
index 974fc09e2..000000000
--- a/3rdparty/pvrtc/LICENSE.TXT
+++ /dev/null
@@ -1,25 +0,0 @@
-Copyright © 2014, Jeffrey Lim. All Rights Reserved.
-
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, 
-   this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice, 
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-   derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 
-FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR 
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
-POSSIBILITY OF SUCH DAMAGE.
diff --git a/3rdparty/pvrtc/MortonTable.cpp b/3rdparty/pvrtc/MortonTable.cpp
deleted file mode 100644
index 29a5af67f..000000000
--- a/3rdparty/pvrtc/MortonTable.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//============================================================================
-
-#include "MortonTable.h"
-
-//============================================================================
-
-const unsigned short Javelin::Data::MORTON_TABLE[256] =
-{
-    0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015,
-    0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
-    0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115,
-    0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
-    0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415,
-    0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
-    0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515,
-    0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
-    0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015,
-    0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
-    0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115,
-    0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
-    0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415,
-    0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
-    0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515,
-    0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
-    0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015,
-    0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
-    0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115,
-    0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
-    0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415,
-    0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
-    0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515,
-    0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
-    0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015,
-    0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
-    0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115,
-    0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
-    0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415,
-    0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
-    0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515,
-    0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
-};
-
-//============================================================================
diff --git a/3rdparty/pvrtc/MortonTable.h b/3rdparty/pvrtc/MortonTable.h
deleted file mode 100644
index 7a27e5954..000000000
--- a/3rdparty/pvrtc/MortonTable.h
+++ /dev/null
@@ -1,18 +0,0 @@
-//============================================================================
-
-#pragma once
-
-//============================================================================
-
-namespace Javelin
-{
-	namespace Data
-	{
-//============================================================================
-		
-		extern const unsigned short MORTON_TABLE[256];
-		
-//============================================================================
-	} // namespace Data
-} // namespace Javelin
-//============================================================================
diff --git a/3rdparty/pvrtc/Point2.h b/3rdparty/pvrtc/Point2.h
deleted file mode 100644
index 89fa4b632..000000000
--- a/3rdparty/pvrtc/Point2.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-namespace Javelin {
-
-template<typename T>
-class Point2 {
-public:
-    T x;
-    T y;
-
-    Point2(int a, int b)
-        : x(a)
-        , y(b) {
-    }
-};
-
-}
diff --git a/3rdparty/pvrtc/PvrTcDecoder.cpp b/3rdparty/pvrtc/PvrTcDecoder.cpp
deleted file mode 100644
index d8a36b342..000000000
--- a/3rdparty/pvrtc/PvrTcDecoder.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-//============================================================================
-
-#include "PvrTcDecoder.h"
-#include "PvrTcPacket.h"
-
-#include "MortonTable.h"
-#include <assert.h>
-
-//============================================================================
-
-using namespace Javelin;
-using Data::MORTON_TABLE;
-
-//============================================================================
-
-inline unsigned PvrTcDecoder::GetMortonNumber(int x, int y)
-{
-    return MORTON_TABLE[x >> 8] << 17 | MORTON_TABLE[y >> 8] << 16 | MORTON_TABLE[x & 0xFF] << 1 | MORTON_TABLE[y & 0xFF];
-}
-
-//============================================================================
-
-void PvrTcDecoder::DecodeRgb4Bpp(ColorRgb<unsigned char>* result, const Point2<int>& size, const void* data)
-{
-    assert(size.x == size.y);
-	
-	const int blocks = size.x / 4;
-	const int blockMask = blocks-1;
-    const PvrTcPacket* packets = static_cast<const PvrTcPacket*>(data);
-    
-    for(int y = 0; y < blocks; ++y)
-    {
-        for(int x = 0; x < blocks; ++x)
-        {
-            const PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-            
-            unsigned mod = packet->modulationData;
-			const unsigned char (*weights)[4] = PvrTcPacket::WEIGHTS + 4*packet->usePunchthroughAlpha;
-            const unsigned char (*factor)[4] = PvrTcPacket::BILINEAR_FACTORS;
-			
-			for(int py = 0; py < 4; ++py)
-			{
-				const int yOffset = (py < 2) ? -1 : 0;
-				const int y0 = (y + yOffset) & blockMask;
-				const int y1 = (y0+1) & blockMask;
-				
-				for(int px = 0; px < 4; ++px)
-				{
-					const int xOffset = (px < 2) ? -1 : 0;
-					const int x0 = (x + xOffset) & blockMask;
-					const int x1 = (x0+1) & blockMask;
-					
-					const PvrTcPacket* p0 = packets + GetMortonNumber(x0, y0);
-					const PvrTcPacket* p1 = packets + GetMortonNumber(x1, y0);
-					const PvrTcPacket* p2 = packets + GetMortonNumber(x0, y1);
-					const PvrTcPacket* p3 = packets + GetMortonNumber(x1, y1);
-					
-					ColorRgb<int> ca = p0->GetColorRgbA() * (*factor)[0] +
-									   p1->GetColorRgbA() * (*factor)[1] +
-									   p2->GetColorRgbA() * (*factor)[2] +
-									   p3->GetColorRgbA() * (*factor)[3];
-					
-					ColorRgb<int> cb = p0->GetColorRgbB() * (*factor)[0] +
-									   p1->GetColorRgbB() * (*factor)[1] +
-									   p2->GetColorRgbB() * (*factor)[2] +
-									   p3->GetColorRgbB() * (*factor)[3];
-					
-					const unsigned char* w = weights[mod&3];
-					ColorRgb<unsigned char> c;
-					c.r = (ca.r * w[0] + cb.r * w[1]) >> 7;
-					c.g = (ca.g * w[0] + cb.g * w[1]) >> 7;
-					c.b = (ca.b * w[0] + cb.b * w[1]) >> 7;
-					
-					result[(py+y*4)*size.x + (px+x*4)] = c;
-					mod >>= 2;
-					factor++;
-				}
-			}
-        }
-    }
-}
-
-void PvrTcDecoder::DecodeRgba4Bpp(ColorRgba<unsigned char>* result, const Point2<int>& size, const void* data)
-{
-    assert(size.x == size.y);
-    
-	const int blocks = size.x / 4;
-	const int blockMask = blocks-1;
-    const PvrTcPacket* packets = static_cast<const PvrTcPacket*>(data);
-    
-    for(int y = 0; y < blocks; ++y)
-    {
-        for(int x = 0; x < blocks; ++x)
-        {
-            const PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-            
-            unsigned mod = packet->modulationData;
-            const unsigned char (*weights)[4] = PvrTcPacket::WEIGHTS + 4*packet->usePunchthroughAlpha;
-            const unsigned char (*factor)[4] = PvrTcPacket::BILINEAR_FACTORS;
-			
-			for(int py = 0; py < 4; ++py)
-			{
-				const int yOffset = (py < 2) ? -1 : 0;
-				const int y0 = (y + yOffset) & blockMask;
-				const int y1 = (y0+1) & blockMask;
-				
-				for(int px = 0; px < 4; ++px)
-				{
-					const int xOffset = (px < 2) ? -1 : 0;
-					const int x0 = (x + xOffset) & blockMask;
-					const int x1 = (x0+1) & blockMask;
-					
-					const PvrTcPacket* p0 = packets + GetMortonNumber(x0, y0);
-					const PvrTcPacket* p1 = packets + GetMortonNumber(x1, y0);
-					const PvrTcPacket* p2 = packets + GetMortonNumber(x0, y1);
-					const PvrTcPacket* p3 = packets + GetMortonNumber(x1, y1);
-					
-					ColorRgba<int> ca = p0->GetColorRgbaA() * (*factor)[0] +
-									   	p1->GetColorRgbaA() * (*factor)[1] +
-									   	p2->GetColorRgbaA() * (*factor)[2] +
-										p3->GetColorRgbaA() * (*factor)[3];
-					
-					ColorRgba<int> cb = p0->GetColorRgbaB() * (*factor)[0] +
-										p1->GetColorRgbaB() * (*factor)[1] +
-										p2->GetColorRgbaB() * (*factor)[2] +
-										p3->GetColorRgbaB() * (*factor)[3];
-					
-					const unsigned char* w = weights[mod&3];
-					ColorRgba<unsigned char> c;
-					c.r = (ca.r * w[0] + cb.r * w[1]) >> 7;
-					c.g = (ca.g * w[0] + cb.g * w[1]) >> 7;
-					c.b = (ca.b * w[0] + cb.b * w[1]) >> 7;
-					c.a = (ca.a * w[2] + cb.a * w[3]) >> 7;
-					
-					result[(py+y*4)*size.x + (px+x*4)] = c;
-					mod >>= 2;
-					factor++;
-				}
-			}
-        }
-    }
-}
-
-//============================================================================
diff --git a/3rdparty/pvrtc/PvrTcDecoder.h b/3rdparty/pvrtc/PvrTcDecoder.h
deleted file mode 100644
index 1b6fcf964..000000000
--- a/3rdparty/pvrtc/PvrTcDecoder.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//============================================================================
-
-#pragma once
-#include "Point2.h"
-#include "ColorRgba.h"
-
-//============================================================================
-
-namespace Javelin
-{
-//============================================================================
-
-    class PvrTcDecoder
-    {
-    public:
-        static void DecodeRgb4Bpp(ColorRgb<unsigned char>* result, const Point2<int>& size, const void* data);
-        static void DecodeRgba4Bpp(ColorRgba<unsigned char>* result, const Point2<int>& size, const void* data);
-        
-    private:
-		static unsigned GetMortonNumber(int x, int y);
-    };
-    
-//============================================================================
-}
-//============================================================================
diff --git a/3rdparty/pvrtc/PvrTcEncoder.cpp b/3rdparty/pvrtc/PvrTcEncoder.cpp
deleted file mode 100644
index 56cc8e03c..000000000
--- a/3rdparty/pvrtc/PvrTcEncoder.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
-//============================================================================
-
-#include "PvrTcEncoder.h"
-#include "AlphaBitmap.h"
-#include "PvrTcPacket.h"
-#include "RgbBitmap.h"
-#include "RgbaBitmap.h"
-#include "MortonTable.h"
-#include "BitUtility.h"
-#include "Interval.h"
-#include <assert.h>
-#include <math.h>
-#include <stdint.h>
-
-//============================================================================
-
-using namespace Javelin;
-using Data::MORTON_TABLE;
-
-//============================================================================
-
-static const unsigned char MODULATION_LUT[16] =
-{
-	0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3
-};
-
-//============================================================================
-
-inline unsigned PvrTcEncoder::GetMortonNumber(int x, int y)
-{
-	return MORTON_TABLE[x >> 8] << 17 | MORTON_TABLE[y >> 8] << 16 | MORTON_TABLE[x & 0xFF] << 1 | MORTON_TABLE[y & 0xFF];
-}
-
-//============================================================================
-
-void PvrTcEncoder::EncodeAlpha2Bpp(void* result, const AlphaBitmap& bitmap)
-{
-	int size = bitmap.GetBitmapWidth();
-	assert(size == bitmap.GetBitmapHeight());
-	assert(BitUtility::IsPowerOf2(size));
-	
-	// Blocks in each dimension.
-	int xBlocks = size/8;
-	int yBlocks = size/4;
-	
-	const unsigned char* bitmapData = bitmap.GetRawData();
-	
-	PvrTcPacket* packets = static_cast<PvrTcPacket*>(result);
-	for(int y = 0; y < yBlocks; ++y)
-	{
-		for(int x = 0; x < xBlocks; ++x)
-		{
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->usePunchthroughAlpha = 0;
-			packet->colorAIsOpaque = 0;
-			packet->colorA = 0x7ff;		// White, with 0 alpha
-			packet->colorBIsOpaque = 1;
-			packet->colorB = 0x7fff;	// White with full alpha
-			
-			const unsigned char* blockBitmapData = &bitmapData[y*4*size + x*8];
-			
-			uint32_t modulationData = 0;
-			for(int py = 0; py < 4; ++py)
-			{
-				const unsigned char* rowBitmapData = blockBitmapData;
-				for(int px = 0; px < 8; ++px)
-				{
-					unsigned char pixel = *rowBitmapData++;
-					modulationData = BitUtility::RotateRight(modulationData | (pixel >> 7), 1);
-				}
-				blockBitmapData += size;
-			}
-			packet->modulationData = modulationData;
-		}
-	}
-}
-
-void PvrTcEncoder::EncodeAlpha4Bpp(void* result, const AlphaBitmap& bitmap)
-{
-	int size = bitmap.GetBitmapWidth();
-	assert(size == bitmap.GetBitmapHeight());
-	assert(BitUtility::IsPowerOf2(size));
-	
-	// Blocks in each dimension.
-	int blocks = size/4;
-	
-	const unsigned char* bitmapData = bitmap.GetRawData();
-	
-	PvrTcPacket* packets = static_cast<PvrTcPacket*>(result);
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->usePunchthroughAlpha = 0;
-			packet->colorAIsOpaque = 0;
-			packet->colorA = 0x7ff;		// White, with 0 alpha
-			packet->colorBIsOpaque = 1;
-			packet->colorB = 0x7fff;	// White with full alpha
-
-			const unsigned char* blockBitmapData = &bitmapData[(y*size + x)*4];
-			
-			uint32_t modulationData = 0;
-			for(int py = 0; py < 4; ++py)
-			{
-				const unsigned char* rowBitmapData = blockBitmapData;
-				for(int px = 0; px < 4; ++px)
-				{
-					unsigned char pixel = *rowBitmapData++;
-					modulationData = BitUtility::RotateRight(modulationData | MODULATION_LUT[pixel>>4], 2);
-				}
-				blockBitmapData += size;
-			}
-			packet->modulationData = modulationData;
-		}
-	}
-}
-
-//============================================================================
-
-typedef Interval<ColorRgb<unsigned char> > ColorRgbBoundingBox;
-
-static void CalculateBoundingBox(ColorRgbBoundingBox& cbb, const RgbBitmap& bitmap, int blockX, int blockY)
-{
-	int size = bitmap.GetBitmapWidth();
-	const ColorRgb<unsigned char>* data = bitmap.GetData() + blockY * 4 * size + blockX * 4;
-	
-	cbb.min = data[0];
-	cbb.max = data[0];
-	cbb |= data[1];
-	cbb |= data[2];
-	cbb |= data[3];
-	
-	cbb |= data[size];
-	cbb |= data[size+1];
-	cbb |= data[size+2];
-	cbb |= data[size+3];
-
-	cbb |= data[2*size];
-	cbb |= data[2*size+1];
-	cbb |= data[2*size+2];
-	cbb |= data[2*size+3];
-
-	cbb |= data[3*size];
-	cbb |= data[3*size+1];
-	cbb |= data[3*size+2];
-	cbb |= data[3*size+3];
-}
-
-void PvrTcEncoder::EncodeRgb4Bpp(void* result, const RgbBitmap& bitmap)
-{
-	assert(bitmap.GetBitmapWidth() == bitmap.GetBitmapHeight());
-	assert(BitUtility::IsPowerOf2(bitmap.GetBitmapWidth()));
-	const int size = bitmap.GetBitmapWidth();
-	const int blocks = size / 4;
-	const int blockMask = blocks-1;
-	
-	PvrTcPacket* packets = static_cast<PvrTcPacket*>(result);
-
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			ColorRgbBoundingBox cbb;
-			CalculateBoundingBox(cbb, bitmap, x, y);
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->usePunchthroughAlpha = 0;
-			packet->SetColorA(cbb.min);
-			packet->SetColorB(cbb.max);
-		}
-	}
-	
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			const unsigned char (*factor)[4] = PvrTcPacket::BILINEAR_FACTORS;
-			const ColorRgb<unsigned char>* data = bitmap.GetData() + y * 4 * size + x * 4;
-
-			uint32_t modulationData = 0;
-			
-			for(int py = 0; py < 4; ++py)
-			{
-				const int yOffset = (py < 2) ? -1 : 0;
-				const int y0 = (y + yOffset) & blockMask;
-				const int y1 = (y0+1) & blockMask;
-
-				for(int px = 0; px < 4; ++px)
-				{
-					const int xOffset = (px < 2) ? -1 : 0;
-					const int x0 = (x + xOffset) & blockMask;
-					const int x1 = (x0+1) & blockMask;
-					
-					const PvrTcPacket* p0 = packets + GetMortonNumber(x0, y0);
-					const PvrTcPacket* p1 = packets + GetMortonNumber(x1, y0);
-					const PvrTcPacket* p2 = packets + GetMortonNumber(x0, y1);
-					const PvrTcPacket* p3 = packets + GetMortonNumber(x1, y1);
-					
-					ColorRgb<int> ca = p0->GetColorRgbA() * (*factor)[0] +
-									   p1->GetColorRgbA() * (*factor)[1] +
-									   p2->GetColorRgbA() * (*factor)[2] +
-									   p3->GetColorRgbA() * (*factor)[3];
-					
-					ColorRgb<int> cb = p0->GetColorRgbB() * (*factor)[0] +
-									   p1->GetColorRgbB() * (*factor)[1] +
-									   p2->GetColorRgbB() * (*factor)[2] +
-									   p3->GetColorRgbB() * (*factor)[3];
-					
-					const ColorRgb<unsigned char>& pixel = data[py*size + px];
-					ColorRgb<int> d = cb - ca;
-					ColorRgb<int> p(pixel.r*16, pixel.g*16, pixel.b*16);
-					ColorRgb<int> v = p - ca;
-					
-					// PVRTC uses weightings of 0, 3/8, 5/8 and 1
-					// The boundaries for these are 3/16, 1/2 (=8/16), 13/16
-					int projection = (v % d) * 16;
-					int lengthSquared = d % d;
-					if(projection > 3*lengthSquared) modulationData++;
-					if(projection > 8*lengthSquared) modulationData++;
-					if(projection > 13*lengthSquared) modulationData++;
-					
-					modulationData = BitUtility::RotateRight(modulationData, 2);
-					
-					factor++;
-				}
-			}
-
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->modulationData = modulationData;
-		}
-	}
-}
-
-//============================================================================
-
-static void CalculateBoundingBox(ColorRgbBoundingBox& cbb, const RgbaBitmap& bitmap, int blockX, int blockY)
-{
-	int size = bitmap.GetBitmapWidth();
-	const ColorRgba<unsigned char>* data = bitmap.GetData() + blockY * 4 * size + blockX * 4;
-	
-	cbb.min = data[0];
-	cbb.max = data[0];
-	
-	cbb |= data[1];
-	cbb |= data[2];
-	cbb |= data[3];
-	
-	cbb |= data[size];
-	cbb |= data[size+1];
-	cbb |= data[size+2];
-	cbb |= data[size+3];
-	
-	cbb |= data[2*size];
-	cbb |= data[2*size+1];
-	cbb |= data[2*size+2];
-	cbb |= data[2*size+3];
-	
-	cbb |= data[3*size];
-	cbb |= data[3*size+1];
-	cbb |= data[3*size+2];
-	cbb |= data[3*size+3];
-}
-
-void PvrTcEncoder::EncodeRgb4Bpp(void* result, const RgbaBitmap& bitmap)
-{
-	assert(bitmap.GetBitmapWidth() == bitmap.GetBitmapHeight());
-	assert(BitUtility::IsPowerOf2(bitmap.GetBitmapWidth()));
-	const int size = bitmap.GetBitmapWidth();
-	const int blocks = size / 4;
-	const int blockMask = blocks-1;
-	
-	PvrTcPacket* packets = static_cast<PvrTcPacket*>(result);
-	
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			ColorRgbBoundingBox cbb;
-			CalculateBoundingBox(cbb, bitmap, x, y);
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->usePunchthroughAlpha = 0;
-			packet->SetColorA(cbb.min);
-			packet->SetColorB(cbb.max);
-		}
-	}
-	
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			const unsigned char (*factor)[4] = PvrTcPacket::BILINEAR_FACTORS;
-			const ColorRgba<unsigned char>* data = bitmap.GetData() + y * 4 * size + x * 4;
-			
-			uint32_t modulationData = 0;
-			
-			for(int py = 0; py < 4; ++py)
-			{
-				const int yOffset = (py < 2) ? -1 : 0;
-				const int y0 = (y + yOffset) & blockMask;
-				const int y1 = (y0+1) & blockMask;
-
-				for(int px = 0; px < 4; ++px)
-				{
-					const int xOffset = (px < 2) ? -1 : 0;
-					const int x0 = (x + xOffset) & blockMask;
-					const int x1 = (x0+1) & blockMask;
-					
-					const PvrTcPacket* p0 = packets + GetMortonNumber(x0, y0);
-					const PvrTcPacket* p1 = packets + GetMortonNumber(x1, y0);
-					const PvrTcPacket* p2 = packets + GetMortonNumber(x0, y1);
-					const PvrTcPacket* p3 = packets + GetMortonNumber(x1, y1);
-					
-					ColorRgb<int> ca = p0->GetColorRgbA() * (*factor)[0] +
-									   p1->GetColorRgbA() * (*factor)[1] +
-									   p2->GetColorRgbA() * (*factor)[2] +
-									   p3->GetColorRgbA() * (*factor)[3];
-					
-					ColorRgb<int> cb = p0->GetColorRgbB() * (*factor)[0] +
-									   p1->GetColorRgbB() * (*factor)[1] +
-									   p2->GetColorRgbB() * (*factor)[2] +
-									   p3->GetColorRgbB() * (*factor)[3];
-					
-					const ColorRgb<unsigned char>& pixel = data[py*size + px];
-					ColorRgb<int> d = cb - ca;
-					ColorRgb<int> p(pixel.r*16, pixel.g*16, pixel.b*16);
-					ColorRgb<int> v = p - ca;
-					
-					// PVRTC uses weightings of 0, 3/8, 5/8 and 1
-					// The boundaries for these are 3/16, 1/2 (=8/16), 13/16
-					int projection = (v % d) * 16;
-					int lengthSquared = d % d;
-					if(projection > 3*lengthSquared) modulationData++;
-					if(projection > 8*lengthSquared) modulationData++;
-					if(projection > 13*lengthSquared) modulationData++;
-					
-					modulationData = BitUtility::RotateRight(modulationData, 2);
-					
-					factor++;
-				}
-			}
-
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->modulationData = modulationData;
-		}
-	}
-}
-
-//============================================================================
-
-typedef Interval<ColorRgba<unsigned char> > ColorRgbaBoundingBox;
-
-static void CalculateBoundingBox(ColorRgbaBoundingBox& cbb, const RgbaBitmap& bitmap, int blockX, int blockY)
-{
-	int size = bitmap.GetBitmapWidth();
-	const ColorRgba<unsigned char>* data = bitmap.GetData() + blockY * 4 * size + blockX * 4;
-	
-	cbb.min = data[0];
-	cbb.max = data[0];
-	
-	cbb |= data[1];
-	cbb |= data[2];
-	cbb |= data[3];
-	
-	cbb |= data[size];
-	cbb |= data[size+1];
-	cbb |= data[size+2];
-	cbb |= data[size+3];
-	
-	cbb |= data[2*size];
-	cbb |= data[2*size+1];
-	cbb |= data[2*size+2];
-	cbb |= data[2*size+3];
-	
-	cbb |= data[3*size];
-	cbb |= data[3*size+1];
-	cbb |= data[3*size+2];
-	cbb |= data[3*size+3];
-}
-
-void PvrTcEncoder::EncodeRgba4Bpp(void* result, const RgbaBitmap& bitmap)
-{
-	assert(bitmap.GetBitmapWidth() == bitmap.GetBitmapHeight());
-	assert(BitUtility::IsPowerOf2(bitmap.GetBitmapWidth()));
-	const int size = bitmap.GetBitmapWidth();
-	const int blocks = size / 4;
-	const int blockMask = blocks-1;
-	
-	PvrTcPacket* packets = static_cast<PvrTcPacket*>(result);
-	
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			ColorRgbaBoundingBox cbb;
-			CalculateBoundingBox(cbb, bitmap, x, y);
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->usePunchthroughAlpha = 0;
-			packet->SetColorA(cbb.min);
-			packet->SetColorB(cbb.max);
-		}
-	}
-	
-	for(int y = 0; y < blocks; ++y)
-	{
-		for(int x = 0; x < blocks; ++x)
-		{
-			const unsigned char (*factor)[4] = PvrTcPacket::BILINEAR_FACTORS;
-			const ColorRgba<unsigned char>* data = bitmap.GetData() + y * 4 * size + x * 4;
-			
-			uint32_t modulationData = 0;
-			
-			for(int py = 0; py < 4; ++py)
-			{
-				const int yOffset = (py < 2) ? -1 : 0;
-				const int y0 = (y + yOffset) & blockMask;
-				const int y1 = (y0+1) & blockMask;
-				
-				for(int px = 0; px < 4; ++px)
-				{
-					const int xOffset = (px < 2) ? -1 : 0;
-					const int x0 = (x + xOffset) & blockMask;
-					const int x1 = (x0+1) & blockMask;
-					
-					const PvrTcPacket* p0 = packets + GetMortonNumber(x0, y0);
-					const PvrTcPacket* p1 = packets + GetMortonNumber(x1, y0);
-					const PvrTcPacket* p2 = packets + GetMortonNumber(x0, y1);
-					const PvrTcPacket* p3 = packets + GetMortonNumber(x1, y1);
-					
-					ColorRgba<int> ca = p0->GetColorRgbaA() * (*factor)[0] +
-										p1->GetColorRgbaA() * (*factor)[1] +
-										p2->GetColorRgbaA() * (*factor)[2] +
-										p3->GetColorRgbaA() * (*factor)[3];
-					
-					ColorRgba<int> cb = p0->GetColorRgbaB() * (*factor)[0] +
-										p1->GetColorRgbaB() * (*factor)[1] +
-										p2->GetColorRgbaB() * (*factor)[2] +
-										p3->GetColorRgbaB() * (*factor)[3];
-					
-					const ColorRgba<unsigned char>& pixel = data[py*size + px];
-					ColorRgba<int> d = cb - ca;
-					ColorRgba<int> p(pixel.r*16, pixel.g*16, pixel.b*16, pixel.a*16);
-					ColorRgba<int> v = p - ca;
-					
-					// PVRTC uses weightings of 0, 3/8, 5/8 and 1
-					// The boundaries for these are 3/16, 1/2 (=8/16), 13/16
-					int projection = (v % d) * 16;
-					int lengthSquared = d % d;
-					if(projection > 3*lengthSquared) modulationData++;
-					if(projection > 8*lengthSquared) modulationData++;
-					if(projection > 13*lengthSquared) modulationData++;
-					
-					modulationData = BitUtility::RotateRight(modulationData, 2);
-					
-					factor++;
-				}
-			}
-			
-			PvrTcPacket* packet = packets + GetMortonNumber(x, y);
-			packet->modulationData = modulationData;
-		}
-	}
-}
-
-//============================================================================
diff --git a/3rdparty/pvrtc/PvrTcEncoder.h b/3rdparty/pvrtc/PvrTcEncoder.h
deleted file mode 100644
index fd244846a..000000000
--- a/3rdparty/pvrtc/PvrTcEncoder.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//============================================================================
-
-#pragma once
-#include "ColorRgba.h"
-#include "AlphaBitmap.h"
-#include "RgbBitmap.h"
-#include "RgbaBitmap.h"
-
-//============================================================================
-
-namespace Javelin
-{
-//============================================================================
-
-	class AlphaBitmap;
-	class RgbBitmap;
-	class RgbaBitmap;
-	
-	class PvrTcEncoder
-	{
-	public:
-		// Result must be large enough for bitmap.GetArea()/4 bytes
-		static void EncodeAlpha2Bpp(void* result, const AlphaBitmap& bitmap);
-		
-		// Result must be large enough for bitmap.GetArea()/2 bytes
-		static void EncodeAlpha4Bpp(void* result, const AlphaBitmap& bitmap);
-		
-		// Result must be large enough for bitmap.GetArea()/2 bytes
-		static void EncodeRgb4Bpp(void* result, const RgbBitmap& bitmap);
-
-		// Result must be large enough for bitmap.GetArea()/2 bytes
-		static void EncodeRgb4Bpp(void* result, const RgbaBitmap& bitmap);
-
-		// Result must be large enough for bitmap.GetArea()/2 bytes
-		static void EncodeRgba4Bpp(void* result, const RgbaBitmap& bitmap);
-
-	private:
-		static unsigned GetMortonNumber(int x, int y);
-	};
-	
-//============================================================================
-}
-//============================================================================
diff --git a/3rdparty/pvrtc/PvrTcPacket.cpp b/3rdparty/pvrtc/PvrTcPacket.cpp
deleted file mode 100644
index 2e40d371e..000000000
--- a/3rdparty/pvrtc/PvrTcPacket.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-//============================================================================
-
-#include "PvrTcPacket.h"
-#include "BitScale.h"
-
-//============================================================================
-
-using namespace Javelin;
-
-//============================================================================
-
-const unsigned char PvrTcPacket::BILINEAR_FACTORS[16][4] =
-{
-	{ 4, 4, 4, 4 },
-	{ 2, 6, 2, 6 },
-	{ 8, 0, 8, 0 },
-	{ 6, 2, 6, 2 },
-	
-	{ 2, 2, 6, 6 },
-	{ 1, 3, 3, 9 },
-	{ 4, 0, 12, 0 },
-	{ 3, 1, 9, 3 },
-	
-	{ 8, 8, 0, 0 },
-	{ 4, 12, 0, 0 },
-	{ 16, 0, 0, 0 },
-	{ 12, 4, 0, 0 },
-	
-	{ 6, 6, 2, 2 },
-	{ 3, 9, 1, 3 },
-	{ 12, 0, 4, 0 },
-	{ 9, 3, 3, 1 },
-};
-
-// Weights are { colorA, colorB, alphaA, alphaB }
-const unsigned char PvrTcPacket::WEIGHTS[8][4] =
-{
-	// Weights for Mode=0
-	{ 8, 0, 8, 0 },
-	{ 5, 3, 5, 3 },
-	{ 3, 5, 3, 5 },
-	{ 0, 8, 0, 8 },
-	
-	// Weights for Mode=1
-	{ 8, 0, 8, 0 },
-	{ 4, 4, 4, 4 },
-	{ 4, 4, 0, 0 },
-	{ 0, 8, 0, 8 },
-};
-
-//============================================================================
-
-ColorRgb<int> PvrTcPacket::GetColorRgbA() const
-{
-	if(colorAIsOpaque)
-	{
-		unsigned char r = colorA >> 9;
-		unsigned char g = colorA >> 4 & 0x1f;
-		unsigned char b = colorA & 0xf;
-		return ColorRgb<int>(Data::BITSCALE_5_TO_8[r],
-							 Data::BITSCALE_5_TO_8[g],
-							 Data::BITSCALE_4_TO_8[b]);
-	}
-	else
-	{
-		unsigned char r = (colorA >> 7) & 0xf;
-		unsigned char g = (colorA >> 3) & 0xf;
-		unsigned char b = colorA & 7;
-		return ColorRgb<int>(Data::BITSCALE_4_TO_8[r],
-							 Data::BITSCALE_4_TO_8[g],
-							 Data::BITSCALE_3_TO_8[b]);
-	}
-}
-
-ColorRgb<int> PvrTcPacket::GetColorRgbB() const
-{
-	if(colorBIsOpaque)
-	{
-		unsigned char r = colorB >> 10;
-		unsigned char g = colorB >> 5 & 0x1f;
-		unsigned char b = colorB & 0x1f;
-		return ColorRgb<int>(Data::BITSCALE_5_TO_8[r],
-							 Data::BITSCALE_5_TO_8[g],
-							 Data::BITSCALE_5_TO_8[b]);
-	}
-	else
-	{
-		unsigned char r = colorB >> 8 & 0xf;
-		unsigned char g = colorB >> 4 & 0xf;
-		unsigned char b = colorB & 0xf;
-		return ColorRgb<int>(Data::BITSCALE_4_TO_8[r],
-							 Data::BITSCALE_4_TO_8[g],
-							 Data::BITSCALE_4_TO_8[b]);
-	}
-}
-
-ColorRgba<int> PvrTcPacket::GetColorRgbaA() const
-{
-	if(colorAIsOpaque)
-	{
-		unsigned char r = colorA >> 9;
-		unsigned char g = colorA >> 4 & 0x1f;
-		unsigned char b = colorA & 0xf;
-		return ColorRgba<int>(Data::BITSCALE_5_TO_8[r],
-							  Data::BITSCALE_5_TO_8[g],
-							  Data::BITSCALE_4_TO_8[b],
-							  255);
-	}
-	else
-	{
-		unsigned char a = colorA >> 11 & 7;
-		unsigned char r = colorA >> 7 & 0xf;
-		unsigned char g = colorA >> 3 & 0xf;
-		unsigned char b = colorA & 7;
-		return ColorRgba<int>(Data::BITSCALE_4_TO_8[r],
-							  Data::BITSCALE_4_TO_8[g],
-							  Data::BITSCALE_3_TO_8[b],
-							  Data::BITSCALE_3_TO_8[a]);
-	}
-}
-
-ColorRgba<int> PvrTcPacket::GetColorRgbaB() const
-{
-	if(colorBIsOpaque)
-	{
-		unsigned char r = colorB >> 10;
-		unsigned char g = colorB >> 5 & 0x1f;
-		unsigned char b = colorB & 0x1f;
-		return ColorRgba<int>(Data::BITSCALE_5_TO_8[r],
-							  Data::BITSCALE_5_TO_8[g],
-							  Data::BITSCALE_5_TO_8[b],
-							  255);
-	}
-	else
-	{
-		unsigned char a = colorB >> 12 & 7;
-		unsigned char r = colorB >> 8 & 0xf;
-		unsigned char g = colorB >> 4 & 0xf;
-		unsigned char b = colorB & 0xf;
-		return ColorRgba<int>(Data::BITSCALE_4_TO_8[r],
-							  Data::BITSCALE_4_TO_8[g],
-							  Data::BITSCALE_4_TO_8[b],
-							  Data::BITSCALE_3_TO_8[a]);
-	}
-}
-
-//============================================================================
-
-void PvrTcPacket::SetColorA(const ColorRgb<unsigned char>& c)
-{
-	int r = Data::BITSCALE_8_TO_5_FLOOR[c.r];
-	int g = Data::BITSCALE_8_TO_5_FLOOR[c.g];
-	int b = Data::BITSCALE_8_TO_4_FLOOR[c.b];
-	colorA = r<<9 | g<<4 | b;
-	colorAIsOpaque = true;
-}
-
-void PvrTcPacket::SetColorB(const ColorRgb<unsigned char>& c)
-{
-	int r = Data::BITSCALE_8_TO_5_CEIL[c.r];
-	int g = Data::BITSCALE_8_TO_5_CEIL[c.g];
-	int b = Data::BITSCALE_8_TO_5_CEIL[c.b];
-	colorB = r<<10 | g<<5 | b;
-	colorBIsOpaque = true;
-}
-
-void PvrTcPacket::SetColorA(const ColorRgba<unsigned char>& c)
-{
-	int a = Data::BITSCALE_8_TO_3_FLOOR[c.a];
-	if(a == 7)
-	{
-		int r = Data::BITSCALE_8_TO_5_FLOOR[c.r];
-		int g = Data::BITSCALE_8_TO_5_FLOOR[c.g];
-		int b = Data::BITSCALE_8_TO_4_FLOOR[c.b];
-		colorA = r<<9 | g<<4 | b;
-		colorAIsOpaque = true;
-	}
-	else
-	{
-		int r = Data::BITSCALE_8_TO_4_FLOOR[c.r];
-		int g = Data::BITSCALE_8_TO_4_FLOOR[c.g];
-		int b = Data::BITSCALE_8_TO_3_FLOOR[c.b];
-		colorA = a<<11 | r<<7 | g<<3 | b;
-		colorAIsOpaque = false;
-	}
-}
-
-void PvrTcPacket::SetColorB(const ColorRgba<unsigned char>& c)
-{
-	int a = Data::BITSCALE_8_TO_3_CEIL[c.a];
-	if(a == 7)
-	{
-		int r = Data::BITSCALE_8_TO_5_CEIL[c.r];
-		int g = Data::BITSCALE_8_TO_5_CEIL[c.g];
-		int b = Data::BITSCALE_8_TO_5_CEIL[c.b];
-		colorB = r<<10 | g<<5 | b;
-		colorBIsOpaque = true;
-	}
-	else
-	{
-		int r = Data::BITSCALE_8_TO_4_CEIL[c.r];
-		int g = Data::BITSCALE_8_TO_4_CEIL[c.g];
-		int b = Data::BITSCALE_8_TO_4_CEIL[c.b];
-		colorB = a<<12 | r<<8 | g<<4 | b;
-		colorBIsOpaque = false;
-	}
-}
-
-//============================================================================
diff --git a/3rdparty/pvrtc/PvrTcPacket.h b/3rdparty/pvrtc/PvrTcPacket.h
deleted file mode 100644
index ac3b6a4dd..000000000
--- a/3rdparty/pvrtc/PvrTcPacket.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//============================================================================
-//
-// Modulation data specifies weightings of colorA to colorB for each pixel
-//
-// For mode = 0
-//	00: 0/8
-//  01: 3/8
-//  10: 5/8
-//  11: 8/8
-//
-// For mode = 1
-//  00: 0/8
-//  01: 4/8
-//  10: 4/8 with alpha punchthrough
-//  11: 8/8
-//
-// For colorIsOpaque=0
-//  3 bits A
-//  4 bits R
-//  4 bits G
-//  3/4 bits B
-//
-// For colorIsOpaque=1
-//  5 bits R
-//  5 bits G
-//  4/5 bits B
-//
-//============================================================================
-
-#pragma once
-#include "ColorRgba.h"
-
-//============================================================================
-
-namespace Javelin
-{
-//============================================================================
-
-	struct PvrTcPacket
-	{
-		unsigned int    modulationData;
-		unsigned        usePunchthroughAlpha : 1;
-		unsigned        colorA          	 : 14;
-		unsigned        colorAIsOpaque  	 : 1;
-		unsigned        colorB        		 : 15;
-		unsigned        colorBIsOpaque  	 : 1;
-		
-		ColorRgb<int> GetColorRgbA() const;
-		ColorRgb<int> GetColorRgbB() const;
-		ColorRgba<int> GetColorRgbaA() const;
-		ColorRgba<int> GetColorRgbaB() const;
-		
-		void SetColorA(const ColorRgb<unsigned char>& c);
-		void SetColorB(const ColorRgb<unsigned char>& c);
-
-		void SetColorA(const ColorRgba<unsigned char>& c);
-		void SetColorB(const ColorRgba<unsigned char>& c);
-		
-		static const unsigned char BILINEAR_FACTORS[16][4];
-		static const unsigned char WEIGHTS[8][4];
-	};
-
-//============================================================================
-} // namespace Javelin
-//============================================================================
diff --git a/3rdparty/pvrtc/README.md b/3rdparty/pvrtc/README.md
deleted file mode 100644
index fb31a1820..000000000
--- a/3rdparty/pvrtc/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-PvrTcCompressor
-===============
-
-This was an afternoon project to determine whether crude approximations could
-produce reasonable results.
-
-~~This is *NOT* complete sourcecode. It includes enough code to show the details
-of how the algorithm works.~~
-
-~~If anyone decides to make this compile separately, send a pull request.~~
-
-Thanks to Brendan Duncan for contributing a pull request to fill in all of the
-classes and to build a simple test case. Specifically, he has contributed all of
-the files that do NOT begin with PvrTc
-
-http://roartindon.blogspot.sg/2014/08/pvr-texture-compression-exploration.html
-
diff --git a/3rdparty/pvrtc/RgbBitmap.h b/3rdparty/pvrtc/RgbBitmap.h
deleted file mode 100644
index 4f3c57b5c..000000000
--- a/3rdparty/pvrtc/RgbBitmap.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#pragma once
-
-#include "Bitmap.h"
-#include "ColorRgba.h"
-
-namespace Javelin {
-
-class RgbBitmap : public Bitmap {
-public:
-    RgbBitmap() {}
-
-    RgbBitmap(int w, int h)
-        : Bitmap(w, h, 3) {
-    }
-
-    const ColorRgb<unsigned char> *GetData() const { 
-        return reinterpret_cast<ColorRgb<unsigned char> *>(data); 
-    }
-
-    ColorRgb<unsigned char> *GetData() { 
-        return reinterpret_cast<ColorRgb<unsigned char> *>(data); 
-    }
-};
-
-}
diff --git a/3rdparty/pvrtc/RgbaBitmap.h b/3rdparty/pvrtc/RgbaBitmap.h
deleted file mode 100644
index ae43a779d..000000000
--- a/3rdparty/pvrtc/RgbaBitmap.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#pragma once
-
-#include "ColorRgba.h"
-
-namespace Javelin {
-
-class RgbaBitmap : public Bitmap {
-public:
-    RgbaBitmap() {}
-
-    RgbaBitmap(int w, int h)
-        : Bitmap(w, h, 4) {
-    }
-
-    const ColorRgba<unsigned char> *GetData() const { 
-        return reinterpret_cast<ColorRgba<unsigned char> *>(data); 
-    }
-
-    ColorRgba<unsigned char> *GetData() { 
-        return reinterpret_cast<ColorRgba<unsigned char> *>(data); 
-    }
-};
-
-}
diff --git a/3rdparty/stb/stb_image.c b/3rdparty/stb/stb_image.c
deleted file mode 100644
index ad26b74f5..000000000
--- a/3rdparty/stb/stb_image.c
+++ /dev/null
@@ -1,6769 +0,0 @@
-#ifdef __GNUC__
-#	pragma GCC diagnostic ignored "-Wshadow"
-#	pragma GCC diagnostic ignored "-Warray-bounds"
-#	ifndef __clang__
-#		pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
-#	endif // __clang__
-#elif defined(_MSC_VER)
-#	pragma warning(disable:4244) // warning C4244: '=': conversion from 'int' to 'stbi__uint16', possible loss of data
-#	pragma warning(disable:4245) // warning C4245: 'argument': conversion from 'int' to 'char', signed/unsigned mismatch
-#	pragma warning(disable:4312) // warning C4312: 'type cast': conversion from '' to '' of greater size
-#	pragma warning(disable:4456) // warning C4456: declaration of 'k' hides previous local declaration
-#	pragma warning(disable:4457) // warning C4457: declaration of 'y' hides function parameter
-#endif
-
-/* stb_image - v2.06 - public domain image loader - http://nothings.org/stb_image.h
-                                     no warranty implied; use at your own risk
-
-   Do this:
-      #define STB_IMAGE_IMPLEMENTATION
-   before you include this file in *one* C or C++ file to create the implementation.
-
-   // i.e. it should look like this:
-   #include ...
-   #include ...
-   #include ...
-   #define STB_IMAGE_IMPLEMENTATION
-   #include "stb_image.h"
-
-   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
-   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
-
-
-   QUICK NOTES:
-      Primarily of interest to game developers and other people who can
-          avoid problematic images and only need the trivial interface
-
-      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
-      PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
-
-      TGA (not sure what subset, if a subset)
-      BMP non-1bpp, non-RLE
-      PSD (composited view only, no extra channels, 8/16 bit-per-channel)
-
-      GIF (*comp always reports as 4-channel)
-      HDR (radiance rgbE format)
-      PIC (Softimage PIC)
-      PNM (PPM and PGM binary only)
-
-      Animated GIF still needs a proper API, but here's one way to do it:
-          http://gist.github.com/urraka/685d9a6340b26b830d49
-
-      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
-      - decode from arbitrary I/O callbacks
-      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
-
-   Full documentation under "DOCUMENTATION" below.
-
-
-   Revision 2.00 release notes:
-
-      - Progressive JPEG is now supported.
-
-      - PPM and PGM binary formats are now supported, thanks to Ken Miller.
-
-      - x86 platforms now make use of SSE2 SIMD instructions for
-        JPEG decoding, and ARM platforms can use NEON SIMD if requested.
-        This work was done by Fabian "ryg" Giesen. SSE2 is used by
-        default, but NEON must be enabled explicitly; see docs.
-
-        With other JPEG optimizations included in this version, we see
-        2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
-        on a JPEG on an ARM machine, relative to previous versions of this
-        library. The same results will not obtain for all JPGs and for all
-        x86/ARM machines. (Note that progressive JPEGs are significantly
-        slower to decode than regular JPEGs.) This doesn't mean that this
-        is the fastest JPEG decoder in the land; rather, it brings it
-        closer to parity with standard libraries. If you want the fastest
-        decode, look elsewhere. (See "Philosophy" section of docs below.)
-
-        See final bullet items below for more info on SIMD.
-
-      - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
-        the memory allocator. Unlike other STBI libraries, these macros don't
-        support a context parameter, so if you need to pass a context in to
-        the allocator, you'll have to store it in a global or a thread-local
-        variable.
-
-      - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
-        STBI_NO_LINEAR.
-            STBI_NO_HDR:     suppress implementation of .hdr reader format
-            STBI_NO_LINEAR:  suppress high-dynamic-range light-linear float API
-
-      - You can suppress implementation of any of the decoders to reduce
-        your code footprint by #defining one or more of the following
-        symbols before creating the implementation.
-
-            STBI_NO_JPEG
-            STBI_NO_PNG
-            STBI_NO_BMP
-            STBI_NO_PSD
-            STBI_NO_TGA
-            STBI_NO_GIF
-            STBI_NO_HDR
-            STBI_NO_PIC
-            STBI_NO_PNM   (.ppm and .pgm)
-
-      - You can request *only* certain decoders and suppress all other ones
-        (this will be more forward-compatible, as addition of new decoders
-        doesn't require you to disable them explicitly):
-
-            STBI_ONLY_JPEG
-            STBI_ONLY_PNG
-            STBI_ONLY_BMP
-            STBI_ONLY_PSD
-            STBI_ONLY_TGA
-            STBI_ONLY_GIF
-            STBI_ONLY_HDR
-            STBI_ONLY_PIC
-            STBI_ONLY_PNM   (.ppm and .pgm)
-
-         Note that you can define multiples of these, and you will get all
-         of them ("only x" and "only y" is interpreted to mean "only x&y").
-
-       - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
-         want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
-
-      - Compilation of all SIMD code can be suppressed with
-            #define STBI_NO_SIMD
-        It should not be necessary to disable SIMD unless you have issues
-        compiling (e.g. using an x86 compiler which doesn't support SSE
-        intrinsics or that doesn't support the method used to detect
-        SSE2 support at run-time), and even those can be reported as
-        bugs so I can refine the built-in compile-time checking to be
-        smarter.
-
-      - The old STBI_SIMD system which allowed installing a user-defined
-        IDCT etc. has been removed. If you need this, don't upgrade. My
-        assumption is that almost nobody was doing this, and those who
-        were will find the built-in SIMD more satisfactory anyway.
-
-      - RGB values computed for JPEG images are slightly different from
-        previous versions of stb_image. (This is due to using less
-        integer precision in SIMD.) The C code has been adjusted so
-        that the same RGB values will be computed regardless of whether
-        SIMD support is available, so your app should always produce
-        consistent results. But these results are slightly different from
-        previous versions. (Specifically, about 3% of available YCbCr values
-        will compute different RGB results from pre-1.49 versions by +-1;
-        most of the deviating values are one smaller in the G channel.)
-
-      - If you must produce consistent results with previous versions of
-        stb_image, #define STBI_JPEG_OLD and you will get the same results
-        you used to; however, you will not get the SIMD speedups for
-        the YCbCr-to-RGB conversion step (although you should still see
-        significant JPEG speedup from the other changes).
-
-        Please note that STBI_JPEG_OLD is a temporary feature; it will be
-        removed in future versions of the library. It is only intended for
-        near-term back-compatibility use.
-
-
-   Latest revision history:
-      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
-      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
-                         RGB-format JPEG; remove white matting in PSD;
-                         allocate large structures on the stack; 
-                         correct channel count for PNG & BMP
-      2.10  (2016-01-22) avoid warning introduced in 2.09
-      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
-      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
-      2.07  (2015-09-13) partial animated GIF support
-                         limited 16-bit PSD support
-                         minor bugs, code cleanup, and compiler warnings
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) additional corruption checking
-                         stbi_set_flip_vertically_on_load
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
-                         progressive JPEG
-                         PGM/PPM support
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         STBI_NO_*, STBI_ONLY_*
-                         GIF bugfix
-
-   See end of file for full revision history.
-
-
- ============================    Contributors    =========================
-
- Image formats                          Extensions, features
-    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
-    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
-    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
-    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
-    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
-    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
-    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
-    urraka@github (animated gif)           Junggon Kim (PNM comments)
-                                           Daniel Gibson (16-bit TGA)
-
- Optimizations & bugfixes
-    Fabian "ryg" Giesen
-    Arseny Kapoulkine
-
- Bug & warning fixes
-    Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
-    Christpher Lloyd        Martin Golini      Jerry Jansson      Joseph Thomson
-    Dave Moore              Roy Eltham         Hayaki Saito       Phil Jordan
-    Won Chun                Luke Graham        Johan Duparc       Nathan Reed
-    the Horde3D community   Thomas Ruf         Ronny Chevalier    Nick Verigakis
-    Janez Zemva             John Bartholomew   Michal Cichon      svdijk@github
-    Jonathan Blow           Ken Hamada         Tero Hanninen      Baldur Karlsson
-    Laurent Gomila          Cort Stratton      Sergio Gonzalez    romigrou@github
-    Aruelien Pocheville     Thibault Reuille   Cass Everitt       Matthew Gregan
-    Ryamond Barbiero        Paul Du Bois       Engin Manap        snagar@github
-    Michaelangel007@github  Oriol Ferrer Mesia socks-the-fox
-    Blazej Dariusz Roszkowski
-
-
-LICENSE
-
-This software is dual-licensed to the public domain and under the following
-license: you are granted a perpetual, irrevocable license to copy, modify,
-publish, and distribute this file as you see fit.
-
-*/
-
-#ifndef STBI_INCLUDE_STB_IMAGE_H
-#define STBI_INCLUDE_STB_IMAGE_H
-
-// DOCUMENTATION
-//
-// Limitations:
-//    - no 16-bit-per-channel PNG
-//    - no 12-bit-per-channel JPEG
-//    - no JPEGs with arithmetic coding
-//    - no 1-bit BMP
-//    - GIF always returns *comp=4
-//
-// Basic usage (see HDR discussion below for HDR usage):
-//    int x,y,n;
-//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
-//    // ... process data if not NULL ...
-//    // ... x = width, y = height, n = # 8-bit components per pixel ...
-//    // ... replace '0' with '1'..'4' to force that many components per pixel
-//    // ... but 'n' will always be the number that it would have been if you said 0
-//    stbi_image_free(data)
-//
-// Standard parameters:
-//    int *x       -- outputs image width in pixels
-//    int *y       -- outputs image height in pixels
-//    int *comp    -- outputs # of image components in image file
-//    int req_comp -- if non-zero, # of image components requested in result
-//
-// The return value from an image loader is an 'unsigned char *' which points
-// to the pixel data, or NULL on an allocation failure or if the image is
-// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
-// with each pixel consisting of N interleaved 8-bit components; the first
-// pixel pointed to is top-left-most in the image. There is no padding between
-// image scanlines or between pixels, regardless of format. The number of
-// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
-// If req_comp is non-zero, *comp has the number of components that _would_
-// have been output otherwise. E.g. if you set req_comp to 4, you will always
-// get RGBA output, but you can check *comp to see if it's trivially opaque
-// because e.g. there were only 3 channels in the source image.
-//
-// An output image with N components has the following components interleaved
-// in this order in each pixel:
-//
-//     N=#comp     components
-//       1           grey
-//       2           grey, alpha
-//       3           red, green, blue
-//       4           red, green, blue, alpha
-//
-// If image loading fails for any reason, the return value will be NULL,
-// and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
-// can be queried for an extremely brief, end-user unfriendly explanation
-// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
-// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
-// more user-friendly ones.
-//
-// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
-//
-// ===========================================================================
-//
-// Philosophy
-//
-// stb libraries are designed with the following priorities:
-//
-//    1. easy to use
-//    2. easy to maintain
-//    3. good performance
-//
-// Sometimes I let "good performance" creep up in priority over "easy to maintain",
-// and for best performance I may provide less-easy-to-use APIs that give higher
-// performance, in addition to the easy to use ones. Nevertheless, it's important
-// to keep in mind that from the standpoint of you, a client of this library,
-// all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
-//
-// Some secondary priorities arise directly from the first two, some of which
-// make more explicit reasons why performance can't be emphasized.
-//
-//    - Portable ("ease of use")
-//    - Small footprint ("easy to maintain")
-//    - No dependencies ("ease of use")
-//
-// ===========================================================================
-//
-// I/O callbacks
-//
-// I/O callbacks allow you to read from arbitrary sources, like packaged
-// files or some other source. Data read from callbacks are processed
-// through a small internal buffer (currently 128 bytes) to try to reduce
-// overhead.
-//
-// The three functions you must define are "read" (reads some bytes of data),
-// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
-//
-// ===========================================================================
-//
-// SIMD support
-//
-// The JPEG decoder will try to automatically use SIMD kernels on x86 when
-// supported by the compiler. For ARM Neon support, you must explicitly
-// request it.
-//
-// (The old do-it-yourself SIMD API is no longer supported in the current
-// code.)
-//
-// On x86, SSE2 will automatically be used when available based on a run-time
-// test; if not, the generic C versions are used as a fall-back. On ARM targets,
-// the typical path is to have separate builds for NEON and non-NEON devices
-// (at least this is true for iOS and Android). Therefore, the NEON support is
-// toggled by a build flag: define STBI_NEON to get NEON loops.
-//
-// The output of the JPEG decoder is slightly different from versions where
-// SIMD support was introduced (that is, for versions before 1.49). The
-// difference is only +-1 in the 8-bit RGB channels, and only on a small
-// fraction of pixels. You can force the pre-1.49 behavior by defining
-// STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
-// and hence cost some performance.
-//
-// If for some reason you do not want to use any of SIMD code, or if
-// you have issues compiling it, you can disable it entirely by
-// defining STBI_NO_SIMD.
-//
-// ===========================================================================
-//
-// HDR image support   (disable by defining STBI_NO_HDR)
-//
-// stb_image now supports loading HDR images in general, and currently
-// the Radiance .HDR file format, although the support is provided
-// generically. You can still load any file through the existing interface;
-// if you attempt to load an HDR file, it will be automatically remapped to
-// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
-// both of these constants can be reconfigured through this interface:
-//
-//     stbi_hdr_to_ldr_gamma(2.2f);
-//     stbi_hdr_to_ldr_scale(1.0f);
-//
-// (note, do not use _inverse_ constants; stbi_image will invert them
-// appropriately).
-//
-// Additionally, there is a new, parallel interface for loading files as
-// (linear) floats to preserve the full dynamic range:
-//
-//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
-//
-// If you load LDR images through this interface, those images will
-// be promoted to floating point values, run through the inverse of
-// constants corresponding to the above:
-//
-//     stbi_ldr_to_hdr_scale(1.0f);
-//     stbi_ldr_to_hdr_gamma(2.2f);
-//
-// Finally, given a filename (or an open file or memory block--see header
-// file for details) containing image data, you can query for the "most
-// appropriate" interface to use (that is, whether the image is HDR or
-// not), using:
-//
-//     stbi_is_hdr(char *filename);
-//
-// ===========================================================================
-//
-// iPhone PNG support:
-//
-// By default we convert iphone-formatted PNGs back to RGB, even though
-// they are internally encoded differently. You can disable this conversion
-// by by calling stbi_convert_iphone_png_to_rgb(0), in which case
-// you will always just get the native iphone "format" through (which
-// is BGR stored in RGB).
-//
-// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
-// pixel to remove any premultiplied alpha *only* if the image file explicitly
-// says there's premultiplied data (currently only happens in iPhone images,
-// and only if iPhone convert-to-rgb processing is on).
-//
-
-
-#ifndef STBI_NO_STDIO
-#include <stdio.h>
-#endif // STBI_NO_STDIO
-
-#define STBI_VERSION 1
-
-enum
-{
-   STBI_default = 0, // only used for req_comp
-
-   STBI_grey       = 1,
-   STBI_grey_alpha = 2,
-   STBI_rgb        = 3,
-   STBI_rgb_alpha  = 4
-};
-
-typedef unsigned char stbi_uc;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef STB_IMAGE_STATIC
-#define STBIDEF static
-#else
-#define STBIDEF extern
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// PRIMARY API - works on images of any type
-//
-
-//
-// load image by filename, open file, or memory buffer
-//
-
-typedef struct
-{
-   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
-   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
-   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
-} stbi_io_callbacks;
-
-STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *comp, int req_comp);
-
-#ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
-// for stbi_load_from_file, file pointer is left pointing immediately after image
-#endif
-
-#ifndef STBI_NO_LINEAR
-   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
-
-   #ifndef STBI_NO_STDIO
-   STBIDEF float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
-   #endif
-#endif
-
-#ifndef STBI_NO_HDR
-   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
-   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
-#endif // STBI_NO_HDR
-
-#ifndef STBI_NO_LINEAR
-   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
-   STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
-#endif // STBI_NO_LINEAR
-
-// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
-STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
-STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
-#ifndef STBI_NO_STDIO
-STBIDEF int      stbi_is_hdr          (char const *filename);
-STBIDEF int      stbi_is_hdr_from_file(FILE *f);
-#endif // STBI_NO_STDIO
-
-
-// get a VERY brief reason for failure
-// NOT THREADSAFE
-STBIDEF const char *stbi_failure_reason  (void);
-
-// free the loaded image -- this is just free()
-STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
-
-// get image dimensions & components without fully decoding
-STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
-STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
-
-#ifndef STBI_NO_STDIO
-STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
-STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
-
-#endif
-
-
-
-// for image formats that explicitly notate that they have premultiplied alpha,
-// we just return the colors as stored in the file. set this flag to force
-// unpremultiplication. results are undefined if the unpremultiply overflow.
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
-
-// indicate whether we should process iphone images back to canonical format,
-// or just pass them through "as-is"
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
-
-// flip the image vertically, so the first pixel in the output array is the bottom left
-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
-
-// ZLIB client - used by PNG, available for other purposes
-
-STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
-STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
-STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
-STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
-
-STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
-STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-//
-//
-////   end header file   /////////////////////////////////////////////////////
-#endif // STBI_INCLUDE_STB_IMAGE_H
-
-#ifdef STB_IMAGE_IMPLEMENTATION
-
-#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
-  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
-  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
-  || defined(STBI_ONLY_ZLIB)
-   #ifndef STBI_ONLY_JPEG
-   #define STBI_NO_JPEG
-   #endif
-   #ifndef STBI_ONLY_PNG
-   #define STBI_NO_PNG
-   #endif
-   #ifndef STBI_ONLY_BMP
-   #define STBI_NO_BMP
-   #endif
-   #ifndef STBI_ONLY_PSD
-   #define STBI_NO_PSD
-   #endif
-   #ifndef STBI_ONLY_TGA
-   #define STBI_NO_TGA
-   #endif
-   #ifndef STBI_ONLY_GIF
-   #define STBI_NO_GIF
-   #endif
-   #ifndef STBI_ONLY_HDR
-   #define STBI_NO_HDR
-   #endif
-   #ifndef STBI_ONLY_PIC
-   #define STBI_NO_PIC
-   #endif
-   #ifndef STBI_ONLY_PNM
-   #define STBI_NO_PNM
-   #endif
-#endif
-
-#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
-#define STBI_NO_ZLIB
-#endif
-
-
-#include <stdarg.h>
-#include <stddef.h> // ptrdiff_t on osx
-#include <stdlib.h>
-#include <string.h>
-
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
-#include <math.h>  // ldexp
-#endif
-
-#ifndef STBI_NO_STDIO
-#include <stdio.h>
-#endif
-
-#ifndef STBI_ASSERT
-#include <assert.h>
-#define STBI_ASSERT(x) assert(x)
-#endif
-
-
-#ifndef _MSC_VER
-   #ifdef __cplusplus
-   #define stbi_inline inline
-   #else
-   #define stbi_inline
-   #endif
-#else
-   #define stbi_inline __forceinline
-#endif
-
-
-#ifdef _MSC_VER
-typedef unsigned short stbi__uint16;
-typedef   signed short stbi__int16;
-typedef unsigned int   stbi__uint32;
-typedef   signed int   stbi__int32;
-#else
-#include <stdint.h>
-typedef uint16_t stbi__uint16;
-typedef int16_t  stbi__int16;
-typedef uint32_t stbi__uint32;
-typedef int32_t  stbi__int32;
-#endif
-
-// should produce compiler error if size is wrong
-typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
-
-#ifdef _MSC_VER
-#define STBI_NOTUSED(v)  (void)(v)
-#else
-#define STBI_NOTUSED(v)  (void)sizeof(v)
-#endif
-
-#ifdef _MSC_VER
-#define STBI_HAS_LROTL
-#endif
-
-#ifdef STBI_HAS_LROTL
-   #define stbi_lrot(x,y)  _lrotl(x,y)
-#else
-   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
-#endif
-
-#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
-// ok
-#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
-// ok
-#else
-#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
-#endif
-
-#ifndef STBI_MALLOC
-#define STBI_MALLOC(sz)           malloc(sz)
-#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
-#define STBI_FREE(p)              free(p)
-#endif
-
-#ifndef STBI_REALLOC_SIZED
-#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
-#endif
-
-// x86/x64 detection
-#if defined(__x86_64__) || defined(_M_X64)
-#define STBI__X64_TARGET
-#elif defined(__i386) || defined(_M_IX86)
-#define STBI__X86_TARGET
-#endif
-
-#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
-// NOTE: not clear do we actually need this for the 64-bit path?
-// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
-// (but compiling with -msse2 allows the compiler to use SSE2 everywhere;
-// this is just broken and gcc are jerks for not fixing it properly
-// http://www.virtualdub.org/blog/pivot/entry.php?id=363 )
-#define STBI_NO_SIMD
-#endif
-
-#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
-// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
-//
-// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
-// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
-// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
-// simultaneously enabling "-mstackrealign".
-//
-// See https://github.com/nothings/stb/issues/81 for more information.
-//
-// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
-// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
-#define STBI_NO_SIMD
-#endif
-
-#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
-#define STBI_SSE2
-#include <emmintrin.h>
-
-#ifdef _MSC_VER
-
-#if _MSC_VER >= 1400  // not VC6
-#include <intrin.h> // __cpuid
-static int stbi__cpuid3(void)
-{
-   int info[4];
-   __cpuid(info,1);
-   return info[3];
-}
-#else
-static int stbi__cpuid3(void)
-{
-   int res;
-   __asm {
-      mov  eax,1
-      cpuid
-      mov  res,edx
-   }
-   return res;
-}
-#endif
-
-#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
-
-static int stbi__sse2_available()
-{
-   int info3 = stbi__cpuid3();
-   return ((info3 >> 26) & 1) != 0;
-}
-#else // assume GCC-style if not VC++
-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-
-static int stbi__sse2_available()
-{
-#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
-   // GCC 4.8+ has a nice way to do this
-   return __builtin_cpu_supports("sse2");
-#else
-   // portable way to do this, preferably without using GCC inline ASM?
-   // just bail for now.
-   return 0;
-#endif
-}
-#endif
-#endif
-
-// ARM NEON
-#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
-#undef STBI_NEON
-#endif
-
-#ifdef STBI_NEON
-#include <arm_neon.h>
-// assume GCC or Clang on ARM targets
-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-#endif
-
-#ifndef STBI_SIMD_ALIGN
-#define STBI_SIMD_ALIGN(type, name) type name
-#endif
-
-///////////////////////////////////////////////
-//
-//  stbi__context struct and start_xxx functions
-
-// stbi__context structure is our basic context used by all images, so it
-// contains all the IO context, plus some basic image information
-typedef struct
-{
-   stbi__uint32 img_x, img_y;
-   int img_n, img_out_n;
-
-   stbi_io_callbacks io;
-   void *io_user_data;
-
-   int read_from_callbacks;
-   int buflen;
-   stbi_uc buffer_start[128];
-
-   stbi_uc *img_buffer, *img_buffer_end;
-   stbi_uc *img_buffer_original, *img_buffer_original_end;
-} stbi__context;
-
-
-static void stbi__refill_buffer(stbi__context *s);
-
-// initialize a memory-decode context
-static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
-{
-   s->io.read = NULL;
-   s->read_from_callbacks = 0;
-   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
-   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
-}
-
-// initialize a callback-based context
-static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
-{
-   s->io = *c;
-   s->io_user_data = user;
-   s->buflen = sizeof(s->buffer_start);
-   s->read_from_callbacks = 1;
-   s->img_buffer_original = s->buffer_start;
-   stbi__refill_buffer(s);
-   s->img_buffer_original_end = s->img_buffer_end;
-}
-
-#ifndef STBI_NO_STDIO
-
-static int stbi__stdio_read(void *user, char *data, int size)
-{
-   return (int) fread(data,1,size,(FILE*) user);
-}
-
-static void stbi__stdio_skip(void *user, int n)
-{
-   fseek((FILE*) user, n, SEEK_CUR);
-}
-
-static int stbi__stdio_eof(void *user)
-{
-   return feof((FILE*) user);
-}
-
-static stbi_io_callbacks stbi__stdio_callbacks =
-{
-   stbi__stdio_read,
-   stbi__stdio_skip,
-   stbi__stdio_eof,
-};
-
-static void stbi__start_file(stbi__context *s, FILE *f)
-{
-   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
-}
-
-//static void stop_file(stbi__context *s) { }
-
-#endif // !STBI_NO_STDIO
-
-static void stbi__rewind(stbi__context *s)
-{
-   // conceptually rewind SHOULD rewind to the beginning of the stream,
-   // but we just rewind to the beginning of the initial buffer, because
-   // we only use it after doing 'test', which only ever looks at at most 92 bytes
-   s->img_buffer = s->img_buffer_original;
-   s->img_buffer_end = s->img_buffer_original_end;
-}
-
-#ifndef STBI_NO_JPEG
-static int      stbi__jpeg_test(stbi__context *s);
-static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PNG
-static int      stbi__png_test(stbi__context *s);
-static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_BMP
-static int      stbi__bmp_test(stbi__context *s);
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_TGA
-static int      stbi__tga_test(stbi__context *s);
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PSD
-static int      stbi__psd_test(stbi__context *s);
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_HDR
-static int      stbi__hdr_test(stbi__context *s);
-static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PIC
-static int      stbi__pic_test(stbi__context *s);
-static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_GIF
-static int      stbi__gif_test(stbi__context *s);
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PNM
-static int      stbi__pnm_test(stbi__context *s);
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
-static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-// this is not threadsafe
-static const char *stbi__g_failure_reason;
-
-STBIDEF const char *stbi_failure_reason(void)
-{
-   return stbi__g_failure_reason;
-}
-
-static int stbi__err(const char *str)
-{
-   stbi__g_failure_reason = str;
-   return 0;
-}
-
-static void *stbi__malloc(size_t size)
-{
-    return STBI_MALLOC(size);
-}
-
-// stbi__err - error
-// stbi__errpf - error returning pointer to float
-// stbi__errpuc - error returning pointer to unsigned char
-
-#ifdef STBI_NO_FAILURE_STRINGS
-   #define stbi__err(x,y)  0
-#elif defined(STBI_FAILURE_USERMSG)
-   #define stbi__err(x,y)  stbi__err(y)
-#else
-   #define stbi__err(x,y)  stbi__err(x)
-#endif
-
-#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
-#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
-
-STBIDEF void stbi_image_free(void *retval_from_stbi_load)
-{
-   STBI_FREE(retval_from_stbi_load);
-}
-
-#ifndef STBI_NO_LINEAR
-static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
-#endif
-
-#ifndef STBI_NO_HDR
-static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
-#endif
-
-static int stbi__vertically_flip_on_load = 0;
-
-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
-{
-    stbi__vertically_flip_on_load = flag_true_if_should_flip;
-}
-
-static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
-   #endif
-   #ifndef STBI_NO_PNG
-   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp);
-   #endif
-   #ifndef STBI_NO_BMP
-   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp);
-   #endif
-   #ifndef STBI_NO_GIF
-   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp);
-   #endif
-   #ifndef STBI_NO_PSD
-   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp);
-   #endif
-   #ifndef STBI_NO_PIC
-   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp);
-   #endif
-   #ifndef STBI_NO_PNM
-   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp);
-   #endif
-
-   #ifndef STBI_NO_HDR
-   if (stbi__hdr_test(s)) {
-      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
-      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
-   }
-   #endif
-
-   #ifndef STBI_NO_TGA
-   // test tga last because it's a crappy test!
-   if (stbi__tga_test(s))
-      return stbi__tga_load(s,x,y,comp,req_comp);
-   #endif
-
-   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
-}
-
-static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   unsigned char *result = stbi__load_main(s, x, y, comp, req_comp);
-
-   if (stbi__vertically_flip_on_load && result != NULL) {
-      int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
-      int row,col,z;
-      stbi_uc temp;
-
-      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
-      for (row = 0; row < (h>>1); row++) {
-         for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
-            }
-         }
-      }
-   }
-
-   return result;
-}
-
-#ifndef STBI_NO_HDR
-static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
-{
-   if (stbi__vertically_flip_on_load && result != NULL) {
-      int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
-      int row,col,z;
-      float temp;
-
-      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
-      for (row = 0; row < (h>>1); row++) {
-         for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
-            }
-         }
-      }
-   }
-}
-#endif
-
-#ifndef STBI_NO_STDIO
-
-static FILE *stbi__fopen(char const *filename, char const *mode)
-{
-   FILE *f;
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-   if (0 != fopen_s(&f, filename, mode))
-      f=0;
-#else
-   f = fopen(filename, mode);
-#endif
-   return f;
-}
-
-
-STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
-{
-   FILE *f = stbi__fopen(filename, "rb");
-   unsigned char *result;
-   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
-   result = stbi_load_from_file(f,x,y,comp,req_comp);
-   fclose(f);
-   return result;
-}
-
-STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
-{
-   unsigned char *result;
-   stbi__context s;
-   stbi__start_file(&s,f);
-   result = stbi__load_flip(&s,x,y,comp,req_comp);
-   if (result) {
-      // need to 'unget' all the characters in the IO buffer
-      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
-   }
-   return result;
-}
-#endif //!STBI_NO_STDIO
-
-STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
-}
-
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
-}
-
-#ifndef STBI_NO_LINEAR
-static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   unsigned char *data;
-   #ifndef STBI_NO_HDR
-   if (stbi__hdr_test(s)) {
-      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp);
-      if (hdr_data)
-         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
-      return hdr_data;
-   }
-   #endif
-   data = stbi__load_flip(s, x, y, comp, req_comp);
-   if (data)
-      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
-   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
-}
-
-STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__loadf_main(&s,x,y,comp,req_comp);
-}
-
-STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__loadf_main(&s,x,y,comp,req_comp);
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
-{
-   float *result;
-   FILE *f = stbi__fopen(filename, "rb");
-   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
-   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
-   fclose(f);
-   return result;
-}
-
-STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_file(&s,f);
-   return stbi__loadf_main(&s,x,y,comp,req_comp);
-}
-#endif // !STBI_NO_STDIO
-
-#endif // !STBI_NO_LINEAR
-
-// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
-// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
-// reports false!
-
-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
-{
-   #ifndef STBI_NO_HDR
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__hdr_test(&s);
-   #else
-   STBI_NOTUSED(buffer);
-   STBI_NOTUSED(len);
-   return 0;
-   #endif
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF int      stbi_is_hdr          (char const *filename)
-{
-   FILE *f = stbi__fopen(filename, "rb");
-   int result=0;
-   if (f) {
-      result = stbi_is_hdr_from_file(f);
-      fclose(f);
-   }
-   return result;
-}
-
-STBIDEF int      stbi_is_hdr_from_file(FILE *f)
-{
-   #ifndef STBI_NO_HDR
-   stbi__context s;
-   stbi__start_file(&s,f);
-   return stbi__hdr_test(&s);
-   #else
-   STBI_NOTUSED(f);
-   return 0;
-   #endif
-}
-#endif // !STBI_NO_STDIO
-
-STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
-{
-   #ifndef STBI_NO_HDR
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__hdr_test(&s);
-   #else
-   STBI_NOTUSED(clbk);
-   STBI_NOTUSED(user);
-   return 0;
-   #endif
-}
-
-#ifndef STBI_NO_LINEAR
-static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
-
-STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
-STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
-#endif
-
-static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
-
-STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
-STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// Common code used by all image loaders
-//
-
-enum
-{
-   STBI__SCAN_load=0,
-   STBI__SCAN_type,
-   STBI__SCAN_header
-};
-
-static void stbi__refill_buffer(stbi__context *s)
-{
-   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
-   if (n == 0) {
-      // at end of file, treat same as if from memory, but need to handle case
-      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
-      s->read_from_callbacks = 0;
-      s->img_buffer = s->buffer_start;
-      s->img_buffer_end = s->buffer_start+1;
-      *s->img_buffer = 0;
-   } else {
-      s->img_buffer = s->buffer_start;
-      s->img_buffer_end = s->buffer_start + n;
-   }
-}
-
-stbi_inline static stbi_uc stbi__get8(stbi__context *s)
-{
-   if (s->img_buffer < s->img_buffer_end)
-      return *s->img_buffer++;
-   if (s->read_from_callbacks) {
-      stbi__refill_buffer(s);
-      return *s->img_buffer++;
-   }
-   return 0;
-}
-
-stbi_inline static int stbi__at_eof(stbi__context *s)
-{
-   if (s->io.read) {
-      if (!(s->io.eof)(s->io_user_data)) return 0;
-      // if feof() is true, check if buffer = end
-      // special case: we've only got the special 0 character at the end
-      if (s->read_from_callbacks == 0) return 1;
-   }
-
-   return s->img_buffer >= s->img_buffer_end;
-}
-
-static void stbi__skip(stbi__context *s, int n)
-{
-   if (n < 0) {
-      s->img_buffer = s->img_buffer_end;
-      return;
-   }
-   if (s->io.read) {
-      int blen = (int) (s->img_buffer_end - s->img_buffer);
-      if (blen < n) {
-         s->img_buffer = s->img_buffer_end;
-         (s->io.skip)(s->io_user_data, n - blen);
-         return;
-      }
-   }
-   s->img_buffer += n;
-}
-
-static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
-{
-   if (s->io.read) {
-      int blen = (int) (s->img_buffer_end - s->img_buffer);
-      if (blen < n) {
-         int res, count;
-
-         memcpy(buffer, s->img_buffer, blen);
-
-         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
-         res = (count == (n-blen));
-         s->img_buffer = s->img_buffer_end;
-         return res;
-      }
-   }
-
-   if (s->img_buffer+n <= s->img_buffer_end) {
-      memcpy(buffer, s->img_buffer, n);
-      s->img_buffer += n;
-      return 1;
-   } else
-      return 0;
-}
-
-static int stbi__get16be(stbi__context *s)
-{
-   int z = stbi__get8(s);
-   return (z << 8) + stbi__get8(s);
-}
-
-static stbi__uint32 stbi__get32be(stbi__context *s)
-{
-   stbi__uint32 z = stbi__get16be(s);
-   return (z << 16) + stbi__get16be(s);
-}
-
-#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
-// nothing
-#else
-static int stbi__get16le(stbi__context *s)
-{
-   int z = stbi__get8(s);
-   return z + (stbi__get8(s) << 8);
-}
-#endif
-
-#ifndef STBI_NO_BMP
-static stbi__uint32 stbi__get32le(stbi__context *s)
-{
-   stbi__uint32 z = stbi__get16le(s);
-   return z + (stbi__get16le(s) << 16);
-}
-#endif
-
-#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-//  generic converter from built-in img_n to req_comp
-//    individual types do this automatically as much as possible (e.g. jpeg
-//    does all cases internally since it needs to colorspace convert anyway,
-//    and it never has alpha, so very few cases ). png can automatically
-//    interleave an alpha=255 channel, but falls back to this for other cases
-//
-//  assume data buffer is malloced, so malloc a new one and free that one
-//  only failure mode is malloc failing
-
-static stbi_uc stbi__compute_y(int r, int g, int b)
-{
-   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
-}
-
-static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
-{
-   int i,j;
-   unsigned char *good;
-
-   if (req_comp == img_n) return data;
-   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
-
-   good = (unsigned char *) stbi__malloc(req_comp * x * y);
-   if (good == NULL) {
-      STBI_FREE(data);
-      return stbi__errpuc("outofmem", "Out of memory");
-   }
-
-   for (j=0; j < (int) y; ++j) {
-      unsigned char *src  = data + j * x * img_n   ;
-      unsigned char *dest = good + j * x * req_comp;
-
-      #define COMBO(a,b)  ((a)*8+(b))
-      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
-      // convert source image with img_n components to one with req_comp components;
-      // avoid switch per pixel, so use switch per scanline and massive macros
-      switch (COMBO(img_n, req_comp)) {
-         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
-         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
-         CASE(2,1) dest[0]=src[0]; break;
-         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
-         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
-         CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
-         CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
-         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
-         default: STBI_ASSERT(0);
-      }
-      #undef CASE
-   }
-
-   STBI_FREE(data);
-   return good;
-}
-
-#ifndef STBI_NO_LINEAR
-static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
-{
-   int i,k,n;
-   float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
-   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
-   // compute number of non-alpha components
-   if (comp & 1) n = comp; else n = comp-1;
-   for (i=0; i < x*y; ++i) {
-      for (k=0; k < n; ++k) {
-         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
-      }
-      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
-   }
-   STBI_FREE(data);
-   return output;
-}
-#endif
-
-#ifndef STBI_NO_HDR
-#define stbi__float2int(x)   ((int) (x))
-static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
-{
-   int i,k,n;
-   stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
-   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
-   // compute number of non-alpha components
-   if (comp & 1) n = comp; else n = comp-1;
-   for (i=0; i < x*y; ++i) {
-      for (k=0; k < n; ++k) {
-         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
-         if (z < 0) z = 0;
-         if (z > 255) z = 255;
-         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
-      }
-      if (k < comp) {
-         float z = data[i*comp+k] * 255 + 0.5f;
-         if (z < 0) z = 0;
-         if (z > 255) z = 255;
-         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
-      }
-   }
-   STBI_FREE(data);
-   return output;
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-//  "baseline" JPEG/JFIF decoder
-//
-//    simple implementation
-//      - doesn't support delayed output of y-dimension
-//      - simple interface (only one output format: 8-bit interleaved RGB)
-//      - doesn't try to recover corrupt jpegs
-//      - doesn't allow partial loading, loading multiple at once
-//      - still fast on x86 (copying globals into locals doesn't help x86)
-//      - allocates lots of intermediate memory (full size of all components)
-//        - non-interleaved case requires this anyway
-//        - allows good upsampling (see next)
-//    high-quality
-//      - upsampled channels are bilinearly interpolated, even across blocks
-//      - quality integer IDCT derived from IJG's 'slow'
-//    performance
-//      - fast huffman; reasonable integer IDCT
-//      - some SIMD kernels for common paths on targets with SSE2/NEON
-//      - uses a lot of intermediate memory, could cache poorly
-
-#ifndef STBI_NO_JPEG
-
-// huffman decoding acceleration
-#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
-
-typedef struct
-{
-   stbi_uc  fast[1 << FAST_BITS];
-   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
-   stbi__uint16 code[256];
-   stbi_uc  values[256];
-   stbi_uc  size[257];
-   unsigned int maxcode[18];
-   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
-} stbi__huffman;
-
-typedef struct
-{
-   stbi__context *s;
-   stbi__huffman huff_dc[4];
-   stbi__huffman huff_ac[4];
-   stbi_uc dequant[4][64];
-   stbi__int16 fast_ac[4][1 << FAST_BITS];
-
-// sizes for components, interleaved MCUs
-   int img_h_max, img_v_max;
-   int img_mcu_x, img_mcu_y;
-   int img_mcu_w, img_mcu_h;
-
-// definition of jpeg image component
-   struct
-   {
-      int id;
-      int h,v;
-      int tq;
-      int hd,ha;
-      int dc_pred;
-
-      int x,y,w2,h2;
-      stbi_uc *data;
-      void *raw_data, *raw_coeff;
-      stbi_uc *linebuf;
-      short   *coeff;   // progressive only
-      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
-   } img_comp[4];
-
-   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
-   int            code_bits;   // number of valid bits
-   unsigned char  marker;      // marker seen while filling entropy buffer
-   int            nomore;      // flag if we saw a marker so must stop
-
-   int            progressive;
-   int            spec_start;
-   int            spec_end;
-   int            succ_high;
-   int            succ_low;
-   int            eob_run;
-   int            rgb;
-
-   int scan_n, order[4];
-   int restart_interval, todo;
-
-// kernels
-   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
-   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
-   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
-} stbi__jpeg;
-
-static int stbi__build_huffman(stbi__huffman *h, int *count)
-{
-   int i,j,k=0,code;
-   // build size list for each symbol (from JPEG spec)
-   for (i=0; i < 16; ++i)
-      for (j=0; j < count[i]; ++j)
-         h->size[k++] = (stbi_uc) (i+1);
-   h->size[k] = 0;
-
-   // compute actual symbols (from jpeg spec)
-   code = 0;
-   k = 0;
-   for(j=1; j <= 16; ++j) {
-      // compute delta to add to code to compute symbol id
-      h->delta[j] = k - code;
-      if (h->size[k] == j) {
-         while (h->size[k] == j)
-            h->code[k++] = (stbi__uint16) (code++);
-         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
-      }
-      // compute largest code + 1 for this size, preshifted as needed later
-      h->maxcode[j] = code << (16-j);
-      code <<= 1;
-   }
-   h->maxcode[j] = 0xffffffff;
-
-   // build non-spec acceleration table; 255 is flag for not-accelerated
-   memset(h->fast, 255, 1 << FAST_BITS);
-   for (i=0; i < k; ++i) {
-      int s = h->size[i];
-      if (s <= FAST_BITS) {
-         int c = h->code[i] << (FAST_BITS-s);
-         int m = 1 << (FAST_BITS-s);
-         for (j=0; j < m; ++j) {
-            h->fast[c+j] = (stbi_uc) i;
-         }
-      }
-   }
-   return 1;
-}
-
-// build a table that decodes both magnitude and value of small ACs in
-// one go.
-static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
-{
-   int i;
-   for (i=0; i < (1 << FAST_BITS); ++i) {
-      stbi_uc fast = h->fast[i];
-      fast_ac[i] = 0;
-      if (fast < 255) {
-         int rs = h->values[fast];
-         int run = (rs >> 4) & 15;
-         int magbits = rs & 15;
-         int len = h->size[fast];
-
-         if (magbits && len + magbits <= FAST_BITS) {
-            // magnitude code followed by receive_extend code
-            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
-            int m = 1 << (magbits - 1);
-            if (k < m) k += (-1 << magbits) + 1;
-            // if the result is small enough, we can fit it in fast_ac table
-            if (k >= -128 && k <= 127)
-               fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
-         }
-      }
-   }
-}
-
-static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
-{
-   do {
-      int b = j->nomore ? 0 : stbi__get8(j->s);
-      if (b == 0xff) {
-         int c = stbi__get8(j->s);
-         if (c != 0) {
-            j->marker = (unsigned char) c;
-            j->nomore = 1;
-            return;
-         }
-      }
-      j->code_buffer |= b << (24 - j->code_bits);
-      j->code_bits += 8;
-   } while (j->code_bits <= 24);
-}
-
-// (1 << n) - 1
-static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
-
-// decode a jpeg huffman value from the bitstream
-stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
-{
-   unsigned int temp;
-   int c,k;
-
-   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-
-   // look at the top FAST_BITS and determine what symbol ID it is,
-   // if the code is <= FAST_BITS
-   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
-   k = h->fast[c];
-   if (k < 255) {
-      int s = h->size[k];
-      if (s > j->code_bits)
-         return -1;
-      j->code_buffer <<= s;
-      j->code_bits -= s;
-      return h->values[k];
-   }
-
-   // naive test is to shift the code_buffer down so k bits are
-   // valid, then test against maxcode. To speed this up, we've
-   // preshifted maxcode left so that it has (16-k) 0s at the
-   // end; in other words, regardless of the number of bits, it
-   // wants to be compared against something shifted to have 16;
-   // that way we don't need to shift inside the loop.
-   temp = j->code_buffer >> 16;
-   for (k=FAST_BITS+1 ; ; ++k)
-      if (temp < h->maxcode[k])
-         break;
-   if (k == 17) {
-      // error! code not found
-      j->code_bits -= 16;
-      return -1;
-   }
-
-   if (k > j->code_bits)
-      return -1;
-
-   // convert the huffman code to the symbol id
-   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
-   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
-
-   // convert the id to a symbol
-   j->code_bits -= k;
-   j->code_buffer <<= k;
-   return h->values[c];
-}
-
-// bias[n] = (-1<<n) + 1
-static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
-
-// combined JPEG 'receive' and JPEG 'extend', since baseline
-// always extends everything it receives.
-stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
-{
-   unsigned int k;
-   int sgn;
-   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
-
-   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
-   k = stbi_lrot(j->code_buffer, n);
-   STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
-   j->code_buffer = k & ~stbi__bmask[n];
-   k &= stbi__bmask[n];
-   j->code_bits -= n;
-   return k + (stbi__jbias[n] & ~sgn);
-}
-
-// get some unsigned bits
-stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
-{
-   unsigned int k;
-   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
-   k = stbi_lrot(j->code_buffer, n);
-   j->code_buffer = k & ~stbi__bmask[n];
-   k &= stbi__bmask[n];
-   j->code_bits -= n;
-   return k;
-}
-
-stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
-{
-   unsigned int k;
-   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
-   k = j->code_buffer;
-   j->code_buffer <<= 1;
-   --j->code_bits;
-   return k & 0x80000000;
-}
-
-// given a value that's at position X in the zigzag stream,
-// where does it appear in the 8x8 matrix coded as row-major?
-static stbi_uc stbi__jpeg_dezigzag[64+15] =
-{
-    0,  1,  8, 16,  9,  2,  3, 10,
-   17, 24, 32, 25, 18, 11,  4,  5,
-   12, 19, 26, 33, 40, 48, 41, 34,
-   27, 20, 13,  6,  7, 14, 21, 28,
-   35, 42, 49, 56, 57, 50, 43, 36,
-   29, 22, 15, 23, 30, 37, 44, 51,
-   58, 59, 52, 45, 38, 31, 39, 46,
-   53, 60, 61, 54, 47, 55, 62, 63,
-   // let corrupt input sample past end
-   63, 63, 63, 63, 63, 63, 63, 63,
-   63, 63, 63, 63, 63, 63, 63
-};
-
-// decode one 64-entry block--
-static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
-{
-   int diff,dc,k;
-   int t;
-
-   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-   t = stbi__jpeg_huff_decode(j, hdc);
-   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-
-   // 0 all the ac values now so we can do it 32-bits at a time
-   memset(data,0,64*sizeof(data[0]));
-
-   diff = t ? stbi__extend_receive(j, t) : 0;
-   dc = j->img_comp[b].dc_pred + diff;
-   j->img_comp[b].dc_pred = dc;
-   data[0] = (short) (dc * dequant[0]);
-
-   // decode AC components, see JPEG spec
-   k = 1;
-   do {
-      unsigned int zig;
-      int c,r,s;
-      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
-      r = fac[c];
-      if (r) { // fast-AC path
-         k += (r >> 4) & 15; // run
-         s = r & 15; // combined length
-         j->code_buffer <<= s;
-         j->code_bits -= s;
-         // decode into unzigzag'd location
-         zig = stbi__jpeg_dezigzag[k++];
-         data[zig] = (short) ((r >> 8) * dequant[zig]);
-      } else {
-         int rs = stbi__jpeg_huff_decode(j, hac);
-         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-         s = rs & 15;
-         r = rs >> 4;
-         if (s == 0) {
-            if (rs != 0xf0) break; // end block
-            k += 16;
-         } else {
-            k += r;
-            // decode into unzigzag'd location
-            zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
-         }
-      }
-   } while (k < 64);
-   return 1;
-}
-
-static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
-{
-   int diff,dc;
-   int t;
-   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-
-   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-
-   if (j->succ_high == 0) {
-      // first scan for DC coefficient, must be first
-      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
-      t = stbi__jpeg_huff_decode(j, hdc);
-      diff = t ? stbi__extend_receive(j, t) : 0;
-
-      dc = j->img_comp[b].dc_pred + diff;
-      j->img_comp[b].dc_pred = dc;
-      data[0] = (short) (dc << j->succ_low);
-   } else {
-      // refinement scan for DC coefficient
-      if (stbi__jpeg_get_bit(j))
-         data[0] += (short) (1 << j->succ_low);
-   }
-   return 1;
-}
-
-// @OPTIMIZE: store non-zigzagged during the decode passes,
-// and only de-zigzag when dequantizing
-static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
-{
-   int k;
-   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-
-   if (j->succ_high == 0) {
-      int shift = j->succ_low;
-
-      if (j->eob_run) {
-         --j->eob_run;
-         return 1;
-      }
-
-      k = j->spec_start;
-      do {
-         unsigned int zig;
-         int c,r,s;
-         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
-         r = fac[c];
-         if (r) { // fast-AC path
-            k += (r >> 4) & 15; // run
-            s = r & 15; // combined length
-            j->code_buffer <<= s;
-            j->code_bits -= s;
-            zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) ((r >> 8) << shift);
-         } else {
-            int rs = stbi__jpeg_huff_decode(j, hac);
-            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-            s = rs & 15;
-            r = rs >> 4;
-            if (s == 0) {
-               if (r < 15) {
-                  j->eob_run = (1 << r);
-                  if (r)
-                     j->eob_run += stbi__jpeg_get_bits(j, r);
-                  --j->eob_run;
-                  break;
-               }
-               k += 16;
-            } else {
-               k += r;
-               zig = stbi__jpeg_dezigzag[k++];
-               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
-            }
-         }
-      } while (k <= j->spec_end);
-   } else {
-      // refinement scan for these AC coefficients
-
-      short bit = (short) (1 << j->succ_low);
-
-      if (j->eob_run) {
-         --j->eob_run;
-         for (k = j->spec_start; k <= j->spec_end; ++k) {
-            short *p = &data[stbi__jpeg_dezigzag[k]];
-            if (*p != 0)
-               if (stbi__jpeg_get_bit(j))
-                  if ((*p & bit)==0) {
-                     if (*p > 0)
-                        *p += bit;
-                     else
-                        *p -= bit;
-                  }
-         }
-      } else {
-         k = j->spec_start;
-         do {
-            int r,s;
-            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
-            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-            s = rs & 15;
-            r = rs >> 4;
-            if (s == 0) {
-               if (r < 15) {
-                  j->eob_run = (1 << r) - 1;
-                  if (r)
-                     j->eob_run += stbi__jpeg_get_bits(j, r);
-                  r = 64; // force end of block
-               } else {
-                  // r=15 s=0 should write 16 0s, so we just do
-                  // a run of 15 0s and then write s (which is 0),
-                  // so we don't have to do anything special here
-               }
-            } else {
-               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
-               // sign bit
-               if (stbi__jpeg_get_bit(j))
-                  s = bit;
-               else
-                  s = -bit;
-            }
-
-            // advance by r
-            while (k <= j->spec_end) {
-               short *p = &data[stbi__jpeg_dezigzag[k++]];
-               if (*p != 0) {
-                  if (stbi__jpeg_get_bit(j))
-                     if ((*p & bit)==0) {
-                        if (*p > 0)
-                           *p += bit;
-                        else
-                           *p -= bit;
-                     }
-               } else {
-                  if (r == 0) {
-                     *p = (short) s;
-                     break;
-                  }
-                  --r;
-               }
-            }
-         } while (k <= j->spec_end);
-      }
-   }
-   return 1;
-}
-
-// take a -128..127 value and stbi__clamp it and convert to 0..255
-stbi_inline static stbi_uc stbi__clamp(int x)
-{
-   // trick to use a single test to catch both cases
-   if ((unsigned int) x > 255) {
-      if (x < 0) return 0;
-      if (x > 255) return 255;
-   }
-   return (stbi_uc) x;
-}
-
-#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
-#define stbi__fsh(x)  ((x) << 12)
-
-// derived from jidctint -- DCT_ISLOW
-#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
-   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
-   p2 = s2;                                    \
-   p3 = s6;                                    \
-   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
-   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
-   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
-   p2 = s0;                                    \
-   p3 = s4;                                    \
-   t0 = stbi__fsh(p2+p3);                      \
-   t1 = stbi__fsh(p2-p3);                      \
-   x0 = t0+t3;                                 \
-   x3 = t0-t3;                                 \
-   x1 = t1+t2;                                 \
-   x2 = t1-t2;                                 \
-   t0 = s7;                                    \
-   t1 = s5;                                    \
-   t2 = s3;                                    \
-   t3 = s1;                                    \
-   p3 = t0+t2;                                 \
-   p4 = t1+t3;                                 \
-   p1 = t0+t3;                                 \
-   p2 = t1+t2;                                 \
-   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
-   t0 = t0*stbi__f2f( 0.298631336f);           \
-   t1 = t1*stbi__f2f( 2.053119869f);           \
-   t2 = t2*stbi__f2f( 3.072711026f);           \
-   t3 = t3*stbi__f2f( 1.501321110f);           \
-   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
-   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
-   p3 = p3*stbi__f2f(-1.961570560f);           \
-   p4 = p4*stbi__f2f(-0.390180644f);           \
-   t3 += p1+p4;                                \
-   t2 += p2+p3;                                \
-   t1 += p2+p4;                                \
-   t0 += p1+p3;
-
-static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
-{
-   int i,val[64],*v=val;
-   stbi_uc *o;
-   short *d = data;
-
-   // columns
-   for (i=0; i < 8; ++i,++d, ++v) {
-      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
-      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
-           && d[40]==0 && d[48]==0 && d[56]==0) {
-         //    no shortcut                 0     seconds
-         //    (1|2|3|4|5|6|7)==0          0     seconds
-         //    all separate               -0.047 seconds
-         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
-         int dcterm = d[0] << 2;
-         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
-      } else {
-         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
-         // constants scaled things up by 1<<12; let's bring them back
-         // down, but keep 2 extra bits of precision
-         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
-         v[ 0] = (x0+t3) >> 10;
-         v[56] = (x0-t3) >> 10;
-         v[ 8] = (x1+t2) >> 10;
-         v[48] = (x1-t2) >> 10;
-         v[16] = (x2+t1) >> 10;
-         v[40] = (x2-t1) >> 10;
-         v[24] = (x3+t0) >> 10;
-         v[32] = (x3-t0) >> 10;
-      }
-   }
-
-   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
-      // no fast case since the first 1D IDCT spread components out
-      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
-      // constants scaled things up by 1<<12, plus we had 1<<2 from first
-      // loop, plus horizontal and vertical each scale by sqrt(8) so together
-      // we've got an extra 1<<3, so 1<<17 total we need to remove.
-      // so we want to round that, which means adding 0.5 * 1<<17,
-      // aka 65536. Also, we'll end up with -128 to 127 that we want
-      // to encode as 0..255 by adding 128, so we'll add that before the shift
-      x0 += 65536 + (128<<17);
-      x1 += 65536 + (128<<17);
-      x2 += 65536 + (128<<17);
-      x3 += 65536 + (128<<17);
-      // tried computing the shifts into temps, or'ing the temps to see
-      // if any were out of range, but that was slower
-      o[0] = stbi__clamp((x0+t3) >> 17);
-      o[7] = stbi__clamp((x0-t3) >> 17);
-      o[1] = stbi__clamp((x1+t2) >> 17);
-      o[6] = stbi__clamp((x1-t2) >> 17);
-      o[2] = stbi__clamp((x2+t1) >> 17);
-      o[5] = stbi__clamp((x2-t1) >> 17);
-      o[3] = stbi__clamp((x3+t0) >> 17);
-      o[4] = stbi__clamp((x3-t0) >> 17);
-   }
-}
-
-#ifdef STBI_SSE2
-// sse2 integer IDCT. not the fastest possible implementation but it
-// produces bit-identical results to the generic C version so it's
-// fully "transparent".
-static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
-{
-   // This is constructed to match our regular (generic) integer IDCT exactly.
-   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
-   __m128i tmp;
-
-   // dot product constant: even elems=x, odd elems=y
-   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
-
-   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
-   // out(1) = c1[even]*x + c1[odd]*y
-   #define dct_rot(out0,out1, x,y,c0,c1) \
-      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
-      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
-      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
-      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
-      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
-      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
-
-   // out = in << 12  (in 16-bit, out 32-bit)
-   #define dct_widen(out, in) \
-      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
-      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
-
-   // wide add
-   #define dct_wadd(out, a, b) \
-      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
-      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
-
-   // wide sub
-   #define dct_wsub(out, a, b) \
-      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
-      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
-
-   // butterfly a/b, add bias, then shift by "s" and pack
-   #define dct_bfly32o(out0, out1, a,b,bias,s) \
-      { \
-         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
-         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
-         dct_wadd(sum, abiased, b); \
-         dct_wsub(dif, abiased, b); \
-         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
-         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
-      }
-
-   // 8-bit interleave step (for transposes)
-   #define dct_interleave8(a, b) \
-      tmp = a; \
-      a = _mm_unpacklo_epi8(a, b); \
-      b = _mm_unpackhi_epi8(tmp, b)
-
-   // 16-bit interleave step (for transposes)
-   #define dct_interleave16(a, b) \
-      tmp = a; \
-      a = _mm_unpacklo_epi16(a, b); \
-      b = _mm_unpackhi_epi16(tmp, b)
-
-   #define dct_pass(bias,shift) \
-      { \
-         /* even part */ \
-         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
-         __m128i sum04 = _mm_add_epi16(row0, row4); \
-         __m128i dif04 = _mm_sub_epi16(row0, row4); \
-         dct_widen(t0e, sum04); \
-         dct_widen(t1e, dif04); \
-         dct_wadd(x0, t0e, t3e); \
-         dct_wsub(x3, t0e, t3e); \
-         dct_wadd(x1, t1e, t2e); \
-         dct_wsub(x2, t1e, t2e); \
-         /* odd part */ \
-         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
-         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
-         __m128i sum17 = _mm_add_epi16(row1, row7); \
-         __m128i sum35 = _mm_add_epi16(row3, row5); \
-         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
-         dct_wadd(x4, y0o, y4o); \
-         dct_wadd(x5, y1o, y5o); \
-         dct_wadd(x6, y2o, y5o); \
-         dct_wadd(x7, y3o, y4o); \
-         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
-         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
-         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
-         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
-      }
-
-   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
-   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
-   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
-   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
-   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
-   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
-   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
-   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
-
-   // rounding biases in column/row passes, see stbi__idct_block for explanation.
-   __m128i bias_0 = _mm_set1_epi32(512);
-   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
-
-   // load
-   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
-   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
-   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
-   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
-   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
-   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
-   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
-   row7 = _mm_load_si128((const __m128i *) (data + 7*8));
-
-   // column pass
-   dct_pass(bias_0, 10);
-
-   {
-      // 16bit 8x8 transpose pass 1
-      dct_interleave16(row0, row4);
-      dct_interleave16(row1, row5);
-      dct_interleave16(row2, row6);
-      dct_interleave16(row3, row7);
-
-      // transpose pass 2
-      dct_interleave16(row0, row2);
-      dct_interleave16(row1, row3);
-      dct_interleave16(row4, row6);
-      dct_interleave16(row5, row7);
-
-      // transpose pass 3
-      dct_interleave16(row0, row1);
-      dct_interleave16(row2, row3);
-      dct_interleave16(row4, row5);
-      dct_interleave16(row6, row7);
-   }
-
-   // row pass
-   dct_pass(bias_1, 17);
-
-   {
-      // pack
-      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
-      __m128i p1 = _mm_packus_epi16(row2, row3);
-      __m128i p2 = _mm_packus_epi16(row4, row5);
-      __m128i p3 = _mm_packus_epi16(row6, row7);
-
-      // 8bit 8x8 transpose pass 1
-      dct_interleave8(p0, p2); // a0e0a1e1...
-      dct_interleave8(p1, p3); // c0g0c1g1...
-
-      // transpose pass 2
-      dct_interleave8(p0, p1); // a0c0e0g0...
-      dct_interleave8(p2, p3); // b0d0f0h0...
-
-      // transpose pass 3
-      dct_interleave8(p0, p2); // a0b0c0d0...
-      dct_interleave8(p1, p3); // a4b4c4d4...
-
-      // store
-      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
-   }
-
-#undef dct_const
-#undef dct_rot
-#undef dct_widen
-#undef dct_wadd
-#undef dct_wsub
-#undef dct_bfly32o
-#undef dct_interleave8
-#undef dct_interleave16
-#undef dct_pass
-}
-
-#endif // STBI_SSE2
-
-#ifdef STBI_NEON
-
-// NEON integer IDCT. should produce bit-identical
-// results to the generic C version.
-static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
-{
-   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
-
-   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
-   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
-   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
-   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
-   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
-   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
-   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
-   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
-   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
-   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
-   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
-   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
-
-#define dct_long_mul(out, inq, coeff) \
-   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
-   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
-
-#define dct_long_mac(out, acc, inq, coeff) \
-   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
-   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
-
-#define dct_widen(out, inq) \
-   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
-   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
-
-// wide add
-#define dct_wadd(out, a, b) \
-   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
-   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
-
-// wide sub
-#define dct_wsub(out, a, b) \
-   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
-   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
-
-// butterfly a/b, then shift using "shiftop" by "s" and pack
-#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
-   { \
-      dct_wadd(sum, a, b); \
-      dct_wsub(dif, a, b); \
-      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
-      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
-   }
-
-#define dct_pass(shiftop, shift) \
-   { \
-      /* even part */ \
-      int16x8_t sum26 = vaddq_s16(row2, row6); \
-      dct_long_mul(p1e, sum26, rot0_0); \
-      dct_long_mac(t2e, p1e, row6, rot0_1); \
-      dct_long_mac(t3e, p1e, row2, rot0_2); \
-      int16x8_t sum04 = vaddq_s16(row0, row4); \
-      int16x8_t dif04 = vsubq_s16(row0, row4); \
-      dct_widen(t0e, sum04); \
-      dct_widen(t1e, dif04); \
-      dct_wadd(x0, t0e, t3e); \
-      dct_wsub(x3, t0e, t3e); \
-      dct_wadd(x1, t1e, t2e); \
-      dct_wsub(x2, t1e, t2e); \
-      /* odd part */ \
-      int16x8_t sum15 = vaddq_s16(row1, row5); \
-      int16x8_t sum17 = vaddq_s16(row1, row7); \
-      int16x8_t sum35 = vaddq_s16(row3, row5); \
-      int16x8_t sum37 = vaddq_s16(row3, row7); \
-      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
-      dct_long_mul(p5o, sumodd, rot1_0); \
-      dct_long_mac(p1o, p5o, sum17, rot1_1); \
-      dct_long_mac(p2o, p5o, sum35, rot1_2); \
-      dct_long_mul(p3o, sum37, rot2_0); \
-      dct_long_mul(p4o, sum15, rot2_1); \
-      dct_wadd(sump13o, p1o, p3o); \
-      dct_wadd(sump24o, p2o, p4o); \
-      dct_wadd(sump23o, p2o, p3o); \
-      dct_wadd(sump14o, p1o, p4o); \
-      dct_long_mac(x4, sump13o, row7, rot3_0); \
-      dct_long_mac(x5, sump24o, row5, rot3_1); \
-      dct_long_mac(x6, sump23o, row3, rot3_2); \
-      dct_long_mac(x7, sump14o, row1, rot3_3); \
-      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
-      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
-      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
-      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
-   }
-
-   // load
-   row0 = vld1q_s16(data + 0*8);
-   row1 = vld1q_s16(data + 1*8);
-   row2 = vld1q_s16(data + 2*8);
-   row3 = vld1q_s16(data + 3*8);
-   row4 = vld1q_s16(data + 4*8);
-   row5 = vld1q_s16(data + 5*8);
-   row6 = vld1q_s16(data + 6*8);
-   row7 = vld1q_s16(data + 7*8);
-
-   // add DC bias
-   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
-
-   // column pass
-   dct_pass(vrshrn_n_s32, 10);
-
-   // 16bit 8x8 transpose
-   {
-// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
-// whether compilers actually get this is another story, sadly.
-#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
-#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
-#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
-
-      // pass 1
-      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
-      dct_trn16(row2, row3);
-      dct_trn16(row4, row5);
-      dct_trn16(row6, row7);
-
-      // pass 2
-      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
-      dct_trn32(row1, row3);
-      dct_trn32(row4, row6);
-      dct_trn32(row5, row7);
-
-      // pass 3
-      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
-      dct_trn64(row1, row5);
-      dct_trn64(row2, row6);
-      dct_trn64(row3, row7);
-
-#undef dct_trn16
-#undef dct_trn32
-#undef dct_trn64
-   }
-
-   // row pass
-   // vrshrn_n_s32 only supports shifts up to 16, we need
-   // 17. so do a non-rounding shift of 16 first then follow
-   // up with a rounding shift by 1.
-   dct_pass(vshrn_n_s32, 16);
-
-   {
-      // pack and round
-      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
-      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
-      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
-      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
-      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
-      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
-      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
-      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
-
-      // again, these can translate into one instruction, but often don't.
-#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
-#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
-#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
-
-      // sadly can't use interleaved stores here since we only write
-      // 8 bytes to each scan line!
-
-      // 8x8 8-bit transpose pass 1
-      dct_trn8_8(p0, p1);
-      dct_trn8_8(p2, p3);
-      dct_trn8_8(p4, p5);
-      dct_trn8_8(p6, p7);
-
-      // pass 2
-      dct_trn8_16(p0, p2);
-      dct_trn8_16(p1, p3);
-      dct_trn8_16(p4, p6);
-      dct_trn8_16(p5, p7);
-
-      // pass 3
-      dct_trn8_32(p0, p4);
-      dct_trn8_32(p1, p5);
-      dct_trn8_32(p2, p6);
-      dct_trn8_32(p3, p7);
-
-      // store
-      vst1_u8(out, p0); out += out_stride;
-      vst1_u8(out, p1); out += out_stride;
-      vst1_u8(out, p2); out += out_stride;
-      vst1_u8(out, p3); out += out_stride;
-      vst1_u8(out, p4); out += out_stride;
-      vst1_u8(out, p5); out += out_stride;
-      vst1_u8(out, p6); out += out_stride;
-      vst1_u8(out, p7);
-
-#undef dct_trn8_8
-#undef dct_trn8_16
-#undef dct_trn8_32
-   }
-
-#undef dct_long_mul
-#undef dct_long_mac
-#undef dct_widen
-#undef dct_wadd
-#undef dct_wsub
-#undef dct_bfly32o
-#undef dct_pass
-}
-
-#endif // STBI_NEON
-
-#define STBI__MARKER_none  0xff
-// if there's a pending marker from the entropy stream, return that
-// otherwise, fetch from the stream and get a marker. if there's no
-// marker, return 0xff, which is never a valid marker value
-static stbi_uc stbi__get_marker(stbi__jpeg *j)
-{
-   stbi_uc x;
-   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
-   x = stbi__get8(j->s);
-   if (x != 0xff) return STBI__MARKER_none;
-   while (x == 0xff)
-      x = stbi__get8(j->s);
-   return x;
-}
-
-// in each scan, we'll have scan_n components, and the order
-// of the components is specified by order[]
-#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
-
-// after a restart interval, stbi__jpeg_reset the entropy decoder and
-// the dc prediction
-static void stbi__jpeg_reset(stbi__jpeg *j)
-{
-   j->code_bits = 0;
-   j->code_buffer = 0;
-   j->nomore = 0;
-   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
-   j->marker = STBI__MARKER_none;
-   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
-   j->eob_run = 0;
-   // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
-   // since we don't even allow 1<<30 pixels
-}
-
-static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
-{
-   stbi__jpeg_reset(z);
-   if (!z->progressive) {
-      if (z->scan_n == 1) {
-         int i,j;
-         STBI_SIMD_ALIGN(short, data[64]);
-         int n = z->order[0];
-         // non-interleaved data, we just need to process one block at a time,
-         // in trivial scanline order
-         // number of blocks to do just depends on how many actual "pixels" this
-         // component has, independent of interleaved MCU blocking and such
-         int w = (z->img_comp[n].x+7) >> 3;
-         int h = (z->img_comp[n].y+7) >> 3;
-         for (j=0; j < h; ++j) {
-            for (i=0; i < w; ++i) {
-               int ha = z->img_comp[n].ha;
-               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
-               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
-               // every data block is an MCU, so countdown the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  // if it's NOT a restart, then just bail, so we get corrupt data
-                  // rather than no data
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      } else { // interleaved
-         int i,j,k,x,y;
-         STBI_SIMD_ALIGN(short, data[64]);
-         for (j=0; j < z->img_mcu_y; ++j) {
-            for (i=0; i < z->img_mcu_x; ++i) {
-               // scan an interleaved mcu... process scan_n components in order
-               for (k=0; k < z->scan_n; ++k) {
-                  int n = z->order[k];
-                  // scan out an mcu's worth of this component; that's just determined
-                  // by the basic H and V specified for the component
-                  for (y=0; y < z->img_comp[n].v; ++y) {
-                     for (x=0; x < z->img_comp[n].h; ++x) {
-                        int x2 = (i*z->img_comp[n].h + x)*8;
-                        int y2 = (j*z->img_comp[n].v + y)*8;
-                        int ha = z->img_comp[n].ha;
-                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
-                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
-                     }
-                  }
-               }
-               // after all interleaved components, that's an interleaved MCU,
-               // so now count down the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      }
-   } else {
-      if (z->scan_n == 1) {
-         int i,j;
-         int n = z->order[0];
-         // non-interleaved data, we just need to process one block at a time,
-         // in trivial scanline order
-         // number of blocks to do just depends on how many actual "pixels" this
-         // component has, independent of interleaved MCU blocking and such
-         int w = (z->img_comp[n].x+7) >> 3;
-         int h = (z->img_comp[n].y+7) >> 3;
-         for (j=0; j < h; ++j) {
-            for (i=0; i < w; ++i) {
-               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
-               if (z->spec_start == 0) {
-                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
-                     return 0;
-               } else {
-                  int ha = z->img_comp[n].ha;
-                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
-                     return 0;
-               }
-               // every data block is an MCU, so countdown the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      } else { // interleaved
-         int i,j,k,x,y;
-         for (j=0; j < z->img_mcu_y; ++j) {
-            for (i=0; i < z->img_mcu_x; ++i) {
-               // scan an interleaved mcu... process scan_n components in order
-               for (k=0; k < z->scan_n; ++k) {
-                  int n = z->order[k];
-                  // scan out an mcu's worth of this component; that's just determined
-                  // by the basic H and V specified for the component
-                  for (y=0; y < z->img_comp[n].v; ++y) {
-                     for (x=0; x < z->img_comp[n].h; ++x) {
-                        int x2 = (i*z->img_comp[n].h + x);
-                        int y2 = (j*z->img_comp[n].v + y);
-                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
-                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
-                           return 0;
-                     }
-                  }
-               }
-               // after all interleaved components, that's an interleaved MCU,
-               // so now count down the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      }
-   }
-}
-
-static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
-{
-   int i;
-   for (i=0; i < 64; ++i)
-      data[i] *= dequant[i];
-}
-
-static void stbi__jpeg_finish(stbi__jpeg *z)
-{
-   if (z->progressive) {
-      // dequantize and idct the data
-      int i,j,n;
-      for (n=0; n < z->s->img_n; ++n) {
-         int w = (z->img_comp[n].x+7) >> 3;
-         int h = (z->img_comp[n].y+7) >> 3;
-         for (j=0; j < h; ++j) {
-            for (i=0; i < w; ++i) {
-               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
-               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
-               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
-            }
-         }
-      }
-   }
-}
-
-static int stbi__process_marker(stbi__jpeg *z, int m)
-{
-   int L;
-   switch (m) {
-      case STBI__MARKER_none: // no marker found
-         return stbi__err("expected marker","Corrupt JPEG");
-
-      case 0xDD: // DRI - specify restart interval
-         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
-         z->restart_interval = stbi__get16be(z->s);
-         return 1;
-
-      case 0xDB: // DQT - define quantization table
-         L = stbi__get16be(z->s)-2;
-         while (L > 0) {
-            int q = stbi__get8(z->s);
-            int p = q >> 4;
-            int t = q & 15,i;
-            if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG");
-            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
-            for (i=0; i < 64; ++i)
-               z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
-            L -= 65;
-         }
-         return L==0;
-
-      case 0xC4: // DHT - define huffman table
-         L = stbi__get16be(z->s)-2;
-         while (L > 0) {
-            stbi_uc *v;
-            int sizes[16],i,n=0;
-            int q = stbi__get8(z->s);
-            int tc = q >> 4;
-            int th = q & 15;
-            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
-            for (i=0; i < 16; ++i) {
-               sizes[i] = stbi__get8(z->s);
-               n += sizes[i];
-            }
-            L -= 17;
-            if (tc == 0) {
-               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
-               v = z->huff_dc[th].values;
-            } else {
-               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
-               v = z->huff_ac[th].values;
-            }
-            for (i=0; i < n; ++i)
-               v[i] = stbi__get8(z->s);
-            if (tc != 0)
-               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
-            L -= n;
-         }
-         return L==0;
-   }
-   // check for comment block or APP blocks
-   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
-      stbi__skip(z->s, stbi__get16be(z->s)-2);
-      return 1;
-   }
-   return 0;
-}
-
-// after we see SOS
-static int stbi__process_scan_header(stbi__jpeg *z)
-{
-   int i;
-   int Ls = stbi__get16be(z->s);
-   z->scan_n = stbi__get8(z->s);
-   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
-   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
-   for (i=0; i < z->scan_n; ++i) {
-      int id = stbi__get8(z->s), which;
-      int q = stbi__get8(z->s);
-      for (which = 0; which < z->s->img_n; ++which)
-         if (z->img_comp[which].id == id)
-            break;
-      if (which == z->s->img_n) return 0; // no match
-      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
-      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
-      z->order[i] = which;
-   }
-
-   {
-      int aa;
-      z->spec_start = stbi__get8(z->s);
-      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
-      aa = stbi__get8(z->s);
-      z->succ_high = (aa >> 4);
-      z->succ_low  = (aa & 15);
-      if (z->progressive) {
-         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
-            return stbi__err("bad SOS", "Corrupt JPEG");
-      } else {
-         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
-         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
-         z->spec_end = 63;
-      }
-   }
-
-   return 1;
-}
-
-static int stbi__process_frame_header(stbi__jpeg *z, int scan)
-{
-   stbi__context *s = z->s;
-   int Lf,p,i,q, h_max=1,v_max=1,c;
-   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
-   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
-   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
-   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
-   c = stbi__get8(s);
-   if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG");    // JFIF requires
-   s->img_n = c;
-   for (i=0; i < c; ++i) {
-      z->img_comp[i].data = NULL;
-      z->img_comp[i].linebuf = NULL;
-   }
-
-   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
-
-   z->rgb = 0;
-   for (i=0; i < s->img_n; ++i) {
-      static unsigned char rgb[3] = { 'R', 'G', 'B' };
-      z->img_comp[i].id = stbi__get8(s);
-      if (z->img_comp[i].id != i+1)   // JFIF requires
-         if (z->img_comp[i].id != i) {  // some version of jpegtran outputs non-JFIF-compliant files!
-            // somethings output this (see http://fileformats.archiveteam.org/wiki/JPEG#Color_format)
-            if (z->img_comp[i].id != rgb[i])
-               return stbi__err("bad component ID","Corrupt JPEG");
-            ++z->rgb;
-         }
-      q = stbi__get8(s);
-      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
-      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
-      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
-   }
-
-   if (scan != STBI__SCAN_load) return 1;
-
-   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
-
-   for (i=0; i < s->img_n; ++i) {
-      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
-      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
-   }
-
-   // compute interleaved mcu info
-   z->img_h_max = h_max;
-   z->img_v_max = v_max;
-   z->img_mcu_w = h_max * 8;
-   z->img_mcu_h = v_max * 8;
-   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
-   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
-
-   for (i=0; i < s->img_n; ++i) {
-      // number of effective pixels (e.g. for non-interleaved MCU)
-      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
-      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
-      // to simplify generation, we'll allocate enough memory to decode
-      // the bogus oversized data from using interleaved MCUs and their
-      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
-      // discard the extra data until colorspace conversion
-      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
-      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
-      z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
-
-      if (z->img_comp[i].raw_data == NULL) {
-         for(--i; i >= 0; --i) {
-            STBI_FREE(z->img_comp[i].raw_data);
-            z->img_comp[i].raw_data = NULL;
-         }
-         return stbi__err("outofmem", "Out of memory");
-      }
-      // align blocks for idct using mmx/sse
-      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
-      z->img_comp[i].linebuf = NULL;
-      if (z->progressive) {
-         z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
-         z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
-         z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
-         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
-      } else {
-         z->img_comp[i].coeff = 0;
-         z->img_comp[i].raw_coeff = 0;
-      }
-   }
-
-   return 1;
-}
-
-// use comparisons since in some cases we handle more than one case (e.g. SOF)
-#define stbi__DNL(x)         ((x) == 0xdc)
-#define stbi__SOI(x)         ((x) == 0xd8)
-#define stbi__EOI(x)         ((x) == 0xd9)
-#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
-#define stbi__SOS(x)         ((x) == 0xda)
-
-#define stbi__SOF_progressive(x)   ((x) == 0xc2)
-
-static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
-{
-   int m;
-   z->marker = STBI__MARKER_none; // initialize cached marker to empty
-   m = stbi__get_marker(z);
-   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
-   if (scan == STBI__SCAN_type) return 1;
-   m = stbi__get_marker(z);
-   while (!stbi__SOF(m)) {
-      if (!stbi__process_marker(z,m)) return 0;
-      m = stbi__get_marker(z);
-      while (m == STBI__MARKER_none) {
-         // some files have extra padding after their blocks, so ok, we'll scan
-         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
-         m = stbi__get_marker(z);
-      }
-   }
-   z->progressive = stbi__SOF_progressive(m);
-   if (!stbi__process_frame_header(z, scan)) return 0;
-   return 1;
-}
-
-// decode image to YCbCr format
-static int stbi__decode_jpeg_image(stbi__jpeg *j)
-{
-   int m;
-   for (m = 0; m < 4; m++) {
-      j->img_comp[m].raw_data = NULL;
-      j->img_comp[m].raw_coeff = NULL;
-   }
-   j->restart_interval = 0;
-   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
-   m = stbi__get_marker(j);
-   while (!stbi__EOI(m)) {
-      if (stbi__SOS(m)) {
-         if (!stbi__process_scan_header(j)) return 0;
-         if (!stbi__parse_entropy_coded_data(j)) return 0;
-         if (j->marker == STBI__MARKER_none ) {
-            // handle 0s at the end of image data from IP Kamera 9060
-            while (!stbi__at_eof(j->s)) {
-               int x = stbi__get8(j->s);
-               if (x == 255) {
-                  j->marker = stbi__get8(j->s);
-                  break;
-               } else if (x != 0) {
-                  return stbi__err("junk before marker", "Corrupt JPEG");
-               }
-            }
-            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
-         }
-      } else {
-         if (!stbi__process_marker(j, m)) return 0;
-      }
-      m = stbi__get_marker(j);
-   }
-   if (j->progressive)
-      stbi__jpeg_finish(j);
-   return 1;
-}
-
-// static jfif-centered resampling (across block boundaries)
-
-typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
-                                    int w, int hs);
-
-#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
-
-static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   STBI_NOTUSED(out);
-   STBI_NOTUSED(in_far);
-   STBI_NOTUSED(w);
-   STBI_NOTUSED(hs);
-   return in_near;
-}
-
-static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate two samples vertically for every one in input
-   int i;
-   STBI_NOTUSED(hs);
-   for (i=0; i < w; ++i)
-      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
-   return out;
-}
-
-static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate two samples horizontally for every one in input
-   int i;
-   stbi_uc *input = in_near;
-
-   if (w == 1) {
-      // if only one sample, can't do any interpolation
-      out[0] = out[1] = input[0];
-      return out;
-   }
-
-   out[0] = input[0];
-   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
-   for (i=1; i < w-1; ++i) {
-      int n = 3*input[i]+2;
-      out[i*2+0] = stbi__div4(n+input[i-1]);
-      out[i*2+1] = stbi__div4(n+input[i+1]);
-   }
-   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
-   out[i*2+1] = input[w-1];
-
-   STBI_NOTUSED(in_far);
-   STBI_NOTUSED(hs);
-
-   return out;
-}
-
-#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
-
-static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate 2x2 samples for every one in input
-   int i,t0,t1;
-   if (w == 1) {
-      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
-      return out;
-   }
-
-   t1 = 3*in_near[0] + in_far[0];
-   out[0] = stbi__div4(t1+2);
-   for (i=1; i < w; ++i) {
-      t0 = t1;
-      t1 = 3*in_near[i]+in_far[i];
-      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
-      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
-   }
-   out[w*2-1] = stbi__div4(t1+2);
-
-   STBI_NOTUSED(hs);
-
-   return out;
-}
-
-#if defined(STBI_SSE2) || defined(STBI_NEON)
-static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate 2x2 samples for every one in input
-   int i=0,t0,t1;
-
-   if (w == 1) {
-      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
-      return out;
-   }
-
-   t1 = 3*in_near[0] + in_far[0];
-   // process groups of 8 pixels for as long as we can.
-   // note we can't handle the last pixel in a row in this loop
-   // because we need to handle the filter boundary conditions.
-   for (; i < ((w-1) & ~7); i += 8) {
-#if defined(STBI_SSE2)
-      // load and perform the vertical filtering pass
-      // this uses 3*x + y = 4*x + (y - x)
-      __m128i zero  = _mm_setzero_si128();
-      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
-      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
-      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
-      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
-      __m128i diff  = _mm_sub_epi16(farw, nearw);
-      __m128i nears = _mm_slli_epi16(nearw, 2);
-      __m128i curr  = _mm_add_epi16(nears, diff); // current row
-
-      // horizontal filter works the same based on shifted vers of current
-      // row. "prev" is current row shifted right by 1 pixel; we need to
-      // insert the previous pixel value (from t1).
-      // "next" is current row shifted left by 1 pixel, with first pixel
-      // of next block of 8 pixels added in.
-      __m128i prv0 = _mm_slli_si128(curr, 2);
-      __m128i nxt0 = _mm_srli_si128(curr, 2);
-      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
-      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
-
-      // horizontal filter, polyphase implementation since it's convenient:
-      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
-      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
-      // note the shared term.
-      __m128i bias  = _mm_set1_epi16(8);
-      __m128i curs = _mm_slli_epi16(curr, 2);
-      __m128i prvd = _mm_sub_epi16(prev, curr);
-      __m128i nxtd = _mm_sub_epi16(next, curr);
-      __m128i curb = _mm_add_epi16(curs, bias);
-      __m128i even = _mm_add_epi16(prvd, curb);
-      __m128i odd  = _mm_add_epi16(nxtd, curb);
-
-      // interleave even and odd pixels, then undo scaling.
-      __m128i int0 = _mm_unpacklo_epi16(even, odd);
-      __m128i int1 = _mm_unpackhi_epi16(even, odd);
-      __m128i de0  = _mm_srli_epi16(int0, 4);
-      __m128i de1  = _mm_srli_epi16(int1, 4);
-
-      // pack and write output
-      __m128i outv = _mm_packus_epi16(de0, de1);
-      _mm_storeu_si128((__m128i *) (out + i*2), outv);
-#elif defined(STBI_NEON)
-      // load and perform the vertical filtering pass
-      // this uses 3*x + y = 4*x + (y - x)
-      uint8x8_t farb  = vld1_u8(in_far + i);
-      uint8x8_t nearb = vld1_u8(in_near + i);
-      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
-      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
-      int16x8_t curr  = vaddq_s16(nears, diff); // current row
-
-      // horizontal filter works the same based on shifted vers of current
-      // row. "prev" is current row shifted right by 1 pixel; we need to
-      // insert the previous pixel value (from t1).
-      // "next" is current row shifted left by 1 pixel, with first pixel
-      // of next block of 8 pixels added in.
-      int16x8_t prv0 = vextq_s16(curr, curr, 7);
-      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
-      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
-      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
-
-      // horizontal filter, polyphase implementation since it's convenient:
-      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
-      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
-      // note the shared term.
-      int16x8_t curs = vshlq_n_s16(curr, 2);
-      int16x8_t prvd = vsubq_s16(prev, curr);
-      int16x8_t nxtd = vsubq_s16(next, curr);
-      int16x8_t even = vaddq_s16(curs, prvd);
-      int16x8_t odd  = vaddq_s16(curs, nxtd);
-
-      // undo scaling and round, then store with even/odd phases interleaved
-      uint8x8x2_t o;
-      o.val[0] = vqrshrun_n_s16(even, 4);
-      o.val[1] = vqrshrun_n_s16(odd,  4);
-      vst2_u8(out + i*2, o);
-#endif
-
-      // "previous" value for next iter
-      t1 = 3*in_near[i+7] + in_far[i+7];
-   }
-
-   t0 = t1;
-   t1 = 3*in_near[i] + in_far[i];
-   out[i*2] = stbi__div16(3*t1 + t0 + 8);
-
-   for (++i; i < w; ++i) {
-      t0 = t1;
-      t1 = 3*in_near[i]+in_far[i];
-      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
-      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
-   }
-   out[w*2-1] = stbi__div4(t1+2);
-
-   STBI_NOTUSED(hs);
-
-   return out;
-}
-#endif
-
-static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // resample with nearest-neighbor
-   int i,j;
-   STBI_NOTUSED(in_far);
-   for (i=0; i < w; ++i)
-      for (j=0; j < hs; ++j)
-         out[i*hs+j] = in_near[i];
-   return out;
-}
-
-#ifdef STBI_JPEG_OLD
-// this is the same YCbCr-to-RGB calculation that stb_image has used
-// historically before the algorithm changes in 1.49
-#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
-static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
-{
-   int i;
-   for (i=0; i < count; ++i) {
-      int y_fixed = (y[i] << 16) + 32768; // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed + cr*float2fixed(1.40200f);
-      g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
-      b = y_fixed                            + cb*float2fixed(1.77200f);
-      r >>= 16;
-      g >>= 16;
-      b >>= 16;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-#else
-// this is a reduced-precision calculation of YCbCr-to-RGB introduced
-// to make sure the code produces the same results in both SIMD and scalar
-#define float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
-static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
-{
-   int i;
-   for (i=0; i < count; ++i) {
-      int y_fixed = (y[i] << 20) + (1<<19); // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed +  cr* float2fixed(1.40200f);
-      g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                               +   cb* float2fixed(1.77200f);
-      r >>= 20;
-      g >>= 20;
-      b >>= 20;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-#endif
-
-#if defined(STBI_SSE2) || defined(STBI_NEON)
-static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
-{
-   int i = 0;
-
-#ifdef STBI_SSE2
-   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
-   // it's useful in practice (you wouldn't use it for textures, for example).
-   // so just accelerate step == 4 case.
-   if (step == 4) {
-      // this is a fairly straightforward implementation and not super-optimized.
-      __m128i signflip  = _mm_set1_epi8(-0x80);
-      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
-      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
-      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
-      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
-      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
-      __m128i xw = _mm_set1_epi16(255); // alpha channel
-
-      for (; i+7 < count; i += 8) {
-         // load
-         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
-         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
-         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
-         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
-         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
-
-         // unpack to short (and left-shift cr, cb by 8)
-         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
-         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
-         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
-
-         // color transform
-         __m128i yws = _mm_srli_epi16(yw, 4);
-         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
-         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
-         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
-         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
-         __m128i rws = _mm_add_epi16(cr0, yws);
-         __m128i gwt = _mm_add_epi16(cb0, yws);
-         __m128i bws = _mm_add_epi16(yws, cb1);
-         __m128i gws = _mm_add_epi16(gwt, cr1);
-
-         // descale
-         __m128i rw = _mm_srai_epi16(rws, 4);
-         __m128i bw = _mm_srai_epi16(bws, 4);
-         __m128i gw = _mm_srai_epi16(gws, 4);
-
-         // back to byte, set up for transpose
-         __m128i brb = _mm_packus_epi16(rw, bw);
-         __m128i gxb = _mm_packus_epi16(gw, xw);
-
-         // transpose to interleave channels
-         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
-         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
-         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
-         __m128i o1 = _mm_unpackhi_epi16(t0, t1);
-
-         // store
-         _mm_storeu_si128((__m128i *) (out + 0), o0);
-         _mm_storeu_si128((__m128i *) (out + 16), o1);
-         out += 32;
-      }
-   }
-#endif
-
-#ifdef STBI_NEON
-   // in this version, step=3 support would be easy to add. but is there demand?
-   if (step == 4) {
-      // this is a fairly straightforward implementation and not super-optimized.
-      uint8x8_t signflip = vdup_n_u8(0x80);
-      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
-      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
-      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
-      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));
-
-      for (; i+7 < count; i += 8) {
-         // load
-         uint8x8_t y_bytes  = vld1_u8(y + i);
-         uint8x8_t cr_bytes = vld1_u8(pcr + i);
-         uint8x8_t cb_bytes = vld1_u8(pcb + i);
-         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
-         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
-
-         // expand to s16
-         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
-         int16x8_t crw = vshll_n_s8(cr_biased, 7);
-         int16x8_t cbw = vshll_n_s8(cb_biased, 7);
-
-         // color transform
-         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
-         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
-         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
-         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
-         int16x8_t rws = vaddq_s16(yws, cr0);
-         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
-         int16x8_t bws = vaddq_s16(yws, cb1);
-
-         // undo scaling, round, convert to byte
-         uint8x8x4_t o;
-         o.val[0] = vqrshrun_n_s16(rws, 4);
-         o.val[1] = vqrshrun_n_s16(gws, 4);
-         o.val[2] = vqrshrun_n_s16(bws, 4);
-         o.val[3] = vdup_n_u8(255);
-
-         // store, interleaving r/g/b/a
-         vst4_u8(out, o);
-         out += 8*4;
-      }
-   }
-#endif
-
-   for (; i < count; ++i) {
-      int y_fixed = (y[i] << 20) + (1<<19); // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed + cr* float2fixed(1.40200f);
-      g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                             +   cb* float2fixed(1.77200f);
-      r >>= 20;
-      g >>= 20;
-      b >>= 20;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-#endif
-
-// set up the kernels
-static void stbi__setup_jpeg(stbi__jpeg *j)
-{
-   j->idct_block_kernel = stbi__idct_block;
-   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
-   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
-
-#ifdef STBI_SSE2
-   if (stbi__sse2_available()) {
-      j->idct_block_kernel = stbi__idct_simd;
-      #ifndef STBI_JPEG_OLD
-      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-      #endif
-      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
-   }
-#endif
-
-#ifdef STBI_NEON
-   j->idct_block_kernel = stbi__idct_simd;
-   #ifndef STBI_JPEG_OLD
-   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-   #endif
-   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
-#endif
-}
-
-// clean up the temporary component buffers
-static void stbi__cleanup_jpeg(stbi__jpeg *j)
-{
-   int i;
-   for (i=0; i < j->s->img_n; ++i) {
-      if (j->img_comp[i].raw_data) {
-         STBI_FREE(j->img_comp[i].raw_data);
-         j->img_comp[i].raw_data = NULL;
-         j->img_comp[i].data = NULL;
-      }
-      if (j->img_comp[i].raw_coeff) {
-         STBI_FREE(j->img_comp[i].raw_coeff);
-         j->img_comp[i].raw_coeff = 0;
-         j->img_comp[i].coeff = 0;
-      }
-      if (j->img_comp[i].linebuf) {
-         STBI_FREE(j->img_comp[i].linebuf);
-         j->img_comp[i].linebuf = NULL;
-      }
-   }
-}
-
-typedef struct
-{
-   resample_row_func resample;
-   stbi_uc *line0,*line1;
-   int hs,vs;   // expansion factor in each axis
-   int w_lores; // horizontal pixels pre-expansion
-   int ystep;   // how far through vertical expansion we are
-   int ypos;    // which pre-expansion row we're on
-} stbi__resample;
-
-static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
-{
-   int n, decode_n;
-   z->s->img_n = 0; // make stbi__cleanup_jpeg safe
-
-   // validate req_comp
-   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
-
-   // load a jpeg image from whichever source, but leave in YCbCr format
-   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
-
-   // determine actual number of components to generate
-   n = req_comp ? req_comp : z->s->img_n;
-
-   if (z->s->img_n == 3 && n < 3)
-      decode_n = 1;
-   else
-      decode_n = z->s->img_n;
-
-   // resample and color-convert
-   {
-      int k;
-      unsigned int i,j;
-      stbi_uc *output;
-      stbi_uc *coutput[4];
-
-      stbi__resample res_comp[4];
-
-      for (k=0; k < decode_n; ++k) {
-         stbi__resample *r = &res_comp[k];
-
-         // allocate line buffer big enough for upsampling off the edges
-         // with upsample factor of 4
-         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
-         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
-
-         r->hs      = z->img_h_max / z->img_comp[k].h;
-         r->vs      = z->img_v_max / z->img_comp[k].v;
-         r->ystep   = r->vs >> 1;
-         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
-         r->ypos    = 0;
-         r->line0   = r->line1 = z->img_comp[k].data;
-
-         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
-         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
-         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
-         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
-         else                               r->resample = stbi__resample_row_generic;
-      }
-
-      // can't error after this so, this is safe
-      output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
-      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
-
-      // now go ahead and resample
-      for (j=0; j < z->s->img_y; ++j) {
-         stbi_uc *out = output + n * z->s->img_x * j;
-         for (k=0; k < decode_n; ++k) {
-            stbi__resample *r = &res_comp[k];
-            int y_bot = r->ystep >= (r->vs >> 1);
-            coutput[k] = r->resample(z->img_comp[k].linebuf,
-                                     y_bot ? r->line1 : r->line0,
-                                     y_bot ? r->line0 : r->line1,
-                                     r->w_lores, r->hs);
-            if (++r->ystep >= r->vs) {
-               r->ystep = 0;
-               r->line0 = r->line1;
-               if (++r->ypos < z->img_comp[k].y)
-                  r->line1 += z->img_comp[k].w2;
-            }
-         }
-         if (n >= 3) {
-            stbi_uc *y = coutput[0];
-            if (z->s->img_n == 3) {
-               if (z->rgb == 3) {
-                  for (i=0; i < z->s->img_x; ++i) {
-                     out[0] = y[i];
-                     out[1] = coutput[1][i];
-                     out[2] = coutput[2][i];
-                     out[3] = 255;
-                     out += n;
-                  }
-               } else {
-                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-               }
-            } else
-               for (i=0; i < z->s->img_x; ++i) {
-                  out[0] = out[1] = out[2] = y[i];
-                  out[3] = 255; // not used if n==3
-                  out += n;
-               }
-         } else {
-            stbi_uc *y = coutput[0];
-            if (n == 1)
-               for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
-            else
-               for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
-         }
-      }
-      stbi__cleanup_jpeg(z);
-      *out_x = z->s->img_x;
-      *out_y = z->s->img_y;
-      if (comp) *comp  = z->s->img_n; // report original components, not output
-      return output;
-   }
-}
-
-static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   unsigned char* result;
-   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
-   j->s = s;
-   stbi__setup_jpeg(j);
-   result = load_jpeg_image(j, x,y,comp,req_comp);
-   STBI_FREE(j);
-   return result;
-}
-
-static int stbi__jpeg_test(stbi__context *s)
-{
-   int r;
-   stbi__jpeg j;
-   j.s = s;
-   stbi__setup_jpeg(&j);
-   r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
-   stbi__rewind(s);
-   return r;
-}
-
-static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
-{
-   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
-      stbi__rewind( j->s );
-      return 0;
-   }
-   if (x) *x = j->s->img_x;
-   if (y) *y = j->s->img_y;
-   if (comp) *comp = j->s->img_n;
-   return 1;
-}
-
-static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int result;
-   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
-   j->s = s;
-   result = stbi__jpeg_info_raw(j, x, y, comp);
-   STBI_FREE(j);
-   return result;
-}
-#endif
-
-// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
-//    simple implementation
-//      - all input must be provided in an upfront buffer
-//      - all output is written to a single output buffer (can malloc/realloc)
-//    performance
-//      - fast huffman
-
-#ifndef STBI_NO_ZLIB
-
-// fast-way is faster to check than jpeg huffman, but slow way is slower
-#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
-#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
-
-// zlib-style huffman encoding
-// (jpegs packs from left, zlib from right, so can't share code)
-typedef struct
-{
-   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
-   stbi__uint16 firstcode[16];
-   int maxcode[17];
-   stbi__uint16 firstsymbol[16];
-   stbi_uc  size[288];
-   stbi__uint16 value[288];
-} stbi__zhuffman;
-
-stbi_inline static int stbi__bitreverse16(int n)
-{
-  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
-  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
-  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
-  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
-  return n;
-}
-
-stbi_inline static int stbi__bit_reverse(int v, int bits)
-{
-   STBI_ASSERT(bits <= 16);
-   // to bit reverse n bits, reverse 16 and shift
-   // e.g. 11 bits, bit reverse and shift away 5
-   return stbi__bitreverse16(v) >> (16-bits);
-}
-
-static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
-{
-   int i,k=0;
-   int code, next_code[16], sizes[17];
-
-   // DEFLATE spec for generating codes
-   memset(sizes, 0, sizeof(sizes));
-   memset(z->fast, 0, sizeof(z->fast));
-   for (i=0; i < num; ++i)
-      ++sizes[sizelist[i]];
-   sizes[0] = 0;
-   for (i=1; i < 16; ++i)
-      if (sizes[i] > (1 << i))
-         return stbi__err("bad sizes", "Corrupt PNG");
-   code = 0;
-   for (i=1; i < 16; ++i) {
-      next_code[i] = code;
-      z->firstcode[i] = (stbi__uint16) code;
-      z->firstsymbol[i] = (stbi__uint16) k;
-      code = (code + sizes[i]);
-      if (sizes[i])
-         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
-      z->maxcode[i] = code << (16-i); // preshift for inner loop
-      code <<= 1;
-      k += sizes[i];
-   }
-   z->maxcode[16] = 0x10000; // sentinel
-   for (i=0; i < num; ++i) {
-      int s = sizelist[i];
-      if (s) {
-         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
-         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
-         z->size [c] = (stbi_uc     ) s;
-         z->value[c] = (stbi__uint16) i;
-         if (s <= STBI__ZFAST_BITS) {
-            int j = stbi__bit_reverse(next_code[s],s);
-            while (j < (1 << STBI__ZFAST_BITS)) {
-               z->fast[j] = fastv;
-               j += (1 << s);
-            }
-         }
-         ++next_code[s];
-      }
-   }
-   return 1;
-}
-
-// zlib-from-memory implementation for PNG reading
-//    because PNG allows splitting the zlib stream arbitrarily,
-//    and it's annoying structurally to have PNG call ZLIB call PNG,
-//    we require PNG read all the IDATs and combine them into a single
-//    memory buffer
-
-typedef struct
-{
-   stbi_uc *zbuffer, *zbuffer_end;
-   int num_bits;
-   stbi__uint32 code_buffer;
-
-   char *zout;
-   char *zout_start;
-   char *zout_end;
-   int   z_expandable;
-
-   stbi__zhuffman z_length, z_distance;
-} stbi__zbuf;
-
-stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
-{
-   if (z->zbuffer >= z->zbuffer_end) return 0;
-   return *z->zbuffer++;
-}
-
-static void stbi__fill_bits(stbi__zbuf *z)
-{
-   do {
-      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
-      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
-      z->num_bits += 8;
-   } while (z->num_bits <= 24);
-}
-
-stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
-{
-   unsigned int k;
-   if (z->num_bits < n) stbi__fill_bits(z);
-   k = z->code_buffer & ((1 << n) - 1);
-   z->code_buffer >>= n;
-   z->num_bits -= n;
-   return k;
-}
-
-static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
-{
-   int b,s,k;
-   // not resolved by fast table, so compute it the slow way
-   // use jpeg approach, which requires MSbits at top
-   k = stbi__bit_reverse(a->code_buffer, 16);
-   for (s=STBI__ZFAST_BITS+1; ; ++s)
-      if (k < z->maxcode[s])
-         break;
-   if (s == 16) return -1; // invalid code!
-   // code size is s, so:
-   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
-   STBI_ASSERT(z->size[b] == s);
-   a->code_buffer >>= s;
-   a->num_bits -= s;
-   return z->value[b];
-}
-
-stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
-{
-   int b,s;
-   if (a->num_bits < 16) stbi__fill_bits(a);
-   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
-   if (b) {
-      s = b >> 9;
-      a->code_buffer >>= s;
-      a->num_bits -= s;
-      return b & 511;
-   }
-   return stbi__zhuffman_decode_slowpath(a, z);
-}
-
-static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
-{
-   char *q;
-   int cur, limit, old_limit;
-   z->zout = zout;
-   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
-   cur   = (int) (z->zout     - z->zout_start);
-   limit = old_limit = (int) (z->zout_end - z->zout_start);
-   while (cur + n > limit)
-      limit *= 2;
-   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
-   STBI_NOTUSED(old_limit);
-   if (q == NULL) return stbi__err("outofmem", "Out of memory");
-   z->zout_start = q;
-   z->zout       = q + cur;
-   z->zout_end   = q + limit;
-   return 1;
-}
-
-static int stbi__zlength_base[31] = {
-   3,4,5,6,7,8,9,10,11,13,
-   15,17,19,23,27,31,35,43,51,59,
-   67,83,99,115,131,163,195,227,258,0,0 };
-
-static int stbi__zlength_extra[31]=
-{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
-
-static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
-257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
-
-static int stbi__zdist_extra[32] =
-{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
-
-static int stbi__parse_huffman_block(stbi__zbuf *a)
-{
-   char *zout = a->zout;
-   for(;;) {
-      int z = stbi__zhuffman_decode(a, &a->z_length);
-      if (z < 256) {
-         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
-         if (zout >= a->zout_end) {
-            if (!stbi__zexpand(a, zout, 1)) return 0;
-            zout = a->zout;
-         }
-         *zout++ = (char) z;
-      } else {
-         stbi_uc *p;
-         int len,dist;
-         if (z == 256) {
-            a->zout = zout;
-            return 1;
-         }
-         z -= 257;
-         len = stbi__zlength_base[z];
-         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
-         z = stbi__zhuffman_decode(a, &a->z_distance);
-         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
-         dist = stbi__zdist_base[z];
-         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
-         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
-         if (zout + len > a->zout_end) {
-            if (!stbi__zexpand(a, zout, len)) return 0;
-            zout = a->zout;
-         }
-         p = (stbi_uc *) (zout - dist);
-         if (dist == 1) { // run of one byte; common in images.
-            stbi_uc v = *p;
-            if (len) { do *zout++ = v; while (--len); }
-         } else {
-            if (len) { do *zout++ = *p++; while (--len); }
-         }
-      }
-   }
-}
-
-static int stbi__compute_huffman_codes(stbi__zbuf *a)
-{
-   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
-   stbi__zhuffman z_codelength;
-   stbi_uc lencodes[286+32+137];//padding for maximum single op
-   stbi_uc codelength_sizes[19];
-   int i,n;
-
-   int hlit  = stbi__zreceive(a,5) + 257;
-   int hdist = stbi__zreceive(a,5) + 1;
-   int hclen = stbi__zreceive(a,4) + 4;
-
-   memset(codelength_sizes, 0, sizeof(codelength_sizes));
-   for (i=0; i < hclen; ++i) {
-      int s = stbi__zreceive(a,3);
-      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
-   }
-   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
-
-   n = 0;
-   while (n < hlit + hdist) {
-      int c = stbi__zhuffman_decode(a, &z_codelength);
-      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
-      if (c < 16)
-         lencodes[n++] = (stbi_uc) c;
-      else if (c == 16) {
-         c = stbi__zreceive(a,2)+3;
-         memset(lencodes+n, lencodes[n-1], c);
-         n += c;
-      } else if (c == 17) {
-         c = stbi__zreceive(a,3)+3;
-         memset(lencodes+n, 0, c);
-         n += c;
-      } else {
-         STBI_ASSERT(c == 18);
-         c = stbi__zreceive(a,7)+11;
-         memset(lencodes+n, 0, c);
-         n += c;
-      }
-   }
-   if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
-   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
-   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
-   return 1;
-}
-
-static int stbi__parse_uncompressed_block(stbi__zbuf *a)
-{
-   stbi_uc header[4];
-   int len,nlen,k;
-   if (a->num_bits & 7)
-      stbi__zreceive(a, a->num_bits & 7); // discard
-   // drain the bit-packed data into header
-   k = 0;
-   while (a->num_bits > 0) {
-      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
-      a->code_buffer >>= 8;
-      a->num_bits -= 8;
-   }
-   STBI_ASSERT(a->num_bits == 0);
-   // now fill header the normal way
-   while (k < 4)
-      header[k++] = stbi__zget8(a);
-   len  = header[1] * 256 + header[0];
-   nlen = header[3] * 256 + header[2];
-   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
-   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
-   if (a->zout + len > a->zout_end)
-      if (!stbi__zexpand(a, a->zout, len)) return 0;
-   memcpy(a->zout, a->zbuffer, len);
-   a->zbuffer += len;
-   a->zout += len;
-   return 1;
-}
-
-static int stbi__parse_zlib_header(stbi__zbuf *a)
-{
-   int cmf   = stbi__zget8(a);
-   int cm    = cmf & 15;
-   /* int cinfo = cmf >> 4; */
-   int flg   = stbi__zget8(a);
-   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
-   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
-   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
-   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
-   return 1;
-}
-
-// @TODO: should statically initialize these for optimal thread safety
-static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
-static void stbi__init_zdefaults(void)
-{
-   int i;   // use <= to match clearly with spec
-   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
-   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
-   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
-   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
-
-   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
-}
-
-static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
-{
-   int final, type;
-   if (parse_header)
-      if (!stbi__parse_zlib_header(a)) return 0;
-   a->num_bits = 0;
-   a->code_buffer = 0;
-   do {
-      final = stbi__zreceive(a,1);
-      type = stbi__zreceive(a,2);
-      if (type == 0) {
-         if (!stbi__parse_uncompressed_block(a)) return 0;
-      } else if (type == 3) {
-         return 0;
-      } else {
-         if (type == 1) {
-            // use fixed code lengths
-            if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
-            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
-            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
-         } else {
-            if (!stbi__compute_huffman_codes(a)) return 0;
-         }
-         if (!stbi__parse_huffman_block(a)) return 0;
-      }
-   } while (!final);
-   return 1;
-}
-
-static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
-{
-   a->zout_start = obuf;
-   a->zout       = obuf;
-   a->zout_end   = obuf + olen;
-   a->z_expandable = exp;
-
-   return stbi__parse_zlib(a, parse_header);
-}
-
-STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
-{
-   stbi__zbuf a;
-   char *p = (char *) stbi__malloc(initial_size);
-   if (p == NULL) return NULL;
-   a.zbuffer = (stbi_uc *) buffer;
-   a.zbuffer_end = (stbi_uc *) buffer + len;
-   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
-      if (outlen) *outlen = (int) (a.zout - a.zout_start);
-      return a.zout_start;
-   } else {
-      STBI_FREE(a.zout_start);
-      return NULL;
-   }
-}
-
-STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
-{
-   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
-}
-
-STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
-{
-   stbi__zbuf a;
-   char *p = (char *) stbi__malloc(initial_size);
-   if (p == NULL) return NULL;
-   a.zbuffer = (stbi_uc *) buffer;
-   a.zbuffer_end = (stbi_uc *) buffer + len;
-   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
-      if (outlen) *outlen = (int) (a.zout - a.zout_start);
-      return a.zout_start;
-   } else {
-      STBI_FREE(a.zout_start);
-      return NULL;
-   }
-}
-
-STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
-{
-   stbi__zbuf a;
-   a.zbuffer = (stbi_uc *) ibuffer;
-   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
-   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
-      return (int) (a.zout - a.zout_start);
-   else
-      return -1;
-}
-
-STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
-{
-   stbi__zbuf a;
-   char *p = (char *) stbi__malloc(16384);
-   if (p == NULL) return NULL;
-   a.zbuffer = (stbi_uc *) buffer;
-   a.zbuffer_end = (stbi_uc *) buffer+len;
-   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
-      if (outlen) *outlen = (int) (a.zout - a.zout_start);
-      return a.zout_start;
-   } else {
-      STBI_FREE(a.zout_start);
-      return NULL;
-   }
-}
-
-STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
-{
-   stbi__zbuf a;
-   a.zbuffer = (stbi_uc *) ibuffer;
-   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
-   if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
-      return (int) (a.zout - a.zout_start);
-   else
-      return -1;
-}
-#endif
-
-// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
-//    simple implementation
-//      - only 8-bit samples
-//      - no CRC checking
-//      - allocates lots of intermediate memory
-//        - avoids problem of streaming data between subsystems
-//        - avoids explicit window management
-//    performance
-//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
-
-#ifndef STBI_NO_PNG
-typedef struct
-{
-   stbi__uint32 length;
-   stbi__uint32 type;
-} stbi__pngchunk;
-
-static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
-{
-   stbi__pngchunk c;
-   c.length = stbi__get32be(s);
-   c.type   = stbi__get32be(s);
-   return c;
-}
-
-static int stbi__check_png_header(stbi__context *s)
-{
-   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
-   int i;
-   for (i=0; i < 8; ++i)
-      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
-   return 1;
-}
-
-typedef struct
-{
-   stbi__context *s;
-   stbi_uc *idata, *expanded, *out;
-   int depth;
-} stbi__png;
-
-
-enum {
-   STBI__F_none=0,
-   STBI__F_sub=1,
-   STBI__F_up=2,
-   STBI__F_avg=3,
-   STBI__F_paeth=4,
-   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
-   STBI__F_avg_first,
-   STBI__F_paeth_first
-};
-
-static stbi_uc first_row_filter[5] =
-{
-   STBI__F_none,
-   STBI__F_sub,
-   STBI__F_none,
-   STBI__F_avg_first,
-   STBI__F_paeth_first
-};
-
-static int stbi__paeth(int a, int b, int c)
-{
-   int p = a + b - c;
-   int pa = abs(p-a);
-   int pb = abs(p-b);
-   int pc = abs(p-c);
-   if (pa <= pb && pa <= pc) return a;
-   if (pb <= pc) return b;
-   return c;
-}
-
-static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
-
-// create the png data from post-deflated data
-static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
-{
-   int bytes = (depth == 16? 2 : 1);
-   stbi__context *s = a->s;
-   stbi__uint32 i,j,stride = x*out_n*bytes;
-   stbi__uint32 img_len, img_width_bytes;
-   int k;
-   int img_n = s->img_n; // copy it into a local for later
-
-   int output_bytes = out_n*bytes;
-   int filter_bytes = img_n*bytes;
-   int width = x;
-
-   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
-   a->out = (stbi_uc *) stbi__malloc(x * y * output_bytes); // extra bytes to write off the end into
-   if (!a->out) return stbi__err("outofmem", "Out of memory");
-
-   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
-   img_len = (img_width_bytes + 1) * y;
-   if (s->img_x == x && s->img_y == y) {
-      if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
-   } else { // interlaced:
-      if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
-   }
-
-   for (j=0; j < y; ++j) {
-      stbi_uc *cur = a->out + stride*j;
-      stbi_uc *prior = cur - stride;
-      int filter = *raw++;
-
-      if (filter > 4)
-         return stbi__err("invalid filter","Corrupt PNG");
-
-      if (depth < 8) {
-         STBI_ASSERT(img_width_bytes <= x);
-         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
-         filter_bytes = 1;
-         width = img_width_bytes;
-      }
-
-      // if first row, use special filter that doesn't sample previous row
-      if (j == 0) filter = first_row_filter[filter];
-
-      // handle first byte explicitly
-      for (k=0; k < filter_bytes; ++k) {
-         switch (filter) {
-            case STBI__F_none       : cur[k] = raw[k]; break;
-            case STBI__F_sub        : cur[k] = raw[k]; break;
-            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
-            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
-            case STBI__F_avg_first  : cur[k] = raw[k]; break;
-            case STBI__F_paeth_first: cur[k] = raw[k]; break;
-         }
-      }
-
-      if (depth == 8) {
-         if (img_n != out_n)
-            cur[img_n] = 255; // first pixel
-         raw += img_n;
-         cur += out_n;
-         prior += out_n;
-      } else if (depth == 16) {
-         if (img_n != out_n) {
-            cur[filter_bytes]   = 255; // first pixel top byte
-            cur[filter_bytes+1] = 255; // first pixel bottom byte
-         }
-         raw += filter_bytes;
-         cur += output_bytes;
-         prior += output_bytes;
-      } else {
-         raw += 1;
-         cur += 1;
-         prior += 1;
-      }
-
-      // this is a little gross, so that we don't switch per-pixel or per-component
-      if (depth < 8 || img_n == out_n) {
-         int nk = (width - 1)*filter_bytes;
-         #define CASE(f) \
-             case f:     \
-                for (k=0; k < nk; ++k)
-         switch (filter) {
-            // "none" filter turns into a memcpy here; make that explicit.
-            case STBI__F_none:         memcpy(cur, raw, nk); break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
-         }
-         #undef CASE
-         raw += nk;
-      } else {
-         STBI_ASSERT(img_n+1 == out_n);
-         #define CASE(f) \
-             case f:     \
-                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
-                   for (k=0; k < filter_bytes; ++k)
-         switch (filter) {
-            CASE(STBI__F_none)         cur[k] = raw[k]; break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
-         }
-         #undef CASE
-
-         // the loop above sets the high byte of the pixels' alpha, but for
-         // 16 bit png files we also need the low byte set. we'll do that here.
-         if (depth == 16) {
-            cur = a->out + stride*j; // start at the beginning of the row again
-            for (i=0; i < x; ++i,cur+=output_bytes) {
-               cur[filter_bytes+1] = 255;
-            }
-         }
-      }
-   }
-
-   // we make a separate pass to expand bits to pixels; for performance,
-   // this could run two scanlines behind the above code, so it won't
-   // intefere with filtering but will still be in the cache.
-   if (depth < 8) {
-      for (j=0; j < y; ++j) {
-         stbi_uc *cur = a->out + stride*j;
-         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
-         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
-         // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
-         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
-
-         // note that the final byte might overshoot and write more data than desired.
-         // we can allocate enough data that this never writes out of memory, but it
-         // could also overwrite the next scanline. can it overwrite non-empty data
-         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
-         // so we need to explicitly clamp the final ones
-
-         if (depth == 4) {
-            for (k=x*img_n; k >= 2; k-=2, ++in) {
-               *cur++ = scale * ((*in >> 4)       );
-               *cur++ = scale * ((*in     ) & 0x0f);
-            }
-            if (k > 0) *cur++ = scale * ((*in >> 4)       );
-         } else if (depth == 2) {
-            for (k=x*img_n; k >= 4; k-=4, ++in) {
-               *cur++ = scale * ((*in >> 6)       );
-               *cur++ = scale * ((*in >> 4) & 0x03);
-               *cur++ = scale * ((*in >> 2) & 0x03);
-               *cur++ = scale * ((*in     ) & 0x03);
-            }
-            if (k > 0) *cur++ = scale * ((*in >> 6)       );
-            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
-            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
-         } else if (depth == 1) {
-            for (k=x*img_n; k >= 8; k-=8, ++in) {
-               *cur++ = scale * ((*in >> 7)       );
-               *cur++ = scale * ((*in >> 6) & 0x01);
-               *cur++ = scale * ((*in >> 5) & 0x01);
-               *cur++ = scale * ((*in >> 4) & 0x01);
-               *cur++ = scale * ((*in >> 3) & 0x01);
-               *cur++ = scale * ((*in >> 2) & 0x01);
-               *cur++ = scale * ((*in >> 1) & 0x01);
-               *cur++ = scale * ((*in     ) & 0x01);
-            }
-            if (k > 0) *cur++ = scale * ((*in >> 7)       );
-            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
-            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
-            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
-            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
-            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
-            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
-         }
-         if (img_n != out_n) {
-            int q;
-            // insert alpha = 255
-            cur = a->out + stride*j;
-            if (img_n == 1) {
-               for (q=x-1; q >= 0; --q) {
-                  cur[q*2+1] = 255;
-                  cur[q*2+0] = cur[q];
-               }
-            } else {
-               STBI_ASSERT(img_n == 3);
-               for (q=x-1; q >= 0; --q) {
-                  cur[q*4+3] = 255;
-                  cur[q*4+2] = cur[q*3+2];
-                  cur[q*4+1] = cur[q*3+1];
-                  cur[q*4+0] = cur[q*3+0];
-               }
-            }
-         }
-      }
-   } else if (depth == 16) {
-      // force the image data from big-endian to platform-native.
-      // this is done in a separate pass due to the decoding relying
-      // on the data being untouched, but could probably be done
-      // per-line during decode if care is taken.
-      stbi_uc *cur = a->out;
-      stbi__uint16 *cur16 = (stbi__uint16*)cur;
-
-      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
-         *cur16 = (cur[0] << 8) | cur[1];
-      }
-   }
-
-   return 1;
-}
-
-static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
-{
-   stbi_uc *final;
-   int p;
-   if (!interlaced)
-      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
-
-   // de-interlacing
-   final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
-   for (p=0; p < 7; ++p) {
-      int xorig[] = { 0,4,0,2,0,1,0 };
-      int yorig[] = { 0,0,4,0,2,0,1 };
-      int xspc[]  = { 8,8,4,4,2,2,1 };
-      int yspc[]  = { 8,8,8,4,4,2,2 };
-      int i,j,x,y;
-      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
-      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
-      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
-      if (x && y) {
-         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
-         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
-            STBI_FREE(final);
-            return 0;
-         }
-         for (j=0; j < y; ++j) {
-            for (i=0; i < x; ++i) {
-               int out_y = j*yspc[p]+yorig[p];
-               int out_x = i*xspc[p]+xorig[p];
-               memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
-                      a->out + (j*x+i)*out_n, out_n);
-            }
-         }
-         STBI_FREE(a->out);
-         image_data += img_len;
-         image_data_len -= img_len;
-      }
-   }
-   a->out = final;
-
-   return 1;
-}
-
-static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
-{
-   stbi__context *s = z->s;
-   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-   stbi_uc *p = z->out;
-
-   // compute color-based transparency, assuming we've
-   // already got 255 as the alpha value in the output
-   STBI_ASSERT(out_n == 2 || out_n == 4);
-
-   if (out_n == 2) {
-      for (i=0; i < pixel_count; ++i) {
-         p[1] = (p[0] == tc[0] ? 0 : 255);
-         p += 2;
-      }
-   } else {
-      for (i=0; i < pixel_count; ++i) {
-         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
-            p[3] = 0;
-         p += 4;
-      }
-   }
-   return 1;
-}
-
-static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
-{
-   stbi__context *s = z->s;
-   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-   stbi__uint16 *p = (stbi__uint16*) z->out;
-
-   // compute color-based transparency, assuming we've
-   // already got 65535 as the alpha value in the output
-   STBI_ASSERT(out_n == 2 || out_n == 4);
-
-   if (out_n == 2) {
-      for (i = 0; i < pixel_count; ++i) {
-         p[1] = (p[0] == tc[0] ? 0 : 65535);
-         p += 2;
-      }
-   } else {
-      for (i = 0; i < pixel_count; ++i) {
-         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
-            p[3] = 0;
-         p += 4;
-      }
-   }
-   return 1;
-}
-
-static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
-{
-   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
-   stbi_uc *p, *temp_out, *orig = a->out;
-
-   p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
-
-   // between here and free(out) below, exitting would leak
-   temp_out = p;
-
-   if (pal_img_n == 3) {
-      for (i=0; i < pixel_count; ++i) {
-         int n = orig[i]*4;
-         p[0] = palette[n  ];
-         p[1] = palette[n+1];
-         p[2] = palette[n+2];
-         p += 3;
-      }
-   } else {
-      for (i=0; i < pixel_count; ++i) {
-         int n = orig[i]*4;
-         p[0] = palette[n  ];
-         p[1] = palette[n+1];
-         p[2] = palette[n+2];
-         p[3] = palette[n+3];
-         p += 4;
-      }
-   }
-   STBI_FREE(a->out);
-   a->out = temp_out;
-
-   STBI_NOTUSED(len);
-
-   return 1;
-}
-
-static int stbi__reduce_png(stbi__png *p)
-{
-   int i;
-   int img_len = p->s->img_x * p->s->img_y * p->s->img_out_n;
-   stbi_uc *reduced;
-   stbi__uint16 *orig = (stbi__uint16*)p->out;
-
-   if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data
-
-   reduced = (stbi_uc *)stbi__malloc(img_len);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
-
-   for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit scaling
-
-   p->out = reduced;
-   STBI_FREE(orig);
-
-   return 1;
-}
-
-static int stbi__unpremultiply_on_load = 0;
-static int stbi__de_iphone_flag = 0;
-
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
-{
-   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
-}
-
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
-{
-   stbi__de_iphone_flag = flag_true_if_should_convert;
-}
-
-static void stbi__de_iphone(stbi__png *z)
-{
-   stbi__context *s = z->s;
-   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-   stbi_uc *p = z->out;
-
-   if (s->img_out_n == 3) {  // convert bgr to rgb
-      for (i=0; i < pixel_count; ++i) {
-         stbi_uc t = p[0];
-         p[0] = p[2];
-         p[2] = t;
-         p += 3;
-      }
-   } else {
-      STBI_ASSERT(s->img_out_n == 4);
-      if (stbi__unpremultiply_on_load) {
-         // convert bgr to rgb and unpremultiply
-         for (i=0; i < pixel_count; ++i) {
-            stbi_uc a = p[3];
-            stbi_uc t = p[0];
-            if (a) {
-               p[0] = p[2] * 255 / a;
-               p[1] = p[1] * 255 / a;
-               p[2] =  t   * 255 / a;
-            } else {
-               p[0] = p[2];
-               p[2] = t;
-            }
-            p += 4;
-         }
-      } else {
-         // convert bgr to rgb
-         for (i=0; i < pixel_count; ++i) {
-            stbi_uc t = p[0];
-            p[0] = p[2];
-            p[2] = t;
-            p += 4;
-         }
-      }
-   }
-}
-
-#define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
-
-static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
-{
-   stbi_uc palette[1024], pal_img_n=0;
-   stbi_uc has_trans=0, tc[3];
-   stbi__uint16 tc16[3];
-   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
-   int first=1,k,interlace=0, color=0, is_iphone=0;
-   stbi__context *s = z->s;
-
-   z->expanded = NULL;
-   z->idata = NULL;
-   z->out = NULL;
-
-   if (!stbi__check_png_header(s)) return 0;
-
-   if (scan == STBI__SCAN_type) return 1;
-
-   for (;;) {
-      stbi__pngchunk c = stbi__get_chunk_header(s);
-      switch (c.type) {
-         case STBI__PNG_TYPE('C','g','B','I'):
-            is_iphone = 1;
-            stbi__skip(s, c.length);
-            break;
-         case STBI__PNG_TYPE('I','H','D','R'): {
-            int comp,filter;
-            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
-            first = 0;
-            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
-            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
-            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
-            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
-            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
-			if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
-            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
-            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
-            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
-            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
-            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
-            if (!pal_img_n) {
-               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
-               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
-               if (scan == STBI__SCAN_header) return 1;
-            } else {
-               // if paletted, then pal_n is our final components, and
-               // img_n is # components to decompress/filter.
-               s->img_n = 1;
-               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
-               // if SCAN_header, have to scan to see if we have a tRNS
-            }
-            break;
-         }
-
-         case STBI__PNG_TYPE('P','L','T','E'):  {
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
-            pal_len = c.length / 3;
-            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
-            for (i=0; i < pal_len; ++i) {
-               palette[i*4+0] = stbi__get8(s);
-               palette[i*4+1] = stbi__get8(s);
-               palette[i*4+2] = stbi__get8(s);
-               palette[i*4+3] = 255;
-            }
-            break;
-         }
-
-         case STBI__PNG_TYPE('t','R','N','S'): {
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
-            if (pal_img_n) {
-               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
-               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
-               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
-               pal_img_n = 4;
-               for (i=0; i < c.length; ++i)
-                  palette[i*4+3] = stbi__get8(s);
-            } else {
-               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
-               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
-               has_trans = 1;
-               if (z->depth == 16) {
-                  for (k = 0; k < s->img_n; ++k) tc16[k] = stbi__get16be(s); // copy the values as-is
-               } else {
-                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
-               }
-            }
-            break;
-         }
-
-         case STBI__PNG_TYPE('I','D','A','T'): {
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
-            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
-            if ((int)(ioff + c.length) < (int)ioff) return 0;
-            if (ioff + c.length > idata_limit) {
-               stbi__uint32 idata_limit_old = idata_limit;
-               stbi_uc *p;
-               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
-               while (ioff + c.length > idata_limit)
-                  idata_limit *= 2;
-               STBI_NOTUSED(idata_limit_old);
-               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
-               z->idata = p;
-            }
-            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
-            ioff += c.length;
-            break;
-         }
-
-         case STBI__PNG_TYPE('I','E','N','D'): {
-            stbi__uint32 raw_len, bpl;
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (scan != STBI__SCAN_load) return 1;
-            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
-            // initial guess for decoded data size to avoid unnecessary reallocs
-            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
-            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
-            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
-            if (z->expanded == NULL) return 0; // zlib should set error
-            STBI_FREE(z->idata); z->idata = NULL;
-            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
-               s->img_out_n = s->img_n+1;
-            else
-               s->img_out_n = s->img_n;
-            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
-            if (has_trans) {
-               if (z->depth == 16) {
-                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
-               } else {
-                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
-               }
-            }
-            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
-               stbi__de_iphone(z);
-            if (pal_img_n) {
-               // pal_img_n == 3 or 4
-               s->img_n = pal_img_n; // record the actual colors we had
-               s->img_out_n = pal_img_n;
-               if (req_comp >= 3) s->img_out_n = req_comp;
-               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
-                  return 0;
-            }
-            STBI_FREE(z->expanded); z->expanded = NULL;
-            return 1;
-         }
-
-         default:
-            // if critical, fail
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if ((c.type & (1 << 29)) == 0) {
-               #ifndef STBI_NO_FAILURE_STRINGS
-               // not threadsafe
-               static char invalid_chunk[] = "XXXX PNG chunk not known";
-               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
-               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
-               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
-               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
-               #endif
-               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
-            }
-            stbi__skip(s, c.length);
-            break;
-      }
-      // end of PNG chunk, read and skip CRC
-      stbi__get32be(s);
-   }
-}
-
-static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
-{
-   unsigned char *result=NULL;
-   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
-   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-      if (p->depth == 16) {
-         if (!stbi__reduce_png(p)) {
-            return result;
-         }
-      }
-      result = p->out;
-      p->out = NULL;
-      if (req_comp && req_comp != p->s->img_out_n) {
-         result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
-         p->s->img_out_n = req_comp;
-         if (result == NULL) return result;
-      }
-      *x = p->s->img_x;
-      *y = p->s->img_y;
-      if (n) *n = p->s->img_n;
-   }
-   STBI_FREE(p->out);      p->out      = NULL;
-   STBI_FREE(p->expanded); p->expanded = NULL;
-   STBI_FREE(p->idata);    p->idata    = NULL;
-
-   return result;
-}
-
-static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__png p;
-   p.s = s;
-   return stbi__do_png(&p, x,y,comp,req_comp);
-}
-
-static int stbi__png_test(stbi__context *s)
-{
-   int r;
-   r = stbi__check_png_header(s);
-   stbi__rewind(s);
-   return r;
-}
-
-static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
-{
-   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
-      stbi__rewind( p->s );
-      return 0;
-   }
-   if (x) *x = p->s->img_x;
-   if (y) *y = p->s->img_y;
-   if (comp) *comp = p->s->img_n;
-   return 1;
-}
-
-static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   stbi__png p;
-   p.s = s;
-   return stbi__png_info_raw(&p, x, y, comp);
-}
-#endif
-
-// Microsoft/Windows BMP image
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_test_raw(stbi__context *s)
-{
-   int r;
-   int sz;
-   if (stbi__get8(s) != 'B') return 0;
-   if (stbi__get8(s) != 'M') return 0;
-   stbi__get32le(s); // discard filesize
-   stbi__get16le(s); // discard reserved
-   stbi__get16le(s); // discard reserved
-   stbi__get32le(s); // discard data offset
-   sz = stbi__get32le(s);
-   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
-   return r;
-}
-
-static int stbi__bmp_test(stbi__context *s)
-{
-   int r = stbi__bmp_test_raw(s);
-   stbi__rewind(s);
-   return r;
-}
-
-
-// returns 0..31 for the highest set bit
-static int stbi__high_bit(unsigned int z)
-{
-   int n=0;
-   if (z == 0) return -1;
-   if (z >= 0x10000) n += 16, z >>= 16;
-   if (z >= 0x00100) n +=  8, z >>=  8;
-   if (z >= 0x00010) n +=  4, z >>=  4;
-   if (z >= 0x00004) n +=  2, z >>=  2;
-   if (z >= 0x00002) n +=  1, z >>=  1;
-   return n;
-}
-
-static int stbi__bitcount(unsigned int a)
-{
-   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2
-   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4
-   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
-   a = (a + (a >> 8)); // max 16 per 8 bits
-   a = (a + (a >> 16)); // max 32 per 8 bits
-   return a & 0xff;
-}
-
-static int stbi__shiftsigned(int v, int shift, int bits)
-{
-   int result;
-   int z=0;
-
-   if (shift < 0) v <<= -shift;
-   else v >>= shift;
-   result = v;
-
-   z = bits;
-   while (z < 8) {
-      result += v >> z;
-      z += bits;
-   }
-   return result;
-}
-
-typedef struct
-{
-   int bpp, offset, hsz;
-   unsigned int mr,mg,mb,ma, all_a;
-} stbi__bmp_data;
-
-static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
-{
-   int hsz;
-   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
-   stbi__get32le(s); // discard filesize
-   stbi__get16le(s); // discard reserved
-   stbi__get16le(s); // discard reserved
-   info->offset = stbi__get32le(s);
-   info->hsz = hsz = stbi__get32le(s);
-   info->mr = info->mg = info->mb = info->ma = 0;
-   
-   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
-   if (hsz == 12) {
-      s->img_x = stbi__get16le(s);
-      s->img_y = stbi__get16le(s);
-   } else {
-      s->img_x = stbi__get32le(s);
-      s->img_y = stbi__get32le(s);
-   }
-   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
-   info->bpp = stbi__get16le(s);
-   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
-   if (hsz != 12) {
-      int compress = stbi__get32le(s);
-      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
-      stbi__get32le(s); // discard sizeof
-      stbi__get32le(s); // discard hres
-      stbi__get32le(s); // discard vres
-      stbi__get32le(s); // discard colorsused
-      stbi__get32le(s); // discard max important
-      if (hsz == 40 || hsz == 56) {
-         if (hsz == 56) {
-            stbi__get32le(s);
-            stbi__get32le(s);
-            stbi__get32le(s);
-            stbi__get32le(s);
-         }
-         if (info->bpp == 16 || info->bpp == 32) {
-            if (compress == 0) {
-               if (info->bpp == 32) {
-                  info->mr = 0xffu << 16;
-                  info->mg = 0xffu <<  8;
-                  info->mb = 0xffu <<  0;
-                  info->ma = 0xffu << 24;
-                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
-               } else {
-                  info->mr = 31u << 10;
-                  info->mg = 31u <<  5;
-                  info->mb = 31u <<  0;
-               }
-            } else if (compress == 3) {
-               info->mr = stbi__get32le(s);
-               info->mg = stbi__get32le(s);
-               info->mb = stbi__get32le(s);
-               // not documented, but generated by photoshop and handled by mspaint
-               if (info->mr == info->mg && info->mg == info->mb) {
-                  // ?!?!?
-                  return stbi__errpuc("bad BMP", "bad BMP");
-               }
-            } else
-               return stbi__errpuc("bad BMP", "bad BMP");
-         }
-      } else {
-         int i;
-         if (hsz != 108 && hsz != 124)
-            return stbi__errpuc("bad BMP", "bad BMP");
-         info->mr = stbi__get32le(s);
-         info->mg = stbi__get32le(s);
-         info->mb = stbi__get32le(s);
-         info->ma = stbi__get32le(s);
-         stbi__get32le(s); // discard color space
-         for (i=0; i < 12; ++i)
-            stbi__get32le(s); // discard color space parameters
-         if (hsz == 124) {
-            stbi__get32le(s); // discard rendering intent
-            stbi__get32le(s); // discard offset of profile data
-            stbi__get32le(s); // discard size of profile data
-            stbi__get32le(s); // discard reserved
-         }
-      }
-   }
-   return (void *) 1;
-}
-
-
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   stbi_uc *out;
-   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
-   stbi_uc pal[256][4];
-   int psize=0,i,j,width;
-   int flip_vertically, pad, target;
-   stbi__bmp_data info;
-
-   info.all_a = 255;   
-   if (stbi__bmp_parse_header(s, &info) == NULL)
-      return NULL; // error code already set
-
-   flip_vertically = ((int) s->img_y) > 0;
-   s->img_y = abs((int) s->img_y);
-
-   mr = info.mr;
-   mg = info.mg;
-   mb = info.mb;
-   ma = info.ma;
-   all_a = info.all_a;
-
-   if (info.hsz == 12) {
-      if (info.bpp < 24)
-         psize = (info.offset - 14 - 24) / 3;
-   } else {
-      if (info.bpp < 16)
-         psize = (info.offset - 14 - info.hsz) >> 2;
-   }
-
-   s->img_n = ma ? 4 : 3;
-   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
-      target = req_comp;
-   else
-      target = s->img_n; // if they want monochrome, we'll post-convert
-
-   out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
-   if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   if (info.bpp < 16) {
-      int z=0;
-      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
-      for (i=0; i < psize; ++i) {
-         pal[i][2] = stbi__get8(s);
-         pal[i][1] = stbi__get8(s);
-         pal[i][0] = stbi__get8(s);
-         if (info.hsz != 12) stbi__get8(s);
-         pal[i][3] = 255;
-      }
-      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
-      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
-      else if (info.bpp == 8) width = s->img_x;
-      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
-      pad = (-width)&3;
-      for (j=0; j < (int) s->img_y; ++j) {
-         for (i=0; i < (int) s->img_x; i += 2) {
-            int v=stbi__get8(s),v2=0;
-            if (info.bpp == 4) {
-               v2 = v & 15;
-               v >>= 4;
-            }
-            out[z++] = pal[v][0];
-            out[z++] = pal[v][1];
-            out[z++] = pal[v][2];
-            if (target == 4) out[z++] = 255;
-            if (i+1 == (int) s->img_x) break;
-            v = (info.bpp == 8) ? stbi__get8(s) : v2;
-            out[z++] = pal[v][0];
-            out[z++] = pal[v][1];
-            out[z++] = pal[v][2];
-            if (target == 4) out[z++] = 255;
-         }
-         stbi__skip(s, pad);
-      }
-   } else {
-      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
-      int z = 0;
-      int easy=0;
-      stbi__skip(s, info.offset - 14 - info.hsz);
-      if (info.bpp == 24) width = 3 * s->img_x;
-      else if (info.bpp == 16) width = 2*s->img_x;
-      else /* bpp = 32 and pad = 0 */ width=0;
-      pad = (-width) & 3;
-      if (info.bpp == 24) {
-         easy = 1;
-      } else if (info.bpp == 32) {
-         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
-            easy = 2;
-      }
-      if (!easy) {
-         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
-         // right shift amt to put high bit in position #7
-         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
-         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
-         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
-         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
-      }
-      for (j=0; j < (int) s->img_y; ++j) {
-         if (easy) {
-            for (i=0; i < (int) s->img_x; ++i) {
-               unsigned char a;
-               out[z+2] = stbi__get8(s);
-               out[z+1] = stbi__get8(s);
-               out[z+0] = stbi__get8(s);
-               z += 3;
-               a = (easy == 2 ? stbi__get8(s) : 255);
-               all_a |= a;
-               if (target == 4) out[z++] = a;
-            }
-         } else {
-            int bpp = info.bpp;
-            for (i=0; i < (int) s->img_x; ++i) {
-               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
-               int a;
-               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
-               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
-               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
-               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
-               all_a |= a;
-               if (target == 4) out[z++] = STBI__BYTECAST(a);
-            }
-         }
-         stbi__skip(s, pad);
-      }
-   }
-   
-   // if alpha channel is all 0s, replace with all 255s
-   if (target == 4 && all_a == 0)
-      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
-         out[i] = 255;
-
-   if (flip_vertically) {
-      stbi_uc t;
-      for (j=0; j < (int) s->img_y>>1; ++j) {
-         stbi_uc *p1 = out +      j     *s->img_x*target;
-         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
-         for (i=0; i < (int) s->img_x*target; ++i) {
-            t = p1[i], p1[i] = p2[i], p2[i] = t;
-         }
-      }
-   }
-
-   if (req_comp && req_comp != target) {
-      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
-      if (out == NULL) return out; // stbi__convert_format frees input on failure
-   }
-
-   *x = s->img_x;
-   *y = s->img_y;
-   if (comp) *comp = s->img_n;
-   return out;
-}
-#endif
-
-// Targa Truevision - TGA
-// by Jonathan Dummer
-#ifndef STBI_NO_TGA
-// returns STBI_rgb or whatever, 0 on error
-static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
-{
-   // only RGB or RGBA (incl. 16bit) or grey allowed
-   if(is_rgb16) *is_rgb16 = 0;
-   switch(bits_per_pixel) {
-      case 8:  return STBI_grey;
-      case 16: if(is_grey) return STBI_grey_alpha;
-            // else: fall-through
-      case 15: if(is_rgb16) *is_rgb16 = 1;
-            return STBI_rgb;
-      case 24: // fall-through
-      case 32: return bits_per_pixel/8;
-      default: return 0;
-   }
-}
-
-static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
-{
-    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
-    int sz, tga_colormap_type;
-    stbi__get8(s);                   // discard Offset
-    tga_colormap_type = stbi__get8(s); // colormap type
-    if( tga_colormap_type > 1 ) {
-        stbi__rewind(s);
-        return 0;      // only RGB or indexed allowed
-    }
-    tga_image_type = stbi__get8(s); // image type
-    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
-        if (tga_image_type != 1 && tga_image_type != 9) {
-            stbi__rewind(s);
-            return 0;
-        }
-        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
-        sz = stbi__get8(s);    //   check bits per palette color entry
-        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
-            stbi__rewind(s);
-            return 0;
-        }
-        stbi__skip(s,4);       // skip image x and y origin
-        tga_colormap_bpp = sz;
-    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
-        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
-            stbi__rewind(s);
-            return 0; // only RGB or grey allowed, +/- RLE
-        }
-        stbi__skip(s,9); // skip colormap specification and image x/y origin
-        tga_colormap_bpp = 0;
-    }
-    tga_w = stbi__get16le(s);
-    if( tga_w < 1 ) {
-        stbi__rewind(s);
-        return 0;   // test width
-    }
-    tga_h = stbi__get16le(s);
-    if( tga_h < 1 ) {
-        stbi__rewind(s);
-        return 0;   // test height
-    }
-    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
-    stbi__get8(s); // ignore alpha bits
-    if (tga_colormap_bpp != 0) {
-        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
-            // when using a colormap, tga_bits_per_pixel is the size of the indexes
-            // I don't think anything but 8 or 16bit indexes makes sense
-            stbi__rewind(s);
-            return 0;
-        }
-        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
-    } else {
-        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
-    }
-    if(!tga_comp) {
-      stbi__rewind(s);
-      return 0;
-    }
-    if (x) *x = tga_w;
-    if (y) *y = tga_h;
-    if (comp) *comp = tga_comp;
-    return 1;                   // seems to have passed everything
-}
-
-static int stbi__tga_test(stbi__context *s)
-{
-   int res = 0;
-   int sz, tga_color_type;
-   stbi__get8(s);      //   discard Offset
-   tga_color_type = stbi__get8(s);   //   color type
-   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed
-   sz = stbi__get8(s);   //   image type
-   if ( tga_color_type == 1 ) { // colormapped (paletted) image
-      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
-      stbi__skip(s,4);       // skip index of first colormap entry and number of entries
-      sz = stbi__get8(s);    //   check bits per palette color entry
-      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
-      stbi__skip(s,4);       // skip image x and y origin
-   } else { // "normal" image w/o colormap
-      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
-      stbi__skip(s,9); // skip colormap specification and image x/y origin
-   }
-   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width
-   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height
-   sz = stbi__get8(s);   //   bits per pixel
-   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
-   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
-
-   res = 1; // if we got this far, everything's good and we can return 1 instead of 0
-
-errorEnd:
-   stbi__rewind(s);
-   return res;
-}
-
-// read 16bit value and convert to 24bit RGB
-void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
-{
-   stbi__uint16 px = stbi__get16le(s);
-   stbi__uint16 fiveBitMask = 31;
-   // we have 3 channels with 5bits each
-   int r = (px >> 10) & fiveBitMask;
-   int g = (px >> 5) & fiveBitMask;
-   int b = px & fiveBitMask;
-   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
-   out[0] = (r * 255)/31;
-   out[1] = (g * 255)/31;
-   out[2] = (b * 255)/31;
-
-   // some people claim that the most significant bit might be used for alpha
-   // (possibly if an alpha-bit is set in the "image descriptor byte")
-   // but that only made 16bit test images completely translucent..
-   // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
-}
-
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   //   read in the TGA header stuff
-   int tga_offset = stbi__get8(s);
-   int tga_indexed = stbi__get8(s);
-   int tga_image_type = stbi__get8(s);
-   int tga_is_RLE = 0;
-   int tga_palette_start = stbi__get16le(s);
-   int tga_palette_len = stbi__get16le(s);
-   int tga_palette_bits = stbi__get8(s);
-   int tga_x_origin = stbi__get16le(s);
-   int tga_y_origin = stbi__get16le(s);
-   int tga_width = stbi__get16le(s);
-   int tga_height = stbi__get16le(s);
-   int tga_bits_per_pixel = stbi__get8(s);
-   int tga_comp, tga_rgb16=0;
-   int tga_inverted = stbi__get8(s);
-   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
-   //   image data
-   unsigned char *tga_data;
-   unsigned char *tga_palette = NULL;
-   int i, j;
-   unsigned char raw_data[4];
-   int RLE_count = 0;
-   int RLE_repeating = 0;
-   int read_next_pixel = 1;
-
-   //   do a tiny bit of precessing
-   if ( tga_image_type >= 8 )
-   {
-      tga_image_type -= 8;
-      tga_is_RLE = 1;
-   }
-   tga_inverted = 1 - ((tga_inverted >> 5) & 1);
-
-   //   If I'm paletted, then I'll use the number of bits from the palette
-   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
-   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
-
-   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
-      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
-
-   //   tga info
-   *x = tga_width;
-   *y = tga_height;
-   if (comp) *comp = tga_comp;
-
-   tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp );
-   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
-
-   // skip to the data's starting position (offset usually = 0)
-   stbi__skip(s, tga_offset );
-
-   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
-      for (i=0; i < tga_height; ++i) {
-         int row = tga_inverted ? tga_height -i - 1 : i;
-         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
-         stbi__getn(s, tga_row, tga_width * tga_comp);
-      }
-   } else  {
-      //   do I need to load a palette?
-      if ( tga_indexed)
-      {
-         //   any data to skip? (offset usually = 0)
-         stbi__skip(s, tga_palette_start );
-         //   load the palette
-         tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_comp );
-         if (!tga_palette) {
-            STBI_FREE(tga_data);
-            return stbi__errpuc("outofmem", "Out of memory");
-         }
-         if (tga_rgb16) {
-            stbi_uc *pal_entry = tga_palette;
-            STBI_ASSERT(tga_comp == STBI_rgb);
-            for (i=0; i < tga_palette_len; ++i) {
-               stbi__tga_read_rgb16(s, pal_entry);
-               pal_entry += tga_comp;
-            }
-         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
-               STBI_FREE(tga_data);
-               STBI_FREE(tga_palette);
-               return stbi__errpuc("bad palette", "Corrupt TGA");
-         }
-      }
-      //   load the data
-      for (i=0; i < tga_width * tga_height; ++i)
-      {
-         //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
-         if ( tga_is_RLE )
-         {
-            if ( RLE_count == 0 )
-            {
-               //   yep, get the next byte as a RLE command
-               int RLE_cmd = stbi__get8(s);
-               RLE_count = 1 + (RLE_cmd & 127);
-               RLE_repeating = RLE_cmd >> 7;
-               read_next_pixel = 1;
-            } else if ( !RLE_repeating )
-            {
-               read_next_pixel = 1;
-            }
-         } else
-         {
-            read_next_pixel = 1;
-         }
-         //   OK, if I need to read a pixel, do it now
-         if ( read_next_pixel )
-         {
-            //   load however much data we did have
-            if ( tga_indexed )
-            {
-               // read in index, then perform the lookup
-               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
-               if ( pal_idx >= tga_palette_len ) {
-                  // invalid index
-                  pal_idx = 0;
-               }
-               pal_idx *= tga_comp;
-               for (j = 0; j < tga_comp; ++j) {
-                  raw_data[j] = tga_palette[pal_idx+j];
-               }
-            } else if(tga_rgb16) {
-               STBI_ASSERT(tga_comp == STBI_rgb);
-               stbi__tga_read_rgb16(s, raw_data);
-            } else {
-               //   read in the data raw
-               for (j = 0; j < tga_comp; ++j) {
-                  raw_data[j] = stbi__get8(s);
-               }
-            }
-            //   clear the reading flag for the next pixel
-            read_next_pixel = 0;
-         } // end of reading a pixel
-
-         // copy data
-         for (j = 0; j < tga_comp; ++j)
-           tga_data[i*tga_comp+j] = raw_data[j];
-
-         //   in case we're in RLE mode, keep counting down
-         --RLE_count;
-      }
-      //   do I need to invert the image?
-      if ( tga_inverted )
-      {
-         for (j = 0; j*2 < tga_height; ++j)
-         {
-            int index1 = j * tga_width * tga_comp;
-            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
-            for (i = tga_width * tga_comp; i > 0; --i)
-            {
-               unsigned char temp = tga_data[index1];
-               tga_data[index1] = tga_data[index2];
-               tga_data[index2] = temp;
-               ++index1;
-               ++index2;
-            }
-         }
-      }
-      //   clear my palette, if I had one
-      if ( tga_palette != NULL )
-      {
-         STBI_FREE( tga_palette );
-      }
-   }
-
-   // swap RGB - if the source data was RGB16, it already is in the right order
-   if (tga_comp >= 3 && !tga_rgb16)
-   {
-      unsigned char* tga_pixel = tga_data;
-      for (i=0; i < tga_width * tga_height; ++i)
-      {
-         unsigned char temp = tga_pixel[0];
-         tga_pixel[0] = tga_pixel[2];
-         tga_pixel[2] = temp;
-         tga_pixel += tga_comp;
-      }
-   }
-
-   // convert to target component count
-   if (req_comp && req_comp != tga_comp)
-      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
-
-   //   the things I do to get rid of an error message, and yet keep
-   //   Microsoft's C compilers happy... [8^(
-   tga_palette_start = tga_palette_len = tga_palette_bits =
-         tga_x_origin = tga_y_origin = 0;
-   //   OK, done
-   return tga_data;
-}
-#endif
-
-// *************************************************************************************************
-// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_test(stbi__context *s)
-{
-   int r = (stbi__get32be(s) == 0x38425053);
-   stbi__rewind(s);
-   return r;
-}
-
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   int   pixelCount;
-   int channelCount, compression;
-   int channel, i, count, len;
-   int bitdepth;
-   int w,h;
-   stbi_uc *out;
-
-   // Check identifier
-   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
-      return stbi__errpuc("not PSD", "Corrupt PSD image");
-
-   // Check file type version.
-   if (stbi__get16be(s) != 1)
-      return stbi__errpuc("wrong version", "Unsupported version of PSD image");
-
-   // Skip 6 reserved bytes.
-   stbi__skip(s, 6 );
-
-   // Read the number of channels (R, G, B, A, etc).
-   channelCount = stbi__get16be(s);
-   if (channelCount < 0 || channelCount > 16)
-      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
-
-   // Read the rows and columns of the image.
-   h = stbi__get32be(s);
-   w = stbi__get32be(s);
-
-   // Make sure the depth is 8 bits.
-   bitdepth = stbi__get16be(s);
-   if (bitdepth != 8 && bitdepth != 16)
-      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
-
-   // Make sure the color mode is RGB.
-   // Valid options are:
-   //   0: Bitmap
-   //   1: Grayscale
-   //   2: Indexed color
-   //   3: RGB color
-   //   4: CMYK color
-   //   7: Multichannel
-   //   8: Duotone
-   //   9: Lab color
-   if (stbi__get16be(s) != 3)
-      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
-
-   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
-   stbi__skip(s,stbi__get32be(s) );
-
-   // Skip the image resources.  (resolution, pen tool paths, etc)
-   stbi__skip(s, stbi__get32be(s) );
-
-   // Skip the reserved data.
-   stbi__skip(s, stbi__get32be(s) );
-
-   // Find out if the data is compressed.
-   // Known values:
-   //   0: no compression
-   //   1: RLE compressed
-   compression = stbi__get16be(s);
-   if (compression > 1)
-      return stbi__errpuc("bad compression", "PSD has an unknown compression format");
-
-   // Create the destination image.
-   out = (stbi_uc *) stbi__malloc(4 * w*h);
-   if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   pixelCount = w*h;
-
-   // Initialize the data to zero.
-   //memset( out, 0, pixelCount * 4 );
-
-   // Finally, the image data.
-   if (compression) {
-      // RLE as used by .PSD and .TIFF
-      // Loop until you get the number of unpacked bytes you are expecting:
-      //     Read the next source byte into n.
-      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
-      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
-      //     Else if n is 128, noop.
-      // Endloop
-
-      // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
-      // which we're going to just skip.
-      stbi__skip(s, h * channelCount * 2 );
-
-      // Read the RLE data by channel.
-      for (channel = 0; channel < 4; channel++) {
-         stbi_uc *p;
-
-         p = out+channel;
-         if (channel >= channelCount) {
-            // Fill this channel with default data.
-            for (i = 0; i < pixelCount; i++, p += 4)
-               *p = (channel == 3 ? 255 : 0);
-         } else {
-            // Read the RLE data.
-            count = 0;
-            while (count < pixelCount) {
-               len = stbi__get8(s);
-               if (len == 128) {
-                  // No-op.
-               } else if (len < 128) {
-                  // Copy next len+1 bytes literally.
-                  len++;
-                  count += len;
-                  while (len) {
-                     *p = stbi__get8(s);
-                     p += 4;
-                     len--;
-                  }
-               } else if (len > 128) {
-                  stbi_uc   val;
-                  // Next -len+1 bytes in the dest are replicated from next source byte.
-                  // (Interpret len as a negative 8-bit int.)
-                  len ^= 0x0FF;
-                  len += 2;
-                  val = stbi__get8(s);
-                  count += len;
-                  while (len) {
-                     *p = val;
-                     p += 4;
-                     len--;
-                  }
-               }
-            }
-         }
-      }
-
-   } else {
-      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
-      // where each channel consists of an 8-bit value for each pixel in the image.
-
-      // Read the data by channel.
-      for (channel = 0; channel < 4; channel++) {
-         stbi_uc *p;
-
-         p = out + channel;
-         if (channel >= channelCount) {
-            // Fill this channel with default data.
-            stbi_uc val = channel == 3 ? 255 : 0;
-            for (i = 0; i < pixelCount; i++, p += 4)
-               *p = val;
-         } else {
-            // Read the data.
-            if (bitdepth == 16) {
-               for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = (stbi_uc) (stbi__get16be(s) >> 8);
-            } else {
-               for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = stbi__get8(s);
-            }
-         }
-      }
-   }
-
-   if (channelCount >= 4) {
-      for (i=0; i < w*h; ++i) {
-         unsigned char *pixel = out + 4*i;
-         if (pixel[3] != 0 && pixel[3] != 255) {
-            // remove weird white matte from PSD
-            float a = pixel[3] / 255.0f;
-            float ra = 1.0f / a;
-            float inv_a = 255.0f * (1 - ra);
-            pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
-            pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
-            pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
-         }
-      }
-   }
-
-   if (req_comp && req_comp != 4) {
-      out = stbi__convert_format(out, 4, req_comp, w, h);
-      if (out == NULL) return out; // stbi__convert_format frees input on failure
-   }
-
-   if (comp) *comp = 4;
-   *y = h;
-   *x = w;
-
-   return out;
-}
-#endif
-
-// *************************************************************************************************
-// Softimage PIC loader
-// by Tom Seddon
-//
-// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
-// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_is4(stbi__context *s,const char *str)
-{
-   int i;
-   for (i=0; i<4; ++i)
-      if (stbi__get8(s) != (stbi_uc)str[i])
-         return 0;
-
-   return 1;
-}
-
-static int stbi__pic_test_core(stbi__context *s)
-{
-   int i;
-
-   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
-      return 0;
-
-   for(i=0;i<84;++i)
-      stbi__get8(s);
-
-   if (!stbi__pic_is4(s,"PICT"))
-      return 0;
-
-   return 1;
-}
-
-typedef struct
-{
-   stbi_uc size,type,channel;
-} stbi__pic_packet;
-
-static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
-{
-   int mask=0x80, i;
-
-   for (i=0; i<4; ++i, mask>>=1) {
-      if (channel & mask) {
-         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
-         dest[i]=stbi__get8(s);
-      }
-   }
-
-   return dest;
-}
-
-static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
-{
-   int mask=0x80,i;
-
-   for (i=0;i<4; ++i, mask>>=1)
-      if (channel&mask)
-         dest[i]=src[i];
-}
-
-static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
-{
-   int act_comp=0,num_packets=0,y,chained;
-   stbi__pic_packet packets[10];
-
-   // this will (should...) cater for even some bizarre stuff like having data
-    // for the same channel in multiple packets.
-   do {
-      stbi__pic_packet *packet;
-
-      if (num_packets==sizeof(packets)/sizeof(packets[0]))
-         return stbi__errpuc("bad format","too many packets");
-
-      packet = &packets[num_packets++];
-
-      chained = stbi__get8(s);
-      packet->size    = stbi__get8(s);
-      packet->type    = stbi__get8(s);
-      packet->channel = stbi__get8(s);
-
-      act_comp |= packet->channel;
-
-      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
-      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
-   } while (chained);
-
-   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
-
-   for(y=0; y<height; ++y) {
-      int packet_idx;
-
-      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
-         stbi__pic_packet *packet = &packets[packet_idx];
-         stbi_uc *dest = result+y*width*4;
-
-         switch (packet->type) {
-            default:
-               return stbi__errpuc("bad format","packet has bad compression type");
-
-            case 0: {//uncompressed
-               int x;
-
-               for(x=0;x<width;++x, dest+=4)
-                  if (!stbi__readval(s,packet->channel,dest))
-                     return 0;
-               break;
-            }
-
-            case 1://Pure RLE
-               {
-                  int left=width, i;
-
-                  while (left>0) {
-                     stbi_uc count,value[4];
-
-                     count=stbi__get8(s);
-                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
-
-                     if (count > left)
-                        count = (stbi_uc) left;
-
-                     if (!stbi__readval(s,packet->channel,value))  return 0;
-
-                     for(i=0; i<count; ++i,dest+=4)
-                        stbi__copyval(packet->channel,dest,value);
-                     left -= count;
-                  }
-               }
-               break;
-
-            case 2: {//Mixed RLE
-               int left=width;
-               while (left>0) {
-                  int count = stbi__get8(s), i;
-                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
-
-                  if (count >= 128) { // Repeated
-                     stbi_uc value[4];
-
-                     if (count==128)
-                        count = stbi__get16be(s);
-                     else
-                        count -= 127;
-                     if (count > left)
-                        return stbi__errpuc("bad file","scanline overrun");
-
-                     if (!stbi__readval(s,packet->channel,value))
-                        return 0;
-
-                     for(i=0;i<count;++i, dest += 4)
-                        stbi__copyval(packet->channel,dest,value);
-                  } else { // Raw
-                     ++count;
-                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
-
-                     for(i=0;i<count;++i, dest+=4)
-                        if (!stbi__readval(s,packet->channel,dest))
-                           return 0;
-                  }
-                  left-=count;
-               }
-               break;
-            }
-         }
-      }
-   }
-
-   return result;
-}
-
-static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
-{
-   stbi_uc *result;
-   int i, x,y;
-
-   for (i=0; i<92; ++i)
-      stbi__get8(s);
-
-   x = stbi__get16be(s);
-   y = stbi__get16be(s);
-   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
-   if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
-
-   stbi__get32be(s); //skip `ratio'
-   stbi__get16be(s); //skip `fields'
-   stbi__get16be(s); //skip `pad'
-
-   // intermediate buffer is RGBA
-   result = (stbi_uc *) stbi__malloc(x*y*4);
-   memset(result, 0xff, x*y*4);
-
-   if (!stbi__pic_load_core(s,x,y,comp, result)) {
-      STBI_FREE(result);
-      result=0;
-   }
-   *px = x;
-   *py = y;
-   if (req_comp == 0) req_comp = *comp;
-   result=stbi__convert_format(result,4,req_comp,x,y);
-
-   return result;
-}
-
-static int stbi__pic_test(stbi__context *s)
-{
-   int r = stbi__pic_test_core(s);
-   stbi__rewind(s);
-   return r;
-}
-#endif
-
-// *************************************************************************************************
-// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
-
-#ifndef STBI_NO_GIF
-typedef struct
-{
-   stbi__int16 prefix;
-   stbi_uc first;
-   stbi_uc suffix;
-} stbi__gif_lzw;
-
-typedef struct
-{
-   int w,h;
-   stbi_uc *out, *old_out;             // output buffer (always 4 components)
-   int flags, bgindex, ratio, transparent, eflags, delay;
-   stbi_uc  pal[256][4];
-   stbi_uc lpal[256][4];
-   stbi__gif_lzw codes[4096];
-   stbi_uc *color_table;
-   int parse, step;
-   int lflags;
-   int start_x, start_y;
-   int max_x, max_y;
-   int cur_x, cur_y;
-   int line_size;
-} stbi__gif;
-
-static int stbi__gif_test_raw(stbi__context *s)
-{
-   int sz;
-   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
-   sz = stbi__get8(s);
-   if (sz != '9' && sz != '7') return 0;
-   if (stbi__get8(s) != 'a') return 0;
-   return 1;
-}
-
-static int stbi__gif_test(stbi__context *s)
-{
-   int r = stbi__gif_test_raw(s);
-   stbi__rewind(s);
-   return r;
-}
-
-static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
-{
-   int i;
-   for (i=0; i < num_entries; ++i) {
-      pal[i][2] = stbi__get8(s);
-      pal[i][1] = stbi__get8(s);
-      pal[i][0] = stbi__get8(s);
-      pal[i][3] = transp == i ? 0 : 255;
-   }
-}
-
-static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
-{
-   stbi_uc version;
-   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
-      return stbi__err("not GIF", "Corrupt GIF");
-
-   version = stbi__get8(s);
-   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
-   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");
-
-   stbi__g_failure_reason = "";
-   g->w = stbi__get16le(s);
-   g->h = stbi__get16le(s);
-   g->flags = stbi__get8(s);
-   g->bgindex = stbi__get8(s);
-   g->ratio = stbi__get8(s);
-   g->transparent = -1;
-
-   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
-
-   if (is_info) return 1;
-
-   if (g->flags & 0x80)
-      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
-
-   return 1;
-}
-
-static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
-{
-   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
-   if (!stbi__gif_header(s, g, comp, 1)) {
-      STBI_FREE(g);
-      stbi__rewind( s );
-      return 0;
-   }
-   if (x) *x = g->w;
-   if (y) *y = g->h;
-   STBI_FREE(g);
-   return 1;
-}
-
-static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
-{
-   stbi_uc *p, *c;
-
-   // recurse to decode the prefixes, since the linked-list is backwards,
-   // and working backwards through an interleaved image would be nasty
-   if (g->codes[code].prefix >= 0)
-      stbi__out_gif_code(g, g->codes[code].prefix);
-
-   if (g->cur_y >= g->max_y) return;
-
-   p = &g->out[g->cur_x + g->cur_y];
-   c = &g->color_table[g->codes[code].suffix * 4];
-
-   if (c[3] >= 128) {
-      p[0] = c[2];
-      p[1] = c[1];
-      p[2] = c[0];
-      p[3] = c[3];
-   }
-   g->cur_x += 4;
-
-   if (g->cur_x >= g->max_x) {
-      g->cur_x = g->start_x;
-      g->cur_y += g->step;
-
-      while (g->cur_y >= g->max_y && g->parse > 0) {
-         g->step = (1 << g->parse) * g->line_size;
-         g->cur_y = g->start_y + (g->step >> 1);
-         --g->parse;
-      }
-   }
-}
-
-static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
-{
-   stbi_uc lzw_cs;
-   stbi__int32 len, init_code;
-   stbi__uint32 first;
-   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
-   stbi__gif_lzw *p;
-
-   lzw_cs = stbi__get8(s);
-   if (lzw_cs > 12) return NULL;
-   clear = 1 << lzw_cs;
-   first = 1;
-   codesize = lzw_cs + 1;
-   codemask = (1 << codesize) - 1;
-   bits = 0;
-   valid_bits = 0;
-   for (init_code = 0; init_code < clear; init_code++) {
-      g->codes[init_code].prefix = -1;
-      g->codes[init_code].first = (stbi_uc) init_code;
-      g->codes[init_code].suffix = (stbi_uc) init_code;
-   }
-
-   // support no starting clear code
-   avail = clear+2;
-   oldcode = -1;
-
-   len = 0;
-   for(;;) {
-      if (valid_bits < codesize) {
-         if (len == 0) {
-            len = stbi__get8(s); // start new block
-            if (len == 0)
-               return g->out;
-         }
-         --len;
-         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
-         valid_bits += 8;
-      } else {
-         stbi__int32 code = bits & codemask;
-         bits >>= codesize;
-         valid_bits -= codesize;
-         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
-         if (code == clear) {  // clear code
-            codesize = lzw_cs + 1;
-            codemask = (1 << codesize) - 1;
-            avail = clear + 2;
-            oldcode = -1;
-            first = 0;
-         } else if (code == clear + 1) { // end of stream code
-            stbi__skip(s, len);
-            while ((len = stbi__get8(s)) > 0)
-               stbi__skip(s,len);
-            return g->out;
-         } else if (code <= avail) {
-            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
-
-            if (oldcode >= 0) {
-               p = &g->codes[avail++];
-               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
-               p->prefix = (stbi__int16) oldcode;
-               p->first = g->codes[oldcode].first;
-               p->suffix = (code == avail) ? p->first : g->codes[code].first;
-            } else if (code == avail)
-               return stbi__errpuc("illegal code in raster", "Corrupt GIF");
-
-            stbi__out_gif_code(g, (stbi__uint16) code);
-
-            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
-               codesize++;
-               codemask = (1 << codesize) - 1;
-            }
-
-            oldcode = code;
-         } else {
-            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
-         }
-      }
-   }
-}
-
-static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
-{
-   int x, y;
-   stbi_uc *c = g->pal[g->bgindex];
-   for (y = y0; y < y1; y += 4 * g->w) {
-      for (x = x0; x < x1; x += 4) {
-         stbi_uc *p  = &g->out[y + x];
-         p[0] = c[2];
-         p[1] = c[1];
-         p[2] = c[0];
-         p[3] = 0;
-      }
-   }
-}
-
-// this function is designed to support animated gifs, although stb_image doesn't support it
-static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
-{
-   int i;
-   stbi_uc *prev_out = 0;
-
-   if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
-      return 0; // stbi__g_failure_reason set by stbi__gif_header
-
-   prev_out = g->out;
-   g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
-   if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
-
-   switch ((g->eflags & 0x1C) >> 2) {
-      case 0: // unspecified (also always used on 1st frame)
-         stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
-         break;
-      case 1: // do not dispose
-         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
-         g->old_out = prev_out;
-         break;
-      case 2: // dispose to background
-         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
-         stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
-         break;
-      case 3: // dispose to previous
-         if (g->old_out) {
-            for (i = g->start_y; i < g->max_y; i += 4 * g->w)
-               memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
-         }
-         break;
-   }
-
-   for (;;) {
-      switch (stbi__get8(s)) {
-         case 0x2C: /* Image Descriptor */
-         {
-            int prev_trans = -1;
-            stbi__int32 x, y, w, h;
-            stbi_uc *o;
-
-            x = stbi__get16le(s);
-            y = stbi__get16le(s);
-            w = stbi__get16le(s);
-            h = stbi__get16le(s);
-            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
-               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
-
-            g->line_size = g->w * 4;
-            g->start_x = x * 4;
-            g->start_y = y * g->line_size;
-            g->max_x   = g->start_x + w * 4;
-            g->max_y   = g->start_y + h * g->line_size;
-            g->cur_x   = g->start_x;
-            g->cur_y   = g->start_y;
-
-            g->lflags = stbi__get8(s);
-
-            if (g->lflags & 0x40) {
-               g->step = 8 * g->line_size; // first interlaced spacing
-               g->parse = 3;
-            } else {
-               g->step = g->line_size;
-               g->parse = 0;
-            }
-
-            if (g->lflags & 0x80) {
-               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
-               g->color_table = (stbi_uc *) g->lpal;
-            } else if (g->flags & 0x80) {
-               if (g->transparent >= 0 && (g->eflags & 0x01)) {
-                  prev_trans = g->pal[g->transparent][3];
-                  g->pal[g->transparent][3] = 0;
-               }
-               g->color_table = (stbi_uc *) g->pal;
-            } else
-               return stbi__errpuc("missing color table", "Corrupt GIF");
-
-            o = stbi__process_gif_raster(s, g);
-            if (o == NULL) return NULL;
-
-            if (prev_trans != -1)
-               g->pal[g->transparent][3] = (stbi_uc) prev_trans;
-
-            return o;
-         }
-
-         case 0x21: // Comment Extension.
-         {
-            int len;
-            if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
-               len = stbi__get8(s);
-               if (len == 4) {
-                  g->eflags = stbi__get8(s);
-                  g->delay = stbi__get16le(s);
-                  g->transparent = stbi__get8(s);
-               } else {
-                  stbi__skip(s, len);
-                  break;
-               }
-            }
-            while ((len = stbi__get8(s)) != 0)
-               stbi__skip(s, len);
-            break;
-         }
-
-         case 0x3B: // gif stream termination code
-            return (stbi_uc *) s; // using '1' causes warning on some compilers
-
-         default:
-            return stbi__errpuc("unknown code", "Corrupt GIF");
-      }
-   }
-
-   STBI_NOTUSED(req_comp);
-}
-
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   stbi_uc *u = 0;
-   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
-   memset(g, 0, sizeof(*g));
-
-   u = stbi__gif_load_next(s, g, comp, req_comp);
-   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
-   if (u) {
-      *x = g->w;
-      *y = g->h;
-      if (req_comp && req_comp != 4)
-         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
-   }
-   else if (g->out)
-      STBI_FREE(g->out);
-   STBI_FREE(g);
-   return u;
-}
-
-static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   return stbi__gif_info_raw(s,x,y,comp);
-}
-#endif
-
-// *************************************************************************************************
-// Radiance RGBE HDR loader
-// originally by Nicolas Schulz
-#ifndef STBI_NO_HDR
-static int stbi__hdr_test_core(stbi__context *s)
-{
-   const char *signature = "#?RADIANCE\n";
-   int i;
-   for (i=0; signature[i]; ++i)
-      if (stbi__get8(s) != signature[i])
-         return 0;
-   return 1;
-}
-
-static int stbi__hdr_test(stbi__context* s)
-{
-   int r = stbi__hdr_test_core(s);
-   stbi__rewind(s);
-   return r;
-}
-
-#define STBI__HDR_BUFLEN  1024
-static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
-{
-   int len=0;
-   char c = '\0';
-
-   c = (char) stbi__get8(z);
-
-   while (!stbi__at_eof(z) && c != '\n') {
-      buffer[len++] = c;
-      if (len == STBI__HDR_BUFLEN-1) {
-         // flush to end of line
-         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
-            ;
-         break;
-      }
-      c = (char) stbi__get8(z);
-   }
-
-   buffer[len] = 0;
-   return buffer;
-}
-
-static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
-{
-   if ( input[3] != 0 ) {
-      float f1;
-      // Exponent
-      f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
-      if (req_comp <= 2)
-         output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
-      else {
-         output[0] = input[0] * f1;
-         output[1] = input[1] * f1;
-         output[2] = input[2] * f1;
-      }
-      if (req_comp == 2) output[1] = 1;
-      if (req_comp == 4) output[3] = 1;
-   } else {
-      switch (req_comp) {
-         case 4: output[3] = 1; /* fallthrough */
-         case 3: output[0] = output[1] = output[2] = 0;
-                 break;
-         case 2: output[1] = 1; /* fallthrough */
-         case 1: output[0] = 0;
-                 break;
-      }
-   }
-}
-
-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   char buffer[STBI__HDR_BUFLEN];
-   char *token;
-   int valid = 0;
-   int width, height;
-   stbi_uc *scanline;
-   float *hdr_data;
-   int len;
-   unsigned char count, value;
-   int i, j, k, c1,c2, z;
-
-
-   // Check identifier
-   if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
-      return stbi__errpf("not HDR", "Corrupt HDR image");
-
-   // Parse header
-   for(;;) {
-      token = stbi__hdr_gettoken(s,buffer);
-      if (token[0] == 0) break;
-      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
-   }
-
-   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
-
-   // Parse width and height
-   // can't use sscanf() if we're not using stdio!
-   token = stbi__hdr_gettoken(s,buffer);
-   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
-   token += 3;
-   height = (int) strtol(token, &token, 10);
-   while (*token == ' ') ++token;
-   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
-   token += 3;
-   width = (int) strtol(token, NULL, 10);
-
-   *x = width;
-   *y = height;
-
-   if (comp) *comp = 3;
-   if (req_comp == 0) req_comp = 3;
-
-   // Read data
-   hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
-
-   // Load image data
-   // image data is stored as some number of sca
-   if ( width < 8 || width >= 32768) {
-      // Read flat data
-      for (j=0; j < height; ++j) {
-         for (i=0; i < width; ++i) {
-            stbi_uc rgbe[4];
-           main_decode_loop:
-            stbi__getn(s, rgbe, 4);
-            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
-         }
-      }
-   } else {
-      // Read RLE-encoded data
-      scanline = NULL;
-
-      for (j = 0; j < height; ++j) {
-         c1 = stbi__get8(s);
-         c2 = stbi__get8(s);
-         len = stbi__get8(s);
-         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
-            // not run-length encoded, so we have to actually use THIS data as a decoded
-            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
-            stbi_uc rgbe[4];
-            rgbe[0] = (stbi_uc) c1;
-            rgbe[1] = (stbi_uc) c2;
-            rgbe[2] = (stbi_uc) len;
-            rgbe[3] = (stbi_uc) stbi__get8(s);
-            stbi__hdr_convert(hdr_data, rgbe, req_comp);
-            i = 1;
-            j = 0;
-            STBI_FREE(scanline);
-            goto main_decode_loop; // yes, this makes no sense
-         }
-         len <<= 8;
-         len |= stbi__get8(s);
-         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
-         if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
-
-         for (k = 0; k < 4; ++k) {
-            i = 0;
-            while (i < width) {
-               count = stbi__get8(s);
-               if (count > 128) {
-                  // Run
-                  value = stbi__get8(s);
-                  count -= 128;
-                  for (z = 0; z < count; ++z)
-                     scanline[i++ * 4 + k] = value;
-               } else {
-                  // Dump
-                  for (z = 0; z < count; ++z)
-                     scanline[i++ * 4 + k] = stbi__get8(s);
-               }
-            }
-         }
-         for (i=0; i < width; ++i)
-            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
-      }
-      STBI_FREE(scanline);
-   }
-
-   return hdr_data;
-}
-
-static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   char buffer[STBI__HDR_BUFLEN];
-   char *token;
-   int valid = 0;
-
-   if (stbi__hdr_test(s) == 0) {
-       stbi__rewind( s );
-       return 0;
-   }
-
-   for(;;) {
-      token = stbi__hdr_gettoken(s,buffer);
-      if (token[0] == 0) break;
-      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
-   }
-
-   if (!valid) {
-       stbi__rewind( s );
-       return 0;
-   }
-   token = stbi__hdr_gettoken(s,buffer);
-   if (strncmp(token, "-Y ", 3)) {
-       stbi__rewind( s );
-       return 0;
-   }
-   token += 3;
-   *y = (int) strtol(token, &token, 10);
-   while (*token == ' ') ++token;
-   if (strncmp(token, "+X ", 3)) {
-       stbi__rewind( s );
-       return 0;
-   }
-   token += 3;
-   *x = (int) strtol(token, NULL, 10);
-   *comp = 3;
-   return 1;
-}
-#endif // STBI_NO_HDR
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   void *p;
-   stbi__bmp_data info;
-
-   info.all_a = 255;   
-   p = stbi__bmp_parse_header(s, &info);
-   stbi__rewind( s );
-   if (p == NULL)
-      return 0;
-   *x = s->img_x;
-   *y = s->img_y;
-   *comp = info.ma ? 4 : 3;
-   return 1;
-}
-#endif
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int channelCount;
-   if (stbi__get32be(s) != 0x38425053) {
-       stbi__rewind( s );
-       return 0;
-   }
-   if (stbi__get16be(s) != 1) {
-       stbi__rewind( s );
-       return 0;
-   }
-   stbi__skip(s, 6);
-   channelCount = stbi__get16be(s);
-   if (channelCount < 0 || channelCount > 16) {
-       stbi__rewind( s );
-       return 0;
-   }
-   *y = stbi__get32be(s);
-   *x = stbi__get32be(s);
-   if (stbi__get16be(s) != 8) {
-       stbi__rewind( s );
-       return 0;
-   }
-   if (stbi__get16be(s) != 3) {
-       stbi__rewind( s );
-       return 0;
-   }
-   *comp = 4;
-   return 1;
-}
-#endif
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int act_comp=0,num_packets=0,chained;
-   stbi__pic_packet packets[10];
-
-   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
-      stbi__rewind(s);
-      return 0;
-   }
-
-   stbi__skip(s, 88);
-
-   *x = stbi__get16be(s);
-   *y = stbi__get16be(s);
-   if (stbi__at_eof(s)) {
-      stbi__rewind( s);
-      return 0;
-   }
-   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
-      stbi__rewind( s );
-      return 0;
-   }
-
-   stbi__skip(s, 8);
-
-   do {
-      stbi__pic_packet *packet;
-
-      if (num_packets==sizeof(packets)/sizeof(packets[0]))
-         return 0;
-
-      packet = &packets[num_packets++];
-      chained = stbi__get8(s);
-      packet->size    = stbi__get8(s);
-      packet->type    = stbi__get8(s);
-      packet->channel = stbi__get8(s);
-      act_comp |= packet->channel;
-
-      if (stbi__at_eof(s)) {
-          stbi__rewind( s );
-          return 0;
-      }
-      if (packet->size != 8) {
-          stbi__rewind( s );
-          return 0;
-      }
-   } while (chained);
-
-   *comp = (act_comp & 0x10 ? 4 : 3);
-
-   return 1;
-}
-#endif
-
-// *************************************************************************************************
-// Portable Gray Map and Portable Pixel Map loader
-// by Ken Miller
-//
-// PGM: http://netpbm.sourceforge.net/doc/pgm.html
-// PPM: http://netpbm.sourceforge.net/doc/ppm.html
-//
-// Known limitations:
-//    Does not support comments in the header section
-//    Does not support ASCII image data (formats P2 and P3)
-//    Does not support 16-bit-per-channel
-
-#ifndef STBI_NO_PNM
-
-static int      stbi__pnm_test(stbi__context *s)
-{
-   char p, t;
-   p = (char) stbi__get8(s);
-   t = (char) stbi__get8(s);
-   if (p != 'P' || (t != '5' && t != '6')) {
-       stbi__rewind( s );
-       return 0;
-   }
-   return 1;
-}
-
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   stbi_uc *out;
-   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
-      return 0;
-   *x = s->img_x;
-   *y = s->img_y;
-   *comp = s->img_n;
-
-   out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
-   if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
-
-   if (req_comp && req_comp != s->img_n) {
-      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
-      if (out == NULL) return out; // stbi__convert_format frees input on failure
-   }
-   return out;
-}
-
-static int      stbi__pnm_isspace(char c)
-{
-   return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
-}
-
-static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
-{
-   for (;;) {
-      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
-         *c = (char) stbi__get8(s);
-
-      if (stbi__at_eof(s) || *c != '#')
-         break;
-
-      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
-         *c = (char) stbi__get8(s);
-   }
-}
-
-static int      stbi__pnm_isdigit(char c)
-{
-   return c >= '0' && c <= '9';
-}
-
-static int      stbi__pnm_getinteger(stbi__context *s, char *c)
-{
-   int value = 0;
-
-   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
-      value = value*10 + (*c - '0');
-      *c = (char) stbi__get8(s);
-   }
-
-   return value;
-}
-
-static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int maxv;
-   char c, p, t;
-
-   stbi__rewind( s );
-
-   // Get identifier
-   p = (char) stbi__get8(s);
-   t = (char) stbi__get8(s);
-   if (p != 'P' || (t != '5' && t != '6')) {
-       stbi__rewind( s );
-       return 0;
-   }
-
-   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
-
-   c = (char) stbi__get8(s);
-   stbi__pnm_skip_whitespace(s, &c);
-
-   *x = stbi__pnm_getinteger(s, &c); // read width
-   stbi__pnm_skip_whitespace(s, &c);
-
-   *y = stbi__pnm_getinteger(s, &c); // read height
-   stbi__pnm_skip_whitespace(s, &c);
-
-   maxv = stbi__pnm_getinteger(s, &c);  // read max value
-
-   if (maxv > 255)
-      return stbi__err("max value > 255", "PPM image not 8-bit");
-   else
-      return 1;
-}
-#endif
-
-static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
-{
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_info(s, x, y, comp)) return 1;
-   #endif
-
-   #ifndef STBI_NO_PNG
-   if (stbi__png_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_GIF
-   if (stbi__gif_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_BMP
-   if (stbi__bmp_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PSD
-   if (stbi__psd_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PIC
-   if (stbi__pic_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PNM
-   if (stbi__pnm_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_HDR
-   if (stbi__hdr_info(s, x, y, comp))  return 1;
-   #endif
-
-   // test tga last because it's a crappy test!
-   #ifndef STBI_NO_TGA
-   if (stbi__tga_info(s, x, y, comp))
-       return 1;
-   #endif
-   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
-{
-    FILE *f = stbi__fopen(filename, "rb");
-    int result;
-    if (!f) return stbi__err("can't fopen", "Unable to open file");
-    result = stbi_info_from_file(f, x, y, comp);
-    fclose(f);
-    return result;
-}
-
-STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
-{
-   int r;
-   stbi__context s;
-   long pos = ftell(f);
-   stbi__start_file(&s, f);
-   r = stbi__info_main(&s,x,y,comp);
-   fseek(f,pos,SEEK_SET);
-   return r;
-}
-#endif // !STBI_NO_STDIO
-
-STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__info_main(&s,x,y,comp);
-}
-
-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
-   return stbi__info_main(&s,x,y,comp);
-}
-
-#endif // STB_IMAGE_IMPLEMENTATION
-
-/*
-   revision history:
-      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
-      2.11  (2016-04-02) allocate large structures on the stack
-                         remove white matting for transparent PSD
-                         fix reported channel count for PNG & BMP
-                         re-enable SSE2 in non-gcc 64-bit
-                         support RGB-formatted JPEG
-                         read 16-bit PNGs (only as 8-bit)
-      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
-      2.09  (2016-01-16) allow comments in PNM files
-                         16-bit-per-pixel TGA (not bit-per-component)
-                         info() for TGA could break due to .hdr handling
-                         info() for BMP to shares code instead of sloppy parse
-                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc
-                         code cleanup
-      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
-      2.07  (2015-09-13) fix compiler warnings
-                         partial animated GIF support
-                         limited 16-bpc PSD support
-                         #ifdef unused functions
-                         bug with < 92 byte PIC,PNM,HDR,TGA
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) extra corruption checking (mmozeiko)
-                         stbi_set_flip_vertically_on_load (nguillemot)
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
-                         progressive JPEG (stb)
-                         PGM/PPM support (Ken Miller)
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         GIF bugfix -- seemingly never worked
-                         STBI_NO_*, STBI_ONLY_*
-      1.48  (2014-12-14) fix incorrectly-named assert()
-      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
-                         optimize PNG (ryg)
-                         fix bug in interlaced PNG with user-specified channel count (stb)
-      1.46  (2014-08-26)
-              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
-      1.45  (2014-08-16)
-              fix MSVC-ARM internal compiler error by wrapping malloc
-      1.44  (2014-08-07)
-              various warning fixes from Ronny Chevalier
-      1.43  (2014-07-15)
-              fix MSVC-only compiler problem in code changed in 1.42
-      1.42  (2014-07-09)
-              don't define _CRT_SECURE_NO_WARNINGS (affects user code)
-              fixes to stbi__cleanup_jpeg path
-              added STBI_ASSERT to avoid requiring assert.h
-      1.41  (2014-06-25)
-              fix search&replace from 1.36 that messed up comments/error messages
-      1.40  (2014-06-22)
-              fix gcc struct-initialization warning
-      1.39  (2014-06-15)
-              fix to TGA optimization when req_comp != number of components in TGA;
-              fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
-              add support for BMP version 5 (more ignored fields)
-      1.38  (2014-06-06)
-              suppress MSVC warnings on integer casts truncating values
-              fix accidental rename of 'skip' field of I/O
-      1.37  (2014-06-04)
-              remove duplicate typedef
-      1.36  (2014-06-03)
-              convert to header file single-file library
-              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
-      1.35  (2014-05-27)
-              various warnings
-              fix broken STBI_SIMD path
-              fix bug where stbi_load_from_file no longer left file pointer in correct place
-              fix broken non-easy path for 32-bit BMP (possibly never used)
-              TGA optimization by Arseny Kapoulkine
-      1.34  (unknown)
-              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
-      1.33  (2011-07-14)
-              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
-      1.32  (2011-07-13)
-              support for "info" function for all supported filetypes (SpartanJ)
-      1.31  (2011-06-20)
-              a few more leak fixes, bug in PNG handling (SpartanJ)
-      1.30  (2011-06-11)
-              added ability to load files via callbacks to accomidate custom input streams (Ben Wenger)
-              removed deprecated format-specific test/load functions
-              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
-              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
-              fix inefficiency in decoding 32-bit BMP (David Woo)
-      1.29  (2010-08-16)
-              various warning fixes from Aurelien Pocheville
-      1.28  (2010-08-01)
-              fix bug in GIF palette transparency (SpartanJ)
-      1.27  (2010-08-01)
-              cast-to-stbi_uc to fix warnings
-      1.26  (2010-07-24)
-              fix bug in file buffering for PNG reported by SpartanJ
-      1.25  (2010-07-17)
-              refix trans_data warning (Won Chun)
-      1.24  (2010-07-12)
-              perf improvements reading from files on platforms with lock-heavy fgetc()
-              minor perf improvements for jpeg
-              deprecated type-specific functions so we'll get feedback if they're needed
-              attempt to fix trans_data warning (Won Chun)
-      1.23    fixed bug in iPhone support
-      1.22  (2010-07-10)
-              removed image *writing* support
-              stbi_info support from Jetro Lauha
-              GIF support from Jean-Marc Lienher
-              iPhone PNG-extensions from James Brown
-              warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
-      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
-      1.20    added support for Softimage PIC, by Tom Seddon
-      1.19    bug in interlaced PNG corruption check (found by ryg)
-      1.18  (2008-08-02)
-              fix a threading bug (local mutable static)
-      1.17    support interlaced PNG
-      1.16    major bugfix - stbi__convert_format converted one too many pixels
-      1.15    initialize some fields for thread safety
-      1.14    fix threadsafe conversion bug
-              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
-      1.13    threadsafe
-      1.12    const qualifiers in the API
-      1.11    Support installable IDCT, colorspace conversion routines
-      1.10    Fixes for 64-bit (don't use "unsigned long")
-              optimized upsampling by Fabian "ryg" Giesen
-      1.09    Fix format-conversion for PSD code (bad global variables!)
-      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
-      1.07    attempt to fix C++ warning/errors again
-      1.06    attempt to fix C++ warning/errors again
-      1.05    fix TGA loading to return correct *comp and use good luminance calc
-      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
-      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
-      1.02    support for (subset of) HDR files, float interface for preferred access to them
-      1.01    fix bug: possible bug in handling right-side up bmps... not sure
-              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
-      1.00    interface to zlib that skips zlib header
-      0.99    correct handling of alpha in palette
-      0.98    TGA loader by lonesock; dynamically add loaders (untested)
-      0.97    jpeg errors on too large a file; also catch another malloc failure
-      0.96    fix detection of invalid v value - particleman@mollyrocket forum
-      0.95    during header scan, seek to markers in case of padding
-      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
-      0.93    handle jpegtran output; verbose errors
-      0.92    read 4,8,16,24,32-bit BMP files of several formats
-      0.91    output 24-bit Windows 3.0 BMP files
-      0.90    fix a few more warnings; bump version number to approach 1.0
-      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
-      0.60    fix compiling as c++
-      0.59    fix warnings: merge Dave Moore's -Wall fixes
-      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
-      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
-      0.56    fix bug: zlib uncompressed mode len vs. nlen
-      0.55    fix bug: restart_interval not initialized to 0
-      0.54    allow NULL for 'int *comp'
-      0.53    fix bug in png 3->4; speedup png decoding
-      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
-      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
-              on 'test' only check type, not whether we support this variant
-      0.50  (2006-11-19)
-              first released version
-*/
diff --git a/3rdparty/tinyexr/README.md b/3rdparty/tinyexr/README.md
deleted file mode 100644
index aea03fd97..000000000
--- a/3rdparty/tinyexr/README.md
+++ /dev/null
@@ -1,274 +0,0 @@
-# Tiny OpenEXR image library.
-
-![Example](https://github.com/syoyo/tinyexr/blob/master/asakusa.png?raw=true)
-
-[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/k07ftfe4ph057qau/branch/master?svg=true)](https://ci.appveyor.com/project/syoyo/tinyexr/branch/master)
-
-[![Travis build Status](https://travis-ci.org/syoyo/tinyexr.svg)](https://travis-ci.org/syoyo/tinyexr)
-
-[![Coverity Scan Build Status](https://scan.coverity.com/projects/5827/badge.svg)](https://scan.coverity.com/projects/5827)
-
-`tinyexr` is a small, single header-only library to load and save OpenEXR(.exr) images.
-`tinyexr` is written in portable C++(no library dependency except for STL), thus `tinyexr` is good to embed into your application.
-To use `tinyexr`, simply copy `tinyexr.h` into your project.
-
-`tinyexr` currently supports:
-
-* OpenEXR version 1.x.
-* Normal image
-  * Scanline format.
-  * Uncompress("compress" = 0), ZIPS("compress" = 2), ZIP compression("compress" = 3) and PIZ compression("compress" = 4).
-  * Half/Uint/Float pixel type.
-  * Custom attributes(up to 128)
-* Deep image
-  * Scanline format.
-  * ZIPS compression("compress" = 2).
-  * Half, float pixel type.
-* Litte endian machine.
-* Limited support for big endian machine.
-  * read/write normal image.
-* C interface.
-  * You can easily write language bindings(e.g. golang)
-* EXR saving
-  * with ZIP compression.
-* JavaScript library
-  * Through emscripten.
-
-# Use case 
-
-* mallie https://github.com/lighttransport/mallie
-* PBRT v3 https://github.com/mmp/pbrt-v3
-* Cinder 0.9.0 https://libcinder.org/notes/v0.9.0
-* Piccante(develop branch) http://piccantelib.net/
-* Your project here!
-
-## Examples
-
-* [examples/deepview/](examples/deepview) Deep image view
-* [examples/rgbe2exr/](examples/rgbe2exr) .hdr to EXR converter
-* [examples/exr2rgbe/](examples/exr2rgbe) EXR to .hdr converter
-
-## Usage
-
-NOTE: **API is still subject to change**. See the source code for details.
-
-Include `tinyexr.h` with `TINYEXR_IMPLEMENTATION` flag(do this only for **one** .cc file).
-
-```
-#define TINYEXR_IMPLEMENTATION
-#include "tinyexr.h"
-```
-
-Quickly reading RGB(A) EXR file.
-
-```
-  const char* input = "asakusa.exr";
-  float* out; // width * height * RGBA
-  int width;
-  int height;
-  const char* err;
-
-  int ret = LoadEXR(&out, &width, &height, input, &err);
-```
-
-Loading EXR from a file.
-
-```
-  const char* input = "asakusa.exr";
-  const char* err;
-
-  EXRImage exrImage;
-  InitEXRImage(&exrImage);
-
-  int ret = ParseMultiChannelEXRHeaderFromFile(&exrImage, input, &err);
-  if (ret != 0) {
-    fprintf(stderr, "Parse EXR err: %s\n", err);
-    return;
-  }
-
-  //// Uncomment if you want reading HALF image as FLOAT.
-  //for (int i = 0; i < exrImage.num_channels; i++) {
-  //  if (exrImage.pixel_types[i] = TINYEXR_PIXELTYPE_HALF) {
-  //    exrImage.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
-  //  }
-  //}
-
-  ret = LoadMultiChannelEXRFromFile(&exrImage, input, &err);
-  if (ret != 0) {
-    fprintf(stderr, "Load EXR err: %s\n", err);
-    return;
-  }
-```
-
-Saving EXR file.
-
-```
-  bool SaveEXR(const float* rgb, int width, int height, const char* outfilename) {
-
-    float* channels[3];
-
-    EXRImage image;
-    InitEXRImage(&image);
-
-    image.num_channels = 3;
-
-    // Must be BGR(A) order, since most of EXR viewers expect this channel order.
-    const char* channel_names[] = {"B", "G", "R"}; // "B", "G", "R", "A" for RGBA image
-
-    std::vector<float> images[3];
-    images[0].resize(width * height);
-    images[1].resize(width * height);
-    images[2].resize(width * height);
-
-    for (int i = 0; i < width * height; i++) {
-      images[0][i] = rgb[3*i+0];
-      images[1][i] = rgb[3*i+1];
-      images[2][i] = rgb[3*i+2];
-    }
-
-    float* image_ptr[3];
-    image_ptr[0] = &(images[2].at(0)); // B
-    image_ptr[1] = &(images[1].at(0)); // G
-    image_ptr[2] = &(images[0].at(0)); // R
-
-    image.channel_names = channel_names;
-    image.images = (unsigned char**)image_ptr;
-    image.width = width;
-    image.height = height;
-    image.compression = TINYEXR_COMPRESSIONTYPE_ZIP;
-
-    image.pixel_types = (int *)malloc(sizeof(int) * image.num_channels);
-    image.requested_pixel_types = (int *)malloc(sizeof(int) * image.num_channels);
-    for (int i = 0; i < image.num_channels; i++) {
-      image.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image
-      image.requested_pixel_types[i] = TINYEXR_PIXELTYPE_HALF; // pixel type of output image to be stored in .EXR
-    }
-
-    const char* err;
-    int ret = SaveMultiChannelEXRToFile(&image, outfilename, &err);
-    if (ret != 0) {
-      fprintf(stderr, "Save EXR err: %s\n", err);
-      return ret;
-    }
-    printf("Saved exr file. [ %s ] \n", outfilename);
-
-    free(image.pixel_types);
-    free(image.requested_pixel_types);
-
-    return ret;
-
-  }
-```
-
-
-Reading deep image EXR file.
-See `example/deepview` for actual usage.
-
-```
-  const char* input = "deepimage.exr";
-  const char* err;
-  DeepImage deepImage;
-
-  int ret = LoadDeepEXR(&deepImage, input, &err);
-
-  // acccess to each sample in the deep pixel.
-  for (int y = 0; y < deepImage.height; y++) {
-    int sampleNum = deepImage.offset_table[y][deepImage.width-1];
-    for (int x = 0; x < deepImage.width-1; x++) {
-      int s_start = deepImage.offset_table[y][x];
-      int s_end   = deepImage.offset_table[y][x+1];
-      if (s_start >= sampleNum) {
-        continue;
-      }
-      s_end = (s_end < sampleNum) ? s_end : sampleNum;
-      for (int s = s_start; s < s_end; s++) {
-        float val = deepImage.image[depthChan][y][s];
-        ...
-      }
-    }
-  }
-
-```
-
-### deepview
-
-`examples/deepview` is simple deep image viewer in OpenGL.
-
-![DeepViewExample](https://github.com/syoyo/tinyexr/blob/master/examples/deepview/deepview_screencast.gif?raw=true)
-
-## TODO
-
-Contribution is welcome!
-
-- [ ] Compression
-  - [ ] NONE("compress" = 0, load)
-  - [ ] RLE("compress" = 1, load)
-  - [x] ZIPS("compress" = 2, load)
-  - [x] ZIP("compress" = 3, load)
-  - [x] PIZ("compress" = 4, load)
-  - [x] NONE("compress" = 0, save)
-  - [ ] RLE("compress" = 1, save)
-  - [x] ZIPS("compress" = 2, save)
-  - [x] ZIP("compress" = 3, save)
-  - [ ] PIZ("compress" = 4, save)
-- [ ] Custom attributes
-  - [x] Normal image(EXR 1.x)
-  - [ ] Deep image(EXR 2.x)
-- [ ] JavaScript library
-  - [x] LoadEXRFromMemory
-  - [ ] SaveMultiChannelEXR
-  - [ ] Deep image save/load
-- [ ] Write from/to memory buffer.
-  - [x] SaveMultiChannelEXR
-  - [x] LoadMultiChannelEXR
-  - [ ] Deep image save/load
-- [ ] Tile format.
-- [ ] Support for various compression type.
-  - [x] zstd compression(Not in OpenEXR spec, though)
-- [x] Multi-channel.
-- [ ] Multi-part(EXR2.0)
-- [ ] Line order.
-  - [x] Increasing, decreasing(load)
-  - [ ] Random?
-  - [ ] Increasing, decreasing(save)
-- [ ] Pixel format(UINT, FLOAT).
-  - [x] UINT, FLOAT(load)
-  - [x] UINT, FLOAT(deep load)
-  - [x] UINT, FLOAT(save)
-  - [ ] UINT, FLOAT(deep save)
-- [ ] Full support for big endian machine.
-  - [x] Loading multi channel EXR
-  - [x] Saving multi channel EXR
-  - [ ] Loading deep image
-  - [ ] Saving deep image
-- [ ] Optimization
-  - [ ] ISPC?
-  - [x] OpenMP multi-threading in EXR loading.
-  - [x] OpenMP multi-threading in EXR saving.
-  - [ ] OpenMP multi-threading in deep image loading.
-  - [ ] OpenMP multi-threading in deep image saving.
-
-## Similar or related projects
-
-* miniexr: https://github.com/aras-p/miniexr (Write OpenEXR)
-* stb_image_resize.h: https://github.com/nothings/stb (Good for HDR image resizing)
-
-## License
-
-3-clause BSD
-
-`tinyexr` uses miniz, which is developed by Rich Geldreich <richgel99@gmail.com>, and licensed under public domain.
-
-`tinyexr` tools uses stb, which is licensed under public domain: https://github.com/nothings/stb
-`tinyexr` uses some code from OpenEXR, which is licensed under 3-clause BSD license.
-
-## Author(s)
-
-Syoyo Fujita(syoyo@lighttransport.com)
-
-## Contributor(s)
-
-* Matt Ebb (http://mattebb.com) : deep image example. Thanks!
-* Matt Pharr (http://pharr.org/matt/) : Testing tinyexr with OpenEXR(IlmImf). Thanks! 
-* Andrew Bell (https://github.com/andrewfb) & Richard Eakin (https://github.com/richardeakin) : Improving TinyEXR API. Thanks!
-* Mike Wong (https://github.com/mwkm) : ZIPS compression support in loading. Thanks!
diff --git a/3rdparty/tinyexr/tinyexr.h b/3rdparty/tinyexr/tinyexr.h
deleted file mode 100644
index 947941de4..000000000
--- a/3rdparty/tinyexr/tinyexr.h
+++ /dev/null
@@ -1,12354 +0,0 @@
-/*
-Copyright (c) 2014 - 2016, Syoyo Fujita
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the <organization> nor the
-      names of its contributors may be used to endorse or promote products
-      derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-// TinyEXR contains some OpenEXR code, which is licensed under ------------
-
-///////////////////////////////////////////////////////////////////////////
-//
-// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
-// Digital Ltd. LLC
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// *       Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// *       Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// *       Neither the name of Industrial Light & Magic nor the names of
-// its contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////
-
-// End of OpenEXR license -------------------------------------------------
-
-#ifndef TINYEXR_H_
-#define TINYEXR_H_
-
-//
-//
-//   Do this:
-//    #define TINYEXR_IMPLEMENTATION
-//   before you include this file in *one* C or C++ file to create the
-//   implementation.
-//
-//   // i.e. it should look like this:
-//   #include ...
-//   #include ...
-//   #include ...
-//   #define TINYEXR_IMPLEMENTATION
-//   #include "tinyexr.h"
-//
-//
-
-#include <stddef.h>  // for size_t
-#include <stdint.h>  // guess stdint.h is available(C99)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib
-// required if this flas is 0.
-#ifndef TINYEXR_USE_MINIZ
-#define TINYEXR_USE_MINIZ (1)
-#endif
-
-// Disable PIZ comporession when applying cpplint.
-#ifndef TINYEXR_USE_PIZ
-#define TINYEXR_USE_PIZ (1)
-#endif
-
-#ifndef TINYEXR_USE_ZFP
-#define TINYEXR_USE_ZFP (0)  // TinyEXR extension.
-// http://computation.llnl.gov/projects/floating-point-compression
-#endif
-
-#define TINYEXR_SUCCESS (0)
-#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1)
-#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2)
-#define TINYEXR_ERROR_INVALID_ARGUMENT (-3)
-#define TINYEXR_ERROR_INVALID_DATA (-4)
-#define TINYEXR_ERROR_INVALID_FILE (-5)
-#define TINYEXR_ERROR_INVALID_PARAMETER (-5)
-#define TINYEXR_ERROR_CANT_OPEN_FILE (-6)
-#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-7)
-#define TINYEXR_ERROR_INVALID_HEADER (-8)
-
-// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf }
-
-// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2
-#define TINYEXR_PIXELTYPE_UINT (0)
-#define TINYEXR_PIXELTYPE_HALF (1)
-#define TINYEXR_PIXELTYPE_FLOAT (2)
-
-#define TINYEXR_MAX_ATTRIBUTES (128)
-
-#define TINYEXR_COMPRESSIONTYPE_NONE (0)
-#define TINYEXR_COMPRESSIONTYPE_RLE (1)
-#define TINYEXR_COMPRESSIONTYPE_ZIPS (2)
-#define TINYEXR_COMPRESSIONTYPE_ZIP (3)
-#define TINYEXR_COMPRESSIONTYPE_PIZ (4)
-#define TINYEXR_COMPRESSIONTYPE_ZFP (128)  // TinyEXR extension
-
-#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0)
-#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1)
-#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2)
-
-#define TINYEXR_TILE_ONE_LEVEL (0)
-#define TINYEXR_TILE_MIPMAP_LEVELS (1)
-#define TINYEXR_TILE_RIPMAP_LEVELS (2)
-
-#define TINYEXR_TILE_ROUND_DOWN (0)
-#define TINYEXR_TILE_ROUND_UP (1)
-
-typedef struct _EXRVersion {
-  int version;    // this must be 2
-  int tiled;      // tile format image
-  int long_name;  // long name attribute
-  int non_image;  // deep image(EXR 2.0)
-  int multipart;  // multi-part(EXR 2.0)
-} EXRVersion;
-
-typedef struct _EXRAttribute {
-  char name[256];  // name and type are up to 255 chars long.
-  char type[256];
-  unsigned char *value;  // uint8_t*
-  int size;
-  int pad0;
-} EXRAttribute;
-
-typedef struct _EXRChannelInfo {
-  char name[256];  // less than 255 bytes long
-  int pixel_type;
-  int x_sampling;
-  int y_sampling;
-  unsigned char p_linear;
-  unsigned char pad[3];
-} EXRChannelInfo;
-
-typedef struct _EXRTile {
-  int offset_x;
-  int offset_y;
-  int level_x;
-  int level_y;
-
-  int width;   // actual width in a tile.
-  int height;  // actual height int a tile.
-
-  unsigned char **images;  // image[channels][pixels]
-} EXRTile;
-
-typedef struct _EXRHeader {
-  float pixel_aspect_ratio;
-  int line_order;
-  int data_window[4];
-  int display_window[4];
-  float screen_window_center[2];
-  float screen_window_width;
-
-  int chunk_count;
-
-  // Properties for tiled format(`tiledesc`).
-  int tiled;
-  int tile_size_x;
-  int tile_size_y;
-  int tile_level_mode;
-  int tile_rounding_mode;
-
-  int long_name;
-  int non_image;
-  int multipart;
-  unsigned int header_len;
-
-  // Custom attributes(exludes required attributes(e.g. `channels`,
-  // `compression`, etc)
-  int num_custom_attributes;
-  EXRAttribute custom_attributes[TINYEXR_MAX_ATTRIBUTES];
-
-  EXRChannelInfo *channels;  // [num_channels]
-
-  int *pixel_types;  // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for
-  // each channel. This is overwritten with `requested_pixel_types` when
-  // loading.
-  int num_channels;
-
-  int compression_type;        // compression type(TINYEXR_COMPRESSIONTYPE_*)
-  int *requested_pixel_types;  // Filled initially by
-                               // ParseEXRHeaderFrom(Meomory|File), then users
-                               // can edit it(only valid for HALF pixel type
-                               // channel)
-
-} EXRHeader;
-
-typedef struct _EXRMultiPartHeader {
-  int num_headers;
-  EXRHeader *headers;
-
-} EXRMultiPartHeader;
-
-typedef struct _EXRImage {
-  EXRTile *tiles;  // Tiled pixel data. The application must reconstruct image
-                   // from tiles manually. NULL if scanline format.
-  unsigned char **images;  // image[channels][pixels]. NULL if tiled format.
-
-  int width;
-  int height;
-  int num_channels;
-
-  // Properties for tile format.
-  int num_tiles;
-
-} EXRImage;
-
-typedef struct _EXRMultiPartImage {
-  int num_images;
-  EXRImage *images;
-
-} EXRMultiPartImage;
-
-typedef struct _DeepImage {
-  const char **channel_names;
-  float ***image;      // image[channels][scanlines][samples]
-  int **offset_table;  // offset_table[scanline][offsets]
-  int num_channels;
-  int width;
-  int height;
-  int pad0;
-} DeepImage;
-
-// @deprecated { to be removed. }
-// Loads single-frame OpenEXR image. Assume EXR image contains RGB(A) channels.
-// Application must free image data as returned by `out_rgba`
-// Result image format is: float x RGBA x width x hight
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXR(float **out_rgba, int *width, int *height,
-                   const char *filename, const char **err);
-
-// @deprecated { to be removed. }
-// Saves single-frame OpenEXR image. Assume EXR image contains RGB(A) channels.
-// components must be 3(RGB) or 4(RGBA).
-// Result image format is: float x RGB(A) x width x hight
-extern int SaveEXR(const float *data, int width, int height, int components,
-                   const char *filename);
-
-// Initialize EXRHeader struct
-extern void InitEXRHeader(EXRHeader *exr_header);
-
-// Initialize EXRImage struct
-extern void InitEXRImage(EXRImage *exr_image);
-
-// Free's internal data of EXRHeader struct
-extern int FreeEXRHeader(EXRHeader *exr_header);
-
-// Free's internal data of EXRImage struct
-extern int FreeEXRImage(EXRImage *exr_image);
-
-// Parse EXR version header of a file.
-extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename);
-
-// Parse EXR version header from memory-mapped EXR data.
-extern int ParseEXRVersionFromMemory(EXRVersion *version,
-                                     const unsigned char *memory, size_t size);
-
-// Parse single-part OpenEXR header from a file and initialize `EXRHeader`.
-extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version,
-                                  const char *filename, const char **err);
-
-// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`.
-extern int ParseEXRHeaderFromMemory(EXRHeader *header,
-                                    const EXRVersion *version,
-                                    const unsigned char *memory, size_t size,
-                                    const char **err);
-
-// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*`
-// array.
-extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers,
-                                           int *num_headers,
-                                           const EXRVersion *version,
-                                           const char *filename,
-                                           const char **err);
-
-// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*`
-// array
-extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers,
-                                             int *num_headers,
-                                             const EXRVersion *version,
-                                             const unsigned char *memory,
-                                             size_t size, const char **err);
-
-// Loads single-part OpenEXR image from a file.
-// Application must setup `ParseEXRHeaderFromFile` before calling this function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header,
-                                const char *filename, const char **err);
-
-// Loads single-part OpenEXR image from a memory.
-// Application must setup `EXRHeader` with
-// `ParseEXRHeaderFromMemory` before calling this function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header,
-                                  const unsigned char *memory,
-                                  const size_t size,
-                                  const char **err);
-
-// Loads multi-part OpenEXR image from a file.
-// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this
-// function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXRMultipartImageFromFile(EXRImage *images,
-                                         const EXRHeader **headers,
-                                         unsigned int num_parts,
-                                         const char *filename,
-                                         const char **err);
-
-// Loads multi-part OpenEXR image from a memory.
-// Application must setup `EXRHeader*` array with
-// `ParseEXRMultipartHeaderFromMemory` before calling this function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXRMultipartImageFromMemory(EXRImage *images,
-                                           const EXRHeader **headers,
-                                           unsigned int num_parts,
-                                           const unsigned char *memory,
-                                           const char **err);
-
-// Saves multi-channel, single-frame OpenEXR image to a file.
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int SaveEXRImageToFile(const EXRImage *image,
-                              const EXRHeader *exr_header, const char *filename,
-                              const char **err);
-
-// Saves multi-channel, single-frame OpenEXR image to a memory.
-// Image is compressed using EXRImage.compression value.
-// Return the number of bytes if succes.
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern size_t SaveEXRImageToMemory(const EXRImage *image,
-                                   const EXRHeader *exr_header,
-                                   unsigned char **memory, const char **err);
-
-// Loads single-frame OpenEXR deep image.
-// Application must free memory of variables in DeepImage(image, offset_table)
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadDeepEXR(DeepImage *out_image, const char *filename,
-                       const char **err);
-
-// NOT YET IMPLEMENTED:
-// Saves single-frame OpenEXR deep image.
-// Returns negative value and may set error string in `err` when there's an
-// error
-// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename,
-//                       const char **err);
-
-// NOT YET IMPLEMENTED:
-// Loads multi-part OpenEXR deep image.
-// Application must free memory of variables in DeepImage(image, offset_table)
-// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const
-// char *filename,
-//                       const char **err);
-
-// For emscripten.
-// Loads single-frame OpenEXR image from memory. Assume EXR image contains
-// RGB(A) channels.
-// `out_rgba` must have enough memory(at least sizeof(float) x 4(RGBA) x width x
-// hight)
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXRFromMemory(float *out_rgba, const unsigned char *memory,
-                             size_t size, const char **err);
-
-#ifdef __cplusplus
-}
-#endif
-
-#ifdef TINYEXR_IMPLEMENTATION
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <sstream>
-
-#include <string>
-#include <vector>
-
-// @todo { remove including tinyexr.h }
-#include "tinyexr.h"
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#if TINYEXR_USE_MINIZ
-#else
-#include "zlib.h"
-#endif
-
-#if TINYEXR_USE_ZFP
-#include "zfp.h"
-#endif
-
-#if __cplusplus > 199711L
-// C++11
-#include <cstdint>
-#endif // __cplusplus > 199711L
-
-namespace tinyexr {
-
-#if __cplusplus > 199711L
-// C++11
-typedef uint64_t tinyexr_uint64;
-typedef int64_t tinyexr_int64;
-#else
-// Although `long long` is not a standard type pre C++11, assume it is defined
-// as a compiler's extension.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wc++11-long-long"
-#endif
-typedef unsigned long long tinyexr_uint64;
-typedef long long tinyexr_int64;
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-#endif
-
-#if TINYEXR_USE_MINIZ
-
-namespace miniz {
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wc++11-long-long"
-#pragma clang diagnostic ignored "-Wold-style-cast"
-#pragma clang diagnostic ignored "-Wpadded"
-#pragma clang diagnostic ignored "-Wsign-conversion"
-#pragma clang diagnostic ignored "-Wc++11-extensions"
-#pragma clang diagnostic ignored "-Wconversion"
-#endif
-
-/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP
-   reading/writing/appending, PNG writing
-   See "unlicense" statement at the end of this file.
-   Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013
-   Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951:
-   http://www.ietf.org/rfc/rfc1951.txt
-
-   Most API's defined in miniz.c are optional. For example, to disable the
-   archive related functions just define
-   MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO
-   (see the list below for more macros).
-
-   * Change History
-     10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major
-   release with Zip64 support (almost there!):
-       - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug
-   (thanks kahmyong.moon@hp.com) which could cause locate files to not find
-   files. This bug
-        would only have occured in earlier versions if you explicitly used this
-   flag, OR if you used mz_zip_extract_archive_file_to_heap() or
-   mz_zip_add_mem_to_archive_file_in_place()
-        (which used this flag). If you can't switch to v1.15 but want to fix
-   this bug, just remove the uses of this flag from both helper funcs (and of
-   course don't use the flag).
-       - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when
-   pUser_read_buf is not NULL and compressed size is > uncompressed size
-       - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract
-   compressed data from directory entries, to account for weird zipfiles which
-   contain zero-size compressed data on dir entries.
-         Hopefully this fix won't cause any issues on weird zip archives,
-   because it assumes the low 16-bits of zip external attributes are DOS
-   attributes (which I believe they always are in practice).
-       - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the
-   internal attributes, just the filename and external attributes
-       - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed
-       - Added cmake support for Linux builds which builds all the examples,
-   tested with clang v3.3 and gcc v4.6.
-       - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti
-       - Merged MZ_FORCEINLINE fix from hdeanclark
-       - Fix <time.h> include before config #ifdef, thanks emil.brink
-       - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping
-   (super useful for OpenGL apps), and explicit control over the compression
-   level (so you can
-        set it to 1 for real-time compression).
-       - Merged in some compiler fixes from paulharris's github repro.
-       - Retested this build under Windows (VS 2010, including static analysis),
-   tcc  0.9.26, gcc v4.6 and clang v3.3.
-       - Added example6.c, which dumps an image of the mandelbrot set to a PNG
-   file.
-       - Modified example2 to help test the
-   MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more.
-       - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix
-   possible src file fclose() leak if alignment bytes+local header file write
-   faiiled
-                 - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader():
-   Was pushing the wrong central dir header offset, appears harmless in this
-   release, but it became a problem in the zip64 branch
-     5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE,
-   #include <time.h> (thanks fermtect).
-     5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix
-   mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit.
-       - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and
-   re-ran a randomized regression test on ~500k files.
-       - Eliminated a bunch of warnings when compiling with GCC 32-bit/64.
-       - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze
-   (static analysis) option and fixed all warnings (except for the silly
-        "Use of the comma-operator in a tested expression.." analysis warning,
-   which I purposely use to work around a MSVC compiler warning).
-       - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and
-   tested Linux executables. The codeblocks workspace is compatible with
-   Linux+Win32/x64.
-       - Added miniz_tester solution/project, which is a useful little app
-   derived from LZHAM's tester app that I use as part of the regression test.
-       - Ran miniz.c and tinfl.c through another series of regression testing on
-   ~500,000 files and archives.
-       - Modified example5.c so it purposely disables a bunch of high-level
-   functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the
-   MINIZ_NO_STDIO bug report.)
-       - Fix ftell() usage in examples so they exit with an error on files which
-   are too large (a limitation of the examples, not miniz itself).
-     4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple
-   minor level_and_flags issues in the archive API's.
-      level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce
-   Dawson <bruced@valvesoftware.com> for the feedback/bug report.
-     5/28/11 v1.11 - Added statement from unlicense.org
-     5/27/11 v1.10 - Substantial compressor optimizations:
-      - Level 1 is now ~4x faster than before. The L1 compressor's throughput
-   now varies between 70-110MB/sec. on a
-      - Core i7 (actual throughput varies depending on the type of data, and x64
-   vs. x86).
-      - Improved baseline L2-L9 compression perf. Also, greatly improved
-   compression perf. issues on some file types.
-      - Refactored the compression code for better readability and
-   maintainability.
-      - Added level 10 compression level (L10 has slightly better ratio than
-   level 9, but could have a potentially large
-       drop in throughput on some files).
-     5/15/11 v1.09 - Initial stable release.
-
-   * Low-level Deflate/Inflate implementation notes:
-
-     Compression: Use the "tdefl" API's. The compressor supports raw, static,
-   and dynamic blocks, lazy or
-     greedy parsing, match length filtering, RLE-only, and Huffman-only streams.
-   It performs and compresses
-     approximately as well as zlib.
-
-     Decompression: Use the "tinfl" API's. The entire decompressor is
-   implemented as a single function
-     coroutine: see tinfl_decompress(). It supports decompression into a 32KB
-   (or larger power of 2) wrapping buffer, or into a memory
-     block large enough to hold the entire file.
-
-     The low-level tdefl/tinfl API's do not make any use of dynamic memory
-   allocation.
-
-   * zlib-style API notes:
-
-     miniz.c implements a fairly large subset of zlib. There's enough
-   functionality present for it to be a drop-in
-     zlib replacement in many apps:
-        The z_stream struct, optional memory allocation callbacks
-        deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound
-        inflateInit/inflateInit2/inflate/inflateEnd
-        compress, compress2, compressBound, uncompress
-        CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly
-   routines.
-        Supports raw deflate streams or standard zlib streams with adler-32
-   checking.
-
-     Limitations:
-      The callback API's are not implemented yet. No support for gzip headers or
-   zlib static dictionaries.
-      I've tried to closely emulate zlib's various flavors of stream flushing
-   and return status codes, but
-      there are no guarantees that miniz.c pulls this off perfectly.
-
-   * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function,
-   originally written by
-     Alex Evans. Supports 1-4 bytes/pixel images.
-
-   * ZIP archive API notes:
-
-     The ZIP archive API's where designed with simplicity and efficiency in
-   mind, with just enough abstraction to
-     get the job done with minimal fuss. There are simple API's to retrieve file
-   information, read files from
-     existing archives, create new archives, append new files to existing
-   archives, or clone archive data from
-     one archive to another. It supports archives located in memory or the heap,
-   on disk (using stdio.h),
-     or you can specify custom file read/write callbacks.
-
-     - Archive reading: Just call this function to read a single file from a
-   disk archive:
-
-      void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const
-   char *pArchive_name,
-        size_t *pSize, mz_uint zip_flags);
-
-     For more complex cases, use the "mz_zip_reader" functions. Upon opening an
-   archive, the entire central
-     directory is located and read as-is into memory, and subsequent file access
-   only occurs when reading individual files.
-
-     - Archives file scanning: The simple way is to use this function to scan a
-   loaded archive for a specific file:
-
-     int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName,
-   const char *pComment, mz_uint flags);
-
-     The locate operation can optionally check file comments too, which (as one
-   example) can be used to identify
-     multiple versions of the same file in an archive. This function uses a
-   simple linear search through the central
-     directory, so it's not very fast.
-
-     Alternately, you can iterate through all the files in an archive (using
-   mz_zip_reader_get_num_files()) and
-     retrieve detailed info on each file by calling mz_zip_reader_file_stat().
-
-     - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer
-   immediately writes compressed file data
-     to disk and builds an exact image of the central directory in memory. The
-   central directory image is written
-     all at once at the end of the archive file when the archive is finalized.
-
-     The archive writer can optionally align each file's local header and file
-   data to any power of 2 alignment,
-     which can be useful when the archive will be read from optical media. Also,
-   the writer supports placing
-     arbitrary data blobs at the very beginning of ZIP archives. Archives
-   written using either feature are still
-     readable by any ZIP tool.
-
-     - Archive appending: The simple way to add a single file to an archive is
-   to call this function:
-
-      mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename,
-   const char *pArchive_name,
-        const void *pBuf, size_t buf_size, const void *pComment, mz_uint16
-   comment_size, mz_uint level_and_flags);
-
-     The archive will be created if it doesn't already exist, otherwise it'll be
-   appended to.
-     Note the appending is done in-place and is not an atomic operation, so if
-   something goes wrong
-     during the operation it's possible the archive could be left without a
-   central directory (although the local
-     file headers and file data will be fine, so the archive will be
-   recoverable).
-
-     For more complex archive modification scenarios:
-     1. The safest way is to use a mz_zip_reader to read the existing archive,
-   cloning only those bits you want to
-     preserve into a new archive using using the
-   mz_zip_writer_add_from_zip_reader() function (which compiles the
-     compressed file data as-is). When you're done, delete the old archive and
-   rename the newly written archive, and
-     you're done. This is safe but requires a bunch of temporary disk space or
-   heap memory.
-
-     2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using
-   mz_zip_writer_init_from_reader(),
-     append new files as needed, then finalize the archive which will write an
-   updated central directory to the
-     original archive. (This is basically what
-   mz_zip_add_mem_to_archive_file_in_place() does.) There's a
-     possibility that the archive's central directory could be lost with this
-   method if anything goes wrong, though.
-
-     - ZIP archive support limitations:
-     No zip64 or spanning support. Extraction functions can only handle
-   unencrypted, stored or deflated files.
-     Requires streams capable of seeking.
-
-   * This is a header file library, like stb_image.c. To get only a header file,
-   either cut and paste the
-     below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then
-   include miniz.c from it.
-
-   * Important: For best perf. be sure to customize the below macros for your
-   target platform:
-     #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
-     #define MINIZ_LITTLE_ENDIAN 1
-     #define MINIZ_HAS_64BIT_REGISTERS 1
-
-   * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before
-   including miniz.c to ensure miniz
-     uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be
-   able to process large files
-     (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes).
-*/
-
-#ifndef MINIZ_HEADER_INCLUDED
-#define MINIZ_HEADER_INCLUDED
-
-#include <stdlib.h>
-
-// Defines to completely disable specific portions of miniz.c:
-// If all macros here are defined the only functionality remaining will be
-// CRC-32, adler-32, tinfl, and tdefl.
-
-// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on
-// stdio for file I/O.
-//#define MINIZ_NO_STDIO
-
-// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able
-// to get the current time, or
-// get/set file times, and the C run-time funcs that get/set times won't be
-// called.
-// The current downside is the times written to your archives will be from 1979.
-#define MINIZ_NO_TIME
-
-// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's.
-//#define MINIZ_NO_ARCHIVE_APIS
-
-// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive
-// API's.
-//#define MINIZ_NO_ARCHIVE_WRITING_APIS
-
-// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression
-// API's.
-//#define MINIZ_NO_ZLIB_APIS
-
-// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent
-// conflicts against stock zlib.
-//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
-
-// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc.
-// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom
-// user alloc/free/realloc
-// callbacks to the zlib and archive API's, and a few stand-alone helper API's
-// which don't provide custom user
-// functions (such as tdefl_compress_mem_to_heap() and
-// tinfl_decompress_mem_to_heap()) won't work.
-//#define MINIZ_NO_MALLOC
-
-#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
-// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc
-// on Linux
-#define MINIZ_NO_TIME
-#endif
-
-#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
-#include <time.h>
-#endif
-
-#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
-    defined(__i386) || defined(__i486__) || defined(__i486) ||  \
-    defined(i386) || defined(__ia64__) || defined(__x86_64__)
-// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
-#define MINIZ_X86_OR_X64_CPU 1
-#endif
-
-#if defined(__sparcv9)
-// Big endian
-#else
-#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
-// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
-#define MINIZ_LITTLE_ENDIAN 1
-#endif
-#endif
-
-#if MINIZ_X86_OR_X64_CPU
-// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient
-// integer loads and stores from unaligned addresses.
-//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
-#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \
-  0  // disable to suppress compiler warnings
-#endif
-
-#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \
-    defined(_LP64) || defined(__LP64__) || defined(__ia64__) ||   \
-    defined(__x86_64__)
-// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are
-// reasonably fast (and don't involve compiler generated calls to helper
-// functions).
-#define MINIZ_HAS_64BIT_REGISTERS 1
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// ------------------- zlib-style API Definitions.
-
-// For more compatibility with zlib, miniz.c uses unsigned long for some
-// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
-typedef unsigned long mz_ulong;
-
-// mz_free() internally uses the MZ_FREE() macro (which by default calls free()
-// unless you've modified the MZ_MALLOC macro) to release a block allocated from
-// the heap.
-void mz_free(void *p);
-
-#define MZ_ADLER32_INIT (1)
-// mz_adler32() returns the initial adler-32 value to use when called with
-// ptr==NULL.
-mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
-
-#define MZ_CRC32_INIT (0)
-// mz_crc32() returns the initial CRC-32 value to use when called with
-// ptr==NULL.
-mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
-
-// Compression strategies.
-enum {
-  MZ_DEFAULT_STRATEGY = 0,
-  MZ_FILTERED = 1,
-  MZ_HUFFMAN_ONLY = 2,
-  MZ_RLE = 3,
-  MZ_FIXED = 4
-};
-
-// Method
-#define MZ_DEFLATED 8
-
-#ifndef MINIZ_NO_ZLIB_APIS
-
-// Heap allocation callbacks.
-// Note that mz_alloc_func parameter types purpsosely differ from zlib's:
-// items/size is size_t, not unsigned long.
-typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
-typedef void (*mz_free_func)(void *opaque, void *address);
-typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items,
-                                 size_t size);
-
-#define MZ_VERSION "9.1.15"
-#define MZ_VERNUM 0x91F0
-#define MZ_VER_MAJOR 9
-#define MZ_VER_MINOR 1
-#define MZ_VER_REVISION 15
-#define MZ_VER_SUBREVISION 0
-
-// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The
-// other values are for advanced use (refer to the zlib docs).
-enum {
-  MZ_NO_FLUSH = 0,
-  MZ_PARTIAL_FLUSH = 1,
-  MZ_SYNC_FLUSH = 2,
-  MZ_FULL_FLUSH = 3,
-  MZ_FINISH = 4,
-  MZ_BLOCK = 5
-};
-
-// Return status codes. MZ_PARAM_ERROR is non-standard.
-enum {
-  MZ_OK = 0,
-  MZ_STREAM_END = 1,
-  MZ_NEED_DICT = 2,
-  MZ_ERRNO = -1,
-  MZ_STREAM_ERROR = -2,
-  MZ_DATA_ERROR = -3,
-  MZ_MEM_ERROR = -4,
-  MZ_BUF_ERROR = -5,
-  MZ_VERSION_ERROR = -6,
-  MZ_PARAM_ERROR = -10000
-};
-
-// Compression levels: 0-9 are the standard zlib-style levels, 10 is best
-// possible compression (not zlib compatible, and may be very slow),
-// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
-enum {
-  MZ_NO_COMPRESSION = 0,
-  MZ_BEST_SPEED = 1,
-  MZ_BEST_COMPRESSION = 9,
-  MZ_UBER_COMPRESSION = 10,
-  MZ_DEFAULT_LEVEL = 6,
-  MZ_DEFAULT_COMPRESSION = -1
-};
-
-// Window bits
-#define MZ_DEFAULT_WINDOW_BITS 15
-
-struct mz_internal_state;
-
-// Compression/decompression stream struct.
-typedef struct mz_stream_s {
-  const unsigned char *next_in;  // pointer to next byte to read
-  unsigned int avail_in;         // number of bytes available at next_in
-  mz_ulong total_in;             // total number of bytes consumed so far
-
-  unsigned char *next_out;  // pointer to next byte to write
-  unsigned int avail_out;   // number of bytes that can be written to next_out
-  mz_ulong total_out;       // total number of bytes produced so far
-
-  char *msg;                        // error msg (unused)
-  struct mz_internal_state *state;  // internal state, allocated by zalloc/zfree
-
-  mz_alloc_func
-      zalloc;          // optional heap allocation function (defaults to malloc)
-  mz_free_func zfree;  // optional heap free function (defaults to free)
-  void *opaque;        // heap alloc function user pointer
-
-  int data_type;      // data_type (unused)
-  mz_ulong adler;     // adler32 of the source or uncompressed data
-  mz_ulong reserved;  // not used
-} mz_stream;
-
-typedef mz_stream *mz_streamp;
-
-// Returns the version string of miniz.c.
-const char *mz_version(void);
-
-// mz_deflateInit() initializes a compressor with default options:
-// Parameters:
-//  pStream must point to an initialized mz_stream struct.
-//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
-//  level 1 enables a specially optimized compression function that's been
-//  optimized purely for performance, not ratio.
-//  (This special func. is currently only enabled when
-//  MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
-// Return values:
-//  MZ_OK on success.
-//  MZ_STREAM_ERROR if the stream is bogus.
-//  MZ_PARAM_ERROR if the input parameters are bogus.
-//  MZ_MEM_ERROR on out of memory.
-int mz_deflateInit(mz_streamp pStream, int level);
-
-// mz_deflateInit2() is like mz_deflate(), except with more control:
-// Additional parameters:
-//   method must be MZ_DEFLATED
-//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with
-//   zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no
-//   header or footer)
-//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
-int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits,
-                    int mem_level, int strategy);
-
-// Quickly resets a compressor without having to reallocate anything. Same as
-// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
-int mz_deflateReset(mz_streamp pStream);
-
-// mz_deflate() compresses the input to output, consuming as much of the input
-// and producing as much output as possible.
-// Parameters:
-//   pStream is the stream to read from and write to. You must initialize/update
-//   the next_in, avail_in, next_out, and avail_out members.
-//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or
-//   MZ_FINISH.
-// Return values:
-//   MZ_OK on success (when flushing, or if more input is needed but not
-//   available, and/or there's more output to be written but the output buffer
-//   is full).
-//   MZ_STREAM_END if all input has been consumed and all output bytes have been
-//   written. Don't call mz_deflate() on the stream anymore.
-//   MZ_STREAM_ERROR if the stream is bogus.
-//   MZ_PARAM_ERROR if one of the parameters is invalid.
-//   MZ_BUF_ERROR if no forward progress is possible because the input and/or
-//   output buffers are empty. (Fill up the input buffer or free up some output
-//   space and try again.)
-int mz_deflate(mz_streamp pStream, int flush);
-
-// mz_deflateEnd() deinitializes a compressor:
-// Return values:
-//  MZ_OK on success.
-//  MZ_STREAM_ERROR if the stream is bogus.
-int mz_deflateEnd(mz_streamp pStream);
-
-// mz_deflateBound() returns a (very) conservative upper bound on the amount of
-// data that could be generated by deflate(), assuming flush is set to only
-// MZ_NO_FLUSH or MZ_FINISH.
-mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
-
-// Single-call compression functions mz_compress() and mz_compress2():
-// Returns MZ_OK on success, or one of the error codes from mz_deflate() on
-// failure.
-int mz_compress(unsigned char *pDest, mz_ulong *pDest_len,
-                const unsigned char *pSource, mz_ulong source_len);
-int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len,
-                 const unsigned char *pSource, mz_ulong source_len, int level);
-
-// mz_compressBound() returns a (very) conservative upper bound on the amount of
-// data that could be generated by calling mz_compress().
-mz_ulong mz_compressBound(mz_ulong source_len);
-
-// Initializes a decompressor.
-int mz_inflateInit(mz_streamp pStream);
-
-// mz_inflateInit2() is like mz_inflateInit() with an additional option that
-// controls the window size and whether or not the stream has been wrapped with
-// a zlib header/footer:
-// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or
-// -MZ_DEFAULT_WINDOW_BITS (raw deflate).
-int mz_inflateInit2(mz_streamp pStream, int window_bits);
-
-// Decompresses the input stream to the output, consuming only as much of the
-// input as needed, and writing as much to the output as possible.
-// Parameters:
-//   pStream is the stream to read from and write to. You must initialize/update
-//   the next_in, avail_in, next_out, and avail_out members.
-//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
-//   On the first call, if flush is MZ_FINISH it's assumed the input and output
-//   buffers are both sized large enough to decompress the entire stream in a
-//   single call (this is slightly faster).
-//   MZ_FINISH implies that there are no more source bytes available beside
-//   what's already in the input buffer, and that the output buffer is large
-//   enough to hold the rest of the decompressed data.
-// Return values:
-//   MZ_OK on success. Either more input is needed but not available, and/or
-//   there's more output to be written but the output buffer is full.
-//   MZ_STREAM_END if all needed input has been consumed and all output bytes
-//   have been written. For zlib streams, the adler-32 of the decompressed data
-//   has also been verified.
-//   MZ_STREAM_ERROR if the stream is bogus.
-//   MZ_DATA_ERROR if the deflate stream is invalid.
-//   MZ_PARAM_ERROR if one of the parameters is invalid.
-//   MZ_BUF_ERROR if no forward progress is possible because the input buffer is
-//   empty but the inflater needs more input to continue, or if the output
-//   buffer is not large enough. Call mz_inflate() again
-//   with more input data, or with more room in the output buffer (except when
-//   using single call decompression, described above).
-int mz_inflate(mz_streamp pStream, int flush);
-
-// Deinitializes a decompressor.
-int mz_inflateEnd(mz_streamp pStream);
-
-// Single-call decompression.
-// Returns MZ_OK on success, or one of the error codes from mz_inflate() on
-// failure.
-int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len,
-                  const unsigned char *pSource, mz_ulong source_len);
-
-// Returns a string description of the specified error code, or NULL if the
-// error code is invalid.
-const char *mz_error(int err);
-
-// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used
-// as a drop-in replacement for the subset of zlib that miniz.c supports.
-// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you
-// use zlib in the same project.
-#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
-typedef unsigned char Byte;
-typedef unsigned int uInt;
-typedef mz_ulong uLong;
-typedef Byte Bytef;
-typedef uInt uIntf;
-typedef char charf;
-typedef int intf;
-typedef void *voidpf;
-typedef uLong uLongf;
-typedef void *voidp;
-typedef void *const voidpc;
-#define Z_NULL 0
-#define Z_NO_FLUSH MZ_NO_FLUSH
-#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
-#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
-#define Z_FULL_FLUSH MZ_FULL_FLUSH
-#define Z_FINISH MZ_FINISH
-#define Z_BLOCK MZ_BLOCK
-#define Z_OK MZ_OK
-#define Z_STREAM_END MZ_STREAM_END
-#define Z_NEED_DICT MZ_NEED_DICT
-#define Z_ERRNO MZ_ERRNO
-#define Z_STREAM_ERROR MZ_STREAM_ERROR
-#define Z_DATA_ERROR MZ_DATA_ERROR
-#define Z_MEM_ERROR MZ_MEM_ERROR
-#define Z_BUF_ERROR MZ_BUF_ERROR
-#define Z_VERSION_ERROR MZ_VERSION_ERROR
-#define Z_PARAM_ERROR MZ_PARAM_ERROR
-#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
-#define Z_BEST_SPEED MZ_BEST_SPEED
-#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
-#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
-#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
-#define Z_FILTERED MZ_FILTERED
-#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
-#define Z_RLE MZ_RLE
-#define Z_FIXED MZ_FIXED
-#define Z_DEFLATED MZ_DEFLATED
-#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
-#define alloc_func mz_alloc_func
-#define free_func mz_free_func
-#define internal_state mz_internal_state
-#define z_stream mz_stream
-#define deflateInit mz_deflateInit
-#define deflateInit2 mz_deflateInit2
-#define deflateReset mz_deflateReset
-#define deflate mz_deflate
-#define deflateEnd mz_deflateEnd
-#define deflateBound mz_deflateBound
-#define compress mz_compress
-#define compress2 mz_compress2
-#define compressBound mz_compressBound
-#define inflateInit mz_inflateInit
-#define inflateInit2 mz_inflateInit2
-#define inflate mz_inflate
-#define inflateEnd mz_inflateEnd
-#define uncompress mz_uncompress
-#define crc32 mz_crc32
-#define adler32 mz_adler32
-#define MAX_WBITS 15
-#define MAX_MEM_LEVEL 9
-#define zError mz_error
-#define ZLIB_VERSION MZ_VERSION
-#define ZLIB_VERNUM MZ_VERNUM
-#define ZLIB_VER_MAJOR MZ_VER_MAJOR
-#define ZLIB_VER_MINOR MZ_VER_MINOR
-#define ZLIB_VER_REVISION MZ_VER_REVISION
-#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
-#define zlibVersion mz_version
-#define zlib_version mz_version()
-#endif  // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
-
-#endif  // MINIZ_NO_ZLIB_APIS
-
-// ------------------- Types and macros
-
-typedef unsigned char mz_uint8;
-typedef signed short mz_int16;
-typedef unsigned short mz_uint16;
-typedef unsigned int mz_uint32;
-typedef unsigned int mz_uint;
-typedef long long mz_int64;
-typedef unsigned long long mz_uint64;
-typedef int mz_bool;
-
-#define MZ_FALSE (0)
-#define MZ_TRUE (1)
-
-// An attempt to work around MSVC's spammy "warning C4127: conditional
-// expression is constant" message.
-#ifdef _MSC_VER
-#define MZ_MACRO_END while (0, 0)
-#else
-#define MZ_MACRO_END while (0)
-#endif
-
-// ------------------- ZIP archive reading/writing
-
-#ifndef MINIZ_NO_ARCHIVE_APIS
-
-enum {
-  MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024,
-  MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,
-  MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
-};
-
-typedef struct {
-  mz_uint32 m_file_index;
-  mz_uint32 m_central_dir_ofs;
-  mz_uint16 m_version_made_by;
-  mz_uint16 m_version_needed;
-  mz_uint16 m_bit_flag;
-  mz_uint16 m_method;
-#ifndef MINIZ_NO_TIME
-  time_t m_time;
-#endif
-  mz_uint32 m_crc32;
-  mz_uint64 m_comp_size;
-  mz_uint64 m_uncomp_size;
-  mz_uint16 m_internal_attr;
-  mz_uint32 m_external_attr;
-  mz_uint64 m_local_header_ofs;
-  mz_uint32 m_comment_size;
-  char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
-  char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
-} mz_zip_archive_file_stat;
-
-typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs,
-                                    void *pBuf, size_t n);
-typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs,
-                                     const void *pBuf, size_t n);
-
-struct mz_zip_internal_state_tag;
-typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
-
-typedef enum {
-  MZ_ZIP_MODE_INVALID = 0,
-  MZ_ZIP_MODE_READING = 1,
-  MZ_ZIP_MODE_WRITING = 2,
-  MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
-} mz_zip_mode;
-
-typedef struct mz_zip_archive_tag {
-  mz_uint64 m_archive_size;
-  mz_uint64 m_central_directory_file_ofs;
-  mz_uint m_total_files;
-  mz_zip_mode m_zip_mode;
-
-  mz_uint m_file_offset_alignment;
-
-  mz_alloc_func m_pAlloc;
-  mz_free_func m_pFree;
-  mz_realloc_func m_pRealloc;
-  void *m_pAlloc_opaque;
-
-  mz_file_read_func m_pRead;
-  mz_file_write_func m_pWrite;
-  void *m_pIO_opaque;
-
-  mz_zip_internal_state *m_pState;
-
-} mz_zip_archive;
-
-typedef enum {
-  MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
-  MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
-  MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
-  MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
-} mz_zip_flags;
-
-// ZIP archive reading
-
-// Inits a ZIP archive reader.
-// These functions read and validate the archive's central directory.
-mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size,
-                           mz_uint32 flags);
-mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem,
-                               size_t size, mz_uint32 flags);
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint32 flags);
-#endif
-
-// Returns the total number of files in the archive.
-mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
-
-// Returns detailed information about an archive file entry.
-mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index,
-                                mz_zip_archive_file_stat *pStat);
-
-// Determines if an archive file entry is a directory entry.
-mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip,
-                                          mz_uint file_index);
-mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip,
-                                        mz_uint file_index);
-
-// Retrieves the filename of an archive file entry.
-// Returns the number of bytes written to pFilename, or if filename_buf_size is
-// 0 this function returns the number of bytes needed to fully store the
-// filename.
-mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index,
-                                   char *pFilename, mz_uint filename_buf_size);
-
-// Attempts to locates a file in the archive's central directory.
-// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
-// Returns -1 if the file cannot be found.
-int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName,
-                              const char *pComment, mz_uint flags);
-
-// Extracts a archive file to a memory buffer using no memory allocation.
-mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip,
-                                              mz_uint file_index, void *pBuf,
-                                              size_t buf_size, mz_uint flags,
-                                              void *pUser_read_buf,
-                                              size_t user_read_buf_size);
-mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(
-    mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size,
-    mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
-
-// Extracts a archive file to a memory buffer.
-mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index,
-                                     void *pBuf, size_t buf_size,
-                                     mz_uint flags);
-mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip,
-                                          const char *pFilename, void *pBuf,
-                                          size_t buf_size, mz_uint flags);
-
-// Extracts a archive file to a dynamically allocated heap buffer.
-void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index,
-                                    size_t *pSize, mz_uint flags);
-void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip,
-                                         const char *pFilename, size_t *pSize,
-                                         mz_uint flags);
-
-// Extracts a archive file using a callback function to output the file's data.
-mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip,
-                                          mz_uint file_index,
-                                          mz_file_write_func pCallback,
-                                          void *pOpaque, mz_uint flags);
-mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip,
-                                               const char *pFilename,
-                                               mz_file_write_func pCallback,
-                                               void *pOpaque, mz_uint flags);
-
-#ifndef MINIZ_NO_STDIO
-// Extracts a archive file to a disk file and sets its last accessed and
-// modified times.
-// This function only extracts files, not archive directory records.
-mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index,
-                                      const char *pDst_filename, mz_uint flags);
-mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip,
-                                           const char *pArchive_filename,
-                                           const char *pDst_filename,
-                                           mz_uint flags);
-#endif
-
-// Ends archive reading, freeing all allocations, and closing the input archive
-// file if mz_zip_reader_init_file() was used.
-mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
-
-// ZIP archive writing
-
-#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-// Inits a ZIP archive writer.
-mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
-mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip,
-                                size_t size_to_reserve_at_beginning,
-                                size_t initial_allocation_size);
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint64 size_to_reserve_at_beginning);
-#endif
-
-// Converts a ZIP archive reader object into a writer object, to allow efficient
-// in-place file appends to occur on an existing archive.
-// For archives opened using mz_zip_reader_init_file, pFilename must be the
-// archive's filename so it can be reopened for writing. If the file can't be
-// reopened, mz_zip_reader_end() will be called.
-// For archives opened using mz_zip_reader_init_mem, the memory block must be
-// growable using the realloc callback (which defaults to realloc unless you've
-// overridden it).
-// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's
-// user provided m_pWrite function cannot be NULL.
-// Note: In-place archive modification is not recommended unless you know what
-// you're doing, because if execution stops or something goes wrong before
-// the archive is finalized the file's central directory will be hosed.
-mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip,
-                                       const char *pFilename);
-
-// Adds the contents of a memory buffer to an archive. These functions record
-// the current local time into the archive.
-// To add a directory entry, call this method with an archive name ending in a
-// forwardslash with empty buffer.
-// level_and_flags - compression level (0-10, see MZ_BEST_SPEED,
-// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or
-// just set to MZ_DEFAULT_COMPRESSION.
-mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name,
-                              const void *pBuf, size_t buf_size,
-                              mz_uint level_and_flags);
-mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip,
-                                 const char *pArchive_name, const void *pBuf,
-                                 size_t buf_size, const void *pComment,
-                                 mz_uint16 comment_size,
-                                 mz_uint level_and_flags, mz_uint64 uncomp_size,
-                                 mz_uint32 uncomp_crc32);
-
-#ifndef MINIZ_NO_STDIO
-// Adds the contents of a disk file to an archive. This function also records
-// the disk file's modified time into the archive.
-// level_and_flags - compression level (0-10, see MZ_BEST_SPEED,
-// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or
-// just set to MZ_DEFAULT_COMPRESSION.
-mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name,
-                               const char *pSrc_filename, const void *pComment,
-                               mz_uint16 comment_size, mz_uint level_and_flags);
-#endif
-
-// Adds a file to an archive by fully cloning the data from another archive.
-// This function fully clones the source file's compressed data (no
-// recompression), along with its full filename, extra data, and comment fields.
-mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip,
-                                          mz_zip_archive *pSource_zip,
-                                          mz_uint file_index);
-
-// Finalizes the archive by writing the central directory records followed by
-// the end of central directory record.
-// After an archive is finalized, the only valid call on the mz_zip_archive
-// struct is mz_zip_writer_end().
-// An archive must be manually finalized by calling this function for it to be
-// valid.
-mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
-mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf,
-                                            size_t *pSize);
-
-// Ends archive writing, freeing all allocations, and closing the output file if
-// mz_zip_writer_init_file() was used.
-// Note for the archive to be valid, it must have been finalized before ending.
-mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
-
-// Misc. high-level helper functions:
-
-// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically)
-// appends a memory blob to a ZIP archive.
-// level_and_flags - compression level (0-10, see MZ_BEST_SPEED,
-// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or
-// just set to MZ_DEFAULT_COMPRESSION.
-mz_bool mz_zip_add_mem_to_archive_file_in_place(
-    const char *pZip_filename, const char *pArchive_name, const void *pBuf,
-    size_t buf_size, const void *pComment, mz_uint16 comment_size,
-    mz_uint level_and_flags);
-
-// Reads a single file from an archive into a heap block.
-// Returns NULL on failure.
-void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename,
-                                          const char *pArchive_name,
-                                          size_t *pSize, mz_uint zip_flags);
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_APIS
-
-// ------------------- Low-level Decompression API Definitions
-
-// Decompression flags used by tinfl_decompress().
-// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and
-// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the
-// input is a raw deflate stream.
-// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available
-// beyond the end of the supplied input buffer. If clear, the input buffer
-// contains all remaining input.
-// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large
-// enough to hold the entire decompressed stream. If clear, the output buffer is
-// at least the size of the dictionary (typically 32KB).
-// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the
-// decompressed bytes.
-enum {
-  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
-  TINFL_FLAG_HAS_MORE_INPUT = 2,
-  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
-  TINFL_FLAG_COMPUTE_ADLER32 = 8
-};
-
-// High level decompression functions:
-// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block
-// allocated via malloc().
-// On entry:
-//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data
-//  to decompress.
-// On return:
-//  Function returns a pointer to the decompressed data, or NULL on failure.
-//  *pOut_len will be set to the decompressed data's size, which could be larger
-//  than src_buf_len on uncompressible data.
-//  The caller must call mz_free() on the returned block when it's no longer
-//  needed.
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                   size_t *pOut_len, int flags);
-
-// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block
-// in memory.
-// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes
-// written on success.
-#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
-size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                   const void *pSrc_buf, size_t src_buf_len,
-                                   int flags);
-
-// tinfl_decompress_mem_to_callback() decompresses a block in memory to an
-// internal 32KB buffer, and a user provided callback function will be called to
-// flush the buffer.
-// Returns 1 on success or 0 on failure.
-typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);
-int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size,
-                                     tinfl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags);
-
-struct tinfl_decompressor_tag;
-typedef struct tinfl_decompressor_tag tinfl_decompressor;
-
-// Max size of LZ dictionary.
-#define TINFL_LZ_DICT_SIZE 32768
-
-// Return status.
-typedef enum {
-  TINFL_STATUS_BAD_PARAM = -3,
-  TINFL_STATUS_ADLER32_MISMATCH = -2,
-  TINFL_STATUS_FAILED = -1,
-  TINFL_STATUS_DONE = 0,
-  TINFL_STATUS_NEEDS_MORE_INPUT = 1,
-  TINFL_STATUS_HAS_MORE_OUTPUT = 2
-} tinfl_status;
-
-// Initializes the decompressor to its initial state.
-#define tinfl_init(r) \
-  do {                \
-    (r)->m_state = 0; \
-  }                   \
-  MZ_MACRO_END
-#define tinfl_get_adler32(r) (r)->m_check_adler32
-
-// Main low-level decompressor coroutine function. This is the only function
-// actually needed for decompression. All the other functions are just
-// high-level helpers for improved usability.
-// This is a universal API, i.e. it can be used as a building block to build any
-// desired higher level decompression API. In the limit case, it can be called
-// once per every byte input or output.
-tinfl_status tinfl_decompress(tinfl_decompressor *r,
-                              const mz_uint8 *pIn_buf_next,
-                              size_t *pIn_buf_size, mz_uint8 *pOut_buf_start,
-                              mz_uint8 *pOut_buf_next, size_t *pOut_buf_size,
-                              const mz_uint32 decomp_flags);
-
-// Internal/private bits follow.
-enum {
-  TINFL_MAX_HUFF_TABLES = 3,
-  TINFL_MAX_HUFF_SYMBOLS_0 = 288,
-  TINFL_MAX_HUFF_SYMBOLS_1 = 32,
-  TINFL_MAX_HUFF_SYMBOLS_2 = 19,
-  TINFL_FAST_LOOKUP_BITS = 10,
-  TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
-};
-
-typedef struct {
-  mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
-  mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE],
-      m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
-} tinfl_huff_table;
-
-
-#ifndef MINIZ_HAS_64BIT_REGISTERS
-#	define MINIZ_HAS_64BIT_REGISTERS 0
-#endif
-
-#ifndef TINFL_USE_64BIT_BITBUF
-#	if MINIZ_HAS_64BIT_REGISTERS
-#		define TINFL_USE_64BIT_BITBUF 1
-#	else
-#		define TINFL_USE_64BIT_BITBUF 0
-#	endif
-#endif
-
-#if TINFL_USE_64BIT_BITBUF
-typedef mz_uint64 tinfl_bit_buf_t;
-#define TINFL_BITBUF_SIZE (64)
-#else
-typedef mz_uint32 tinfl_bit_buf_t;
-#define TINFL_BITBUF_SIZE (32)
-#endif
-
-struct tinfl_decompressor_tag {
-  mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type,
-      m_check_adler32, m_dist, m_counter, m_num_extra,
-      m_table_sizes[TINFL_MAX_HUFF_TABLES];
-  tinfl_bit_buf_t m_bit_buf;
-  size_t m_dist_from_out_buf_start;
-  tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
-  mz_uint8 m_raw_header[4],
-      m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
-};
-
-// ------------------- Low-level Compression API Definitions
-
-// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly
-// slower, and raw/dynamic blocks will be output more frequently).
-#define TDEFL_LESS_MEMORY 0
-
-// tdefl_init() compression flags logically OR'd together (low 12 bits contain
-// the max. number of probes per dictionary search):
-// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes
-// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap
-// compression), 4095=Huffman+LZ (slowest/best compression).
-enum {
-  TDEFL_HUFFMAN_ONLY = 0,
-  TDEFL_DEFAULT_MAX_PROBES = 128,
-  TDEFL_MAX_PROBES_MASK = 0xFFF
-};
-
-// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before
-// the deflate data, and the Adler-32 of the source data at the end. Otherwise,
-// you'll get raw deflate data.
-// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even
-// when not writing zlib headers).
-// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more
-// efficient lazy parsing.
-// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's
-// initialization time to the minimum, but the output may vary from run to run
-// given the same input (depending on the contents of memory).
-// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
-// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
-// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
-// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
-// The low 12 bits are reserved to control the max # of hash probes per
-// dictionary lookup (see TDEFL_MAX_PROBES_MASK).
-enum {
-  TDEFL_WRITE_ZLIB_HEADER = 0x01000,
-  TDEFL_COMPUTE_ADLER32 = 0x02000,
-  TDEFL_GREEDY_PARSING_FLAG = 0x04000,
-  TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
-  TDEFL_RLE_MATCHES = 0x10000,
-  TDEFL_FILTER_MATCHES = 0x20000,
-  TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
-  TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
-};
-
-// High level compression functions:
-// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block
-// allocated via malloc().
-// On entry:
-//  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
-//  flags: The max match finder probes (default is 128) logically OR'd against
-//  the above flags. Higher probes are slower but improve compression.
-// On return:
-//  Function returns a pointer to the compressed data, or NULL on failure.
-//  *pOut_len will be set to the compressed data's size, which could be larger
-//  than src_buf_len on uncompressible data.
-//  The caller must free() the returned block when it's no longer needed.
-void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                 size_t *pOut_len, int flags);
-
-// tdefl_compress_mem_to_mem() compresses a block in memory to another block in
-// memory.
-// Returns 0 on failure.
-size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                 const void *pSrc_buf, size_t src_buf_len,
-                                 int flags);
-
-// Compresses an image to a compressed PNG file in memory.
-// On entry:
-//  pImage, w, h, and num_chans describe the image to compress. num_chans may be
-//  1, 2, 3, or 4.
-//  The image pitch in bytes per scanline will be w*num_chans. The leftmost
-//  pixel on the top scanline is stored first in memory.
-//  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED,
-//  MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
-//  If flip is true, the image will be flipped on the Y axis (useful for OpenGL
-//  apps).
-// On return:
-//  Function returns a pointer to the compressed data, or NULL on failure.
-//  *pLen_out will be set to the size of the PNG image file.
-//  The caller must mz_free() the returned heap block (which will typically be
-//  larger than *pLen_out) when it's no longer needed.
-void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w,
-                                                 int h, int num_chans,
-                                                 size_t *pLen_out,
-                                                 mz_uint level, mz_bool flip);
-void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h,
-                                              int num_chans, size_t *pLen_out);
-
-// Output stream interface. The compressor uses this interface to write
-// compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
-typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len,
-                                          void *pUser);
-
-// tdefl_compress_mem_to_output() compresses a block to an output stream. The
-// above helpers use this function internally.
-mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len,
-                                     tdefl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags);
-
-enum {
-  TDEFL_MAX_HUFF_TABLES = 3,
-  TDEFL_MAX_HUFF_SYMBOLS_0 = 288,
-  TDEFL_MAX_HUFF_SYMBOLS_1 = 32,
-  TDEFL_MAX_HUFF_SYMBOLS_2 = 19,
-  TDEFL_LZ_DICT_SIZE = 32768,
-  TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1,
-  TDEFL_MIN_MATCH_LEN = 3,
-  TDEFL_MAX_MATCH_LEN = 258
-};
-
-// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed
-// output block (using static/fixed Huffman codes).
-#if TDEFL_LESS_MEMORY
-enum {
-  TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024,
-  TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
-  TDEFL_MAX_HUFF_SYMBOLS = 288,
-  TDEFL_LZ_HASH_BITS = 12,
-  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
-  TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
-  TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
-};
-#else
-enum {
-  TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024,
-  TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
-  TDEFL_MAX_HUFF_SYMBOLS = 288,
-  TDEFL_LZ_HASH_BITS = 15,
-  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
-  TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
-  TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
-};
-#endif
-
-// The low-level tdefl functions below may be used directly if the above helper
-// functions aren't flexible enough. The low-level functions don't make any heap
-// allocations, unlike the above helper functions.
-typedef enum {
-  TDEFL_STATUS_BAD_PARAM = -2,
-  TDEFL_STATUS_PUT_BUF_FAILED = -1,
-  TDEFL_STATUS_OKAY = 0,
-  TDEFL_STATUS_DONE = 1
-} tdefl_status;
-
-// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
-typedef enum {
-  TDEFL_NO_FLUSH = 0,
-  TDEFL_SYNC_FLUSH = 2,
-  TDEFL_FULL_FLUSH = 3,
-  TDEFL_FINISH = 4
-} tdefl_flush;
-
-// tdefl's compression state structure.
-typedef struct {
-  tdefl_put_buf_func_ptr m_pPut_buf_func;
-  void *m_pPut_buf_user;
-  mz_uint m_flags, m_max_probes[2];
-  int m_greedy_parsing;
-  mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
-  mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
-  mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in,
-      m_bit_buffer;
-  mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit,
-      m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index,
-      m_wants_to_finish;
-  tdefl_status m_prev_return_status;
-  const void *m_pIn_buf;
-  void *m_pOut_buf;
-  size_t *m_pIn_buf_size, *m_pOut_buf_size;
-  tdefl_flush m_flush;
-  const mz_uint8 *m_pSrc;
-  size_t m_src_buf_left, m_out_buf_ofs;
-  mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
-  mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
-  mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
-  mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
-  mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
-  mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
-  mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
-  mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
-} tdefl_compressor;
-
-// Initializes the compressor.
-// There is no corresponding deinit() function because the tdefl API's do not
-// dynamically allocate memory.
-// pBut_buf_func: If NULL, output data will be supplied to the specified
-// callback. In this case, the user should call the tdefl_compress_buffer() API
-// for compression.
-// If pBut_buf_func is NULL the user should always call the tdefl_compress()
-// API.
-// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER,
-// etc.)
-tdefl_status tdefl_init(tdefl_compressor *d,
-                        tdefl_put_buf_func_ptr pPut_buf_func,
-                        void *pPut_buf_user, int flags);
-
-// Compresses a block of data, consuming as much of the specified input buffer
-// as possible, and writing as much compressed data to the specified output
-// buffer as possible.
-tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf,
-                            size_t *pIn_buf_size, void *pOut_buf,
-                            size_t *pOut_buf_size, tdefl_flush flush);
-
-// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a
-// non-NULL tdefl_put_buf_func_ptr.
-// tdefl_compress_buffer() always consumes the entire input buffer.
-tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf,
-                                   size_t in_buf_size, tdefl_flush flush);
-
-tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
-mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
-
-// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't
-// defined, because it uses some of its macros.
-#ifndef MINIZ_NO_ZLIB_APIS
-// Create tdefl_compress() flags given zlib-style compression parameters.
-// level may range from [0,10] (where 10 is absolute max compression, but may be
-// much slower on some files)
-// window_bits may be -15 (raw deflate) or 15 (zlib)
-// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY,
-// MZ_RLE, or MZ_FIXED
-mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits,
-                                                int strategy);
-#endif  // #ifndef MINIZ_NO_ZLIB_APIS
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINIZ_HEADER_INCLUDED
-
-// ------------------- End of Header: Implementation follows. (If you only want
-// the header, define MINIZ_HEADER_FILE_ONLY.)
-
-#ifndef MINIZ_HEADER_FILE_ONLY
-
-typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1];
-typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1];
-typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 1 : -1];
-
-#include <assert.h>
-#include <string.h>
-
-#define MZ_ASSERT(x) assert(x)
-
-#ifdef MINIZ_NO_MALLOC
-#define MZ_MALLOC(x) NULL
-#define MZ_FREE(x) (void)x, ((void)0)
-#define MZ_REALLOC(p, x) NULL
-#else
-#define MZ_MALLOC(x) malloc(x)
-#define MZ_FREE(x) free(x)
-#define MZ_REALLOC(p, x) realloc(p, x)
-#endif
-
-#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b))
-#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b))
-#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-#define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
-#define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
-#else
-#define MZ_READ_LE16(p)                      \
-  ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \
-   ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
-#define MZ_READ_LE32(p)                               \
-  ((mz_uint32)(((const mz_uint8 *)(p))[0]) |          \
-   ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) |  \
-   ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \
-   ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
-#endif
-
-#ifdef _MSC_VER
-#define MZ_FORCEINLINE __forceinline
-#elif defined(__GNUC__)
-#define MZ_FORCEINLINE inline __attribute__((__always_inline__))
-#else
-#define MZ_FORCEINLINE inline
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// ------------------- zlib-style API's
-
-mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) {
-  mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16);
-  size_t block_len = buf_len % 5552;
-  if (!ptr) return MZ_ADLER32_INIT;
-  while (buf_len) {
-    for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
-      s1 += ptr[0], s2 += s1;
-      s1 += ptr[1], s2 += s1;
-      s1 += ptr[2], s2 += s1;
-      s1 += ptr[3], s2 += s1;
-      s1 += ptr[4], s2 += s1;
-      s1 += ptr[5], s2 += s1;
-      s1 += ptr[6], s2 += s1;
-      s1 += ptr[7], s2 += s1;
-    }
-    for (; i < block_len; ++i) s1 += *ptr++, s2 += s1;
-    s1 %= 65521U, s2 %= 65521U;
-    buf_len -= block_len;
-    block_len = 5552;
-  }
-  return (s2 << 16) + s1;
-}
-
-// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C
-// implementation that balances processor cache usage against speed":
-// http://www.geocities.com/malbrain/
-mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) {
-  static const mz_uint32 s_crc32[16] = {
-      0,          0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4,
-      0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
-      0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c};
-  mz_uint32 crcu32 = (mz_uint32)crc;
-  if (!ptr) return MZ_CRC32_INIT;
-  crcu32 = ~crcu32;
-  while (buf_len--) {
-    mz_uint8 b = *ptr++;
-    crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)];
-    crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)];
-  }
-  return ~crcu32;
-}
-
-void mz_free(void *p) { MZ_FREE(p); }
-
-#ifndef MINIZ_NO_ZLIB_APIS
-
-static void *def_alloc_func(void *opaque, size_t items, size_t size) {
-  (void)opaque, (void)items, (void)size;
-  return MZ_MALLOC(items * size);
-}
-static void def_free_func(void *opaque, void *address) {
-  (void)opaque, (void)address;
-  MZ_FREE(address);
-}
-static void *def_realloc_func(void *opaque, void *address, size_t items,
-                              size_t size) {
-  (void)opaque, (void)address, (void)items, (void)size;
-  return MZ_REALLOC(address, items * size);
-}
-
-const char *mz_version(void) { return MZ_VERSION; }
-
-int mz_deflateInit(mz_streamp pStream, int level) {
-  return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9,
-                         MZ_DEFAULT_STRATEGY);
-}
-
-int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits,
-                    int mem_level, int strategy) {
-  tdefl_compressor *pComp;
-  mz_uint comp_flags =
-      TDEFL_COMPUTE_ADLER32 |
-      tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy);
-
-  if (!pStream) return MZ_STREAM_ERROR;
-  if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) ||
-      ((window_bits != MZ_DEFAULT_WINDOW_BITS) &&
-       (-window_bits != MZ_DEFAULT_WINDOW_BITS)))
-    return MZ_PARAM_ERROR;
-
-  pStream->data_type = 0;
-  pStream->adler = MZ_ADLER32_INIT;
-  pStream->msg = NULL;
-  pStream->reserved = 0;
-  pStream->total_in = 0;
-  pStream->total_out = 0;
-  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
-  if (!pStream->zfree) pStream->zfree = def_free_func;
-
-  pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1,
-                                              sizeof(tdefl_compressor));
-  if (!pComp) return MZ_MEM_ERROR;
-
-  pStream->state = (struct mz_internal_state *)pComp;
-
-  if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) {
-    mz_deflateEnd(pStream);
-    return MZ_PARAM_ERROR;
-  }
-
-  return MZ_OK;
-}
-
-int mz_deflateReset(mz_streamp pStream) {
-  if ((!pStream) || (!pStream->state) || (!pStream->zalloc) ||
-      (!pStream->zfree))
-    return MZ_STREAM_ERROR;
-  pStream->total_in = pStream->total_out = 0;
-  tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL,
-             ((tdefl_compressor *)pStream->state)->m_flags);
-  return MZ_OK;
-}
-
-int mz_deflate(mz_streamp pStream, int flush) {
-  size_t in_bytes, out_bytes;
-  mz_ulong orig_total_in, orig_total_out;
-  int mz_status = MZ_OK;
-
-  if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) ||
-      (!pStream->next_out))
-    return MZ_STREAM_ERROR;
-  if (!pStream->avail_out) return MZ_BUF_ERROR;
-
-  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
-
-  if (((tdefl_compressor *)pStream->state)->m_prev_return_status ==
-      TDEFL_STATUS_DONE)
-    return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR;
-
-  orig_total_in = pStream->total_in;
-  orig_total_out = pStream->total_out;
-  for (;;) {
-    tdefl_status defl_status;
-    in_bytes = pStream->avail_in;
-    out_bytes = pStream->avail_out;
-
-    defl_status = tdefl_compress((tdefl_compressor *)pStream->state,
-                                 pStream->next_in, &in_bytes, pStream->next_out,
-                                 &out_bytes, (tdefl_flush)flush);
-    pStream->next_in += (mz_uint)in_bytes;
-    pStream->avail_in -= (mz_uint)in_bytes;
-    pStream->total_in += (mz_uint)in_bytes;
-    pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state);
-
-    pStream->next_out += (mz_uint)out_bytes;
-    pStream->avail_out -= (mz_uint)out_bytes;
-    pStream->total_out += (mz_uint)out_bytes;
-
-    if (defl_status < 0) {
-      mz_status = MZ_STREAM_ERROR;
-      break;
-    } else if (defl_status == TDEFL_STATUS_DONE) {
-      mz_status = MZ_STREAM_END;
-      break;
-    } else if (!pStream->avail_out)
-      break;
-    else if ((!pStream->avail_in) && (flush != MZ_FINISH)) {
-      if ((flush) || (pStream->total_in != orig_total_in) ||
-          (pStream->total_out != orig_total_out))
-        break;
-      return MZ_BUF_ERROR;  // Can't make forward progress without some input.
-    }
-  }
-  return mz_status;
-}
-
-int mz_deflateEnd(mz_streamp pStream) {
-  if (!pStream) return MZ_STREAM_ERROR;
-  if (pStream->state) {
-    pStream->zfree(pStream->opaque, pStream->state);
-    pStream->state = NULL;
-  }
-  return MZ_OK;
-}
-
-mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) {
-  (void)pStream;
-  // This is really over conservative. (And lame, but it's actually pretty
-  // tricky to compute a true upper bound given the way tdefl's blocking works.)
-  return MZ_MAX(128 + (source_len * 110) / 100,
-                128 + source_len + ((source_len / (31 * 1024)) + 1) * 5);
-}
-
-int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len,
-                 const unsigned char *pSource, mz_ulong source_len, int level) {
-  int status;
-  mz_stream stream;
-  memset(&stream, 0, sizeof(stream));
-
-  // In case mz_ulong is 64-bits (argh I hate longs).
-  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
-
-  stream.next_in = pSource;
-  stream.avail_in = (mz_uint32)source_len;
-  stream.next_out = pDest;
-  stream.avail_out = (mz_uint32)*pDest_len;
-
-  status = mz_deflateInit(&stream, level);
-  if (status != MZ_OK) return status;
-
-  status = mz_deflate(&stream, MZ_FINISH);
-  if (status != MZ_STREAM_END) {
-    mz_deflateEnd(&stream);
-    return (status == MZ_OK) ? MZ_BUF_ERROR : status;
-  }
-
-  *pDest_len = stream.total_out;
-  return mz_deflateEnd(&stream);
-}
-
-int mz_compress(unsigned char *pDest, mz_ulong *pDest_len,
-                const unsigned char *pSource, mz_ulong source_len) {
-  return mz_compress2(pDest, pDest_len, pSource, source_len,
-                      MZ_DEFAULT_COMPRESSION);
-}
-
-mz_ulong mz_compressBound(mz_ulong source_len) {
-  return mz_deflateBound(NULL, source_len);
-}
-
-typedef struct {
-  tinfl_decompressor m_decomp;
-  mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed;
-  int m_window_bits;
-  mz_uint8 m_dict[TINFL_LZ_DICT_SIZE];
-  tinfl_status m_last_status;
-} inflate_state;
-
-int mz_inflateInit2(mz_streamp pStream, int window_bits) {
-  inflate_state *pDecomp;
-  if (!pStream) return MZ_STREAM_ERROR;
-  if ((window_bits != MZ_DEFAULT_WINDOW_BITS) &&
-      (-window_bits != MZ_DEFAULT_WINDOW_BITS))
-    return MZ_PARAM_ERROR;
-
-  pStream->data_type = 0;
-  pStream->adler = 0;
-  pStream->msg = NULL;
-  pStream->total_in = 0;
-  pStream->total_out = 0;
-  pStream->reserved = 0;
-  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
-  if (!pStream->zfree) pStream->zfree = def_free_func;
-
-  pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1,
-                                             sizeof(inflate_state));
-  if (!pDecomp) return MZ_MEM_ERROR;
-
-  pStream->state = (struct mz_internal_state *)pDecomp;
-
-  tinfl_init(&pDecomp->m_decomp);
-  pDecomp->m_dict_ofs = 0;
-  pDecomp->m_dict_avail = 0;
-  pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
-  pDecomp->m_first_call = 1;
-  pDecomp->m_has_flushed = 0;
-  pDecomp->m_window_bits = window_bits;
-
-  return MZ_OK;
-}
-
-int mz_inflateInit(mz_streamp pStream) {
-  return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS);
-}
-
-int mz_inflate(mz_streamp pStream, int flush) {
-  inflate_state *pState;
-  mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32;
-  size_t in_bytes, out_bytes, orig_avail_in;
-  tinfl_status status;
-
-  if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR;
-  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
-  if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH))
-    return MZ_STREAM_ERROR;
-
-  pState = (inflate_state *)pStream->state;
-  if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
-  orig_avail_in = pStream->avail_in;
-
-  first_call = pState->m_first_call;
-  pState->m_first_call = 0;
-  if (pState->m_last_status < 0) return MZ_DATA_ERROR;
-
-  if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
-  pState->m_has_flushed |= (flush == MZ_FINISH);
-
-  if ((flush == MZ_FINISH) && (first_call)) {
-    // MZ_FINISH on the first call implies that the input and output buffers are
-    // large enough to hold the entire compressed/decompressed file.
-    decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
-    in_bytes = pStream->avail_in;
-    out_bytes = pStream->avail_out;
-    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes,
-                              pStream->next_out, pStream->next_out, &out_bytes,
-                              decomp_flags);
-    pState->m_last_status = status;
-    pStream->next_in += (mz_uint)in_bytes;
-    pStream->avail_in -= (mz_uint)in_bytes;
-    pStream->total_in += (mz_uint)in_bytes;
-    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
-    pStream->next_out += (mz_uint)out_bytes;
-    pStream->avail_out -= (mz_uint)out_bytes;
-    pStream->total_out += (mz_uint)out_bytes;
-
-    if (status < 0)
-      return MZ_DATA_ERROR;
-    else if (status != TINFL_STATUS_DONE) {
-      pState->m_last_status = TINFL_STATUS_FAILED;
-      return MZ_BUF_ERROR;
-    }
-    return MZ_STREAM_END;
-  }
-  // flush != MZ_FINISH then we must assume there's more input.
-  if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;
-
-  if (pState->m_dict_avail) {
-    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
-    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
-    pStream->next_out += n;
-    pStream->avail_out -= n;
-    pStream->total_out += n;
-    pState->m_dict_avail -= n;
-    pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
-    return ((pState->m_last_status == TINFL_STATUS_DONE) &&
-            (!pState->m_dict_avail))
-               ? MZ_STREAM_END
-               : MZ_OK;
-  }
-
-  for (;;) {
-    in_bytes = pStream->avail_in;
-    out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;
-
-    status = tinfl_decompress(
-        &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict,
-        pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
-    pState->m_last_status = status;
-
-    pStream->next_in += (mz_uint)in_bytes;
-    pStream->avail_in -= (mz_uint)in_bytes;
-    pStream->total_in += (mz_uint)in_bytes;
-    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
-
-    pState->m_dict_avail = (mz_uint)out_bytes;
-
-    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
-    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
-    pStream->next_out += n;
-    pStream->avail_out -= n;
-    pStream->total_out += n;
-    pState->m_dict_avail -= n;
-    pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
-
-    if (status < 0)
-      return MZ_DATA_ERROR;  // Stream is corrupted (there could be some
-    // uncompressed data left in the output dictionary -
-    // oh well).
-    else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
-      return MZ_BUF_ERROR;  // Signal caller that we can't make forward progress
-                            // without supplying more input or by setting flush
-                            // to MZ_FINISH.
-    else if (flush == MZ_FINISH) {
-      // The output buffer MUST be large to hold the remaining uncompressed data
-      // when flush==MZ_FINISH.
-      if (status == TINFL_STATUS_DONE)
-        return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
-      // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's
-      // at least 1 more byte on the way. If there's no more room left in the
-      // output buffer then something is wrong.
-      else if (!pStream->avail_out)
-        return MZ_BUF_ERROR;
-    } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) ||
-               (!pStream->avail_out) || (pState->m_dict_avail))
-      break;
-  }
-
-  return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail))
-             ? MZ_STREAM_END
-             : MZ_OK;
-}
-
-int mz_inflateEnd(mz_streamp pStream) {
-  if (!pStream) return MZ_STREAM_ERROR;
-  if (pStream->state) {
-    pStream->zfree(pStream->opaque, pStream->state);
-    pStream->state = NULL;
-  }
-  return MZ_OK;
-}
-
-int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len,
-                  const unsigned char *pSource, mz_ulong source_len) {
-  mz_stream stream;
-  int status;
-  memset(&stream, 0, sizeof(stream));
-
-  // In case mz_ulong is 64-bits (argh I hate longs).
-  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
-
-  stream.next_in = pSource;
-  stream.avail_in = (mz_uint32)source_len;
-  stream.next_out = pDest;
-  stream.avail_out = (mz_uint32)*pDest_len;
-
-  status = mz_inflateInit(&stream);
-  if (status != MZ_OK) return status;
-
-  status = mz_inflate(&stream, MZ_FINISH);
-  if (status != MZ_STREAM_END) {
-    mz_inflateEnd(&stream);
-    return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR
-                                                            : status;
-  }
-  *pDest_len = stream.total_out;
-
-  return mz_inflateEnd(&stream);
-}
-
-const char *mz_error(int err) {
-  static struct {
-    int m_err;
-    const char *m_pDesc;
-  } s_error_descs[] = {{MZ_OK, ""},
-                       {MZ_STREAM_END, "stream end"},
-                       {MZ_NEED_DICT, "need dictionary"},
-                       {MZ_ERRNO, "file error"},
-                       {MZ_STREAM_ERROR, "stream error"},
-                       {MZ_DATA_ERROR, "data error"},
-                       {MZ_MEM_ERROR, "out of memory"},
-                       {MZ_BUF_ERROR, "buf error"},
-                       {MZ_VERSION_ERROR, "version error"},
-                       {MZ_PARAM_ERROR, "parameter error"}};
-  mz_uint i;
-  for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i)
-    if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc;
-  return NULL;
-}
-
-#endif  // MINIZ_NO_ZLIB_APIS
-
-// ------------------- Low-level Decompression (completely independent from all
-// compression API's)
-
-#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
-#define TINFL_MEMSET(p, c, l) memset(p, c, l)
-
-#define TINFL_CR_BEGIN  \
-  switch (r->m_state) { \
-    case 0:
-#define TINFL_CR_RETURN(state_index, result) \
-  do {                                       \
-    status = result;                         \
-    r->m_state = state_index;                \
-    goto common_exit;                        \
-    case state_index:;                       \
-  }                                          \
-  MZ_MACRO_END
-#define TINFL_CR_RETURN_FOREVER(state_index, result) \
-  do {                                               \
-    for (;;) {                                       \
-      TINFL_CR_RETURN(state_index, result);          \
-    }                                                \
-  }                                                  \
-  MZ_MACRO_END
-#define TINFL_CR_FINISH }
-
-// TODO: If the caller has indicated that there's no more input, and we attempt
-// to read beyond the input buf, then something is wrong with the input because
-// the inflator never
-// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of
-// the stream with 0's in this scenario.
-#define TINFL_GET_BYTE(state_index, c)                                 \
-  do {                                                                 \
-    if (pIn_buf_cur >= pIn_buf_end) {                                  \
-      for (;;) {                                                       \
-        if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) {                \
-          TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
-          if (pIn_buf_cur < pIn_buf_end) {                             \
-            c = *pIn_buf_cur++;                                        \
-            break;                                                     \
-          }                                                            \
-        } else {                                                       \
-          c = 0;                                                       \
-          break;                                                       \
-        }                                                              \
-      }                                                                \
-    } else                                                             \
-      c = *pIn_buf_cur++;                                              \
-  }                                                                    \
-  MZ_MACRO_END
-
-#define TINFL_NEED_BITS(state_index, n)            \
-  do {                                             \
-    mz_uint c;                                     \
-    TINFL_GET_BYTE(state_index, c);                \
-    bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \
-    num_bits += 8;                                 \
-  } while (num_bits < (mz_uint)(n))
-#define TINFL_SKIP_BITS(state_index, n) \
-  do {                                  \
-    if (num_bits < (mz_uint)(n)) {      \
-      TINFL_NEED_BITS(state_index, n);  \
-    }                                   \
-    bit_buf >>= (n);                    \
-    num_bits -= (n);                    \
-  }                                     \
-  MZ_MACRO_END
-#define TINFL_GET_BITS(state_index, b, n) \
-  do {                                    \
-    if (num_bits < (mz_uint)(n)) {        \
-      TINFL_NEED_BITS(state_index, n);    \
-    }                                     \
-    b = bit_buf & ((1 << (n)) - 1);       \
-    bit_buf >>= (n);                      \
-    num_bits -= (n);                      \
-  }                                       \
-  MZ_MACRO_END
-
-// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes
-// remaining in the input buffer falls below 2.
-// It reads just enough bytes from the input stream that are needed to decode
-// the next Huffman code (and absolutely no more). It works by trying to fully
-// decode a
-// Huffman code by using whatever bits are currently present in the bit buffer.
-// If this fails, it reads another byte, and tries again until it succeeds or
-// until the
-// bit buffer contains >=15 bits (deflate's max. Huffman code size).
-#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff)                     \
-  do {                                                                 \
-    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
-    if (temp >= 0) {                                                   \
-      code_len = temp >> 9;                                            \
-      if ((code_len) && (num_bits >= code_len)) break;                 \
-    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) {                    \
-      code_len = TINFL_FAST_LOOKUP_BITS;                               \
-      do {                                                             \
-        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
-      } while ((temp < 0) && (num_bits >= (code_len + 1)));            \
-      if (temp >= 0) break;                                            \
-    }                                                                  \
-    TINFL_GET_BYTE(state_index, c);                                    \
-    bit_buf |= (((tinfl_bit_buf_t)c) << num_bits);                     \
-    num_bits += 8;                                                     \
-  } while (num_bits < 15);
-
-// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex
-// than you would initially expect because the zlib API expects the decompressor
-// to never read
-// beyond the final byte of the deflate stream. (In other words, when this macro
-// wants to read another byte from the input, it REALLY needs another byte in
-// order to fully
-// decode the next Huffman code.) Handling this properly is particularly
-// important on raw deflate (non-zlib) streams, which aren't followed by a byte
-// aligned adler-32.
-// The slow path is only executed at the very end of the input buffer.
-#define TINFL_HUFF_DECODE(state_index, sym, pHuff)                             \
-  do {                                                                         \
-    int temp;                                                                  \
-    mz_uint code_len, c;                                                       \
-    if (num_bits < 15) {                                                       \
-      if ((pIn_buf_end - pIn_buf_cur) < 2) {                                   \
-        TINFL_HUFF_BITBUF_FILL(state_index, pHuff);                            \
-      } else {                                                                 \
-        bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) |           \
-                   (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8));      \
-        pIn_buf_cur += 2;                                                      \
-        num_bits += 16;                                                        \
-      }                                                                        \
-    }                                                                          \
-    if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \
-        0)                                                                     \
-      code_len = temp >> 9, temp &= 511;                                       \
-    else {                                                                     \
-      code_len = TINFL_FAST_LOOKUP_BITS;                                       \
-      do {                                                                     \
-        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)];         \
-      } while (temp < 0);                                                      \
-    }                                                                          \
-    sym = temp;                                                                \
-    bit_buf >>= code_len;                                                      \
-    num_bits -= code_len;                                                      \
-  }                                                                            \
-  MZ_MACRO_END
-
-tinfl_status tinfl_decompress(tinfl_decompressor *r,
-                              const mz_uint8 *pIn_buf_next,
-                              size_t *pIn_buf_size, mz_uint8 *pOut_buf_start,
-                              mz_uint8 *pOut_buf_next, size_t *pOut_buf_size,
-                              const mz_uint32 decomp_flags) {
-  static const int s_length_base[31] = {
-      3,  4,  5,  6,  7,  8,  9,  10,  11,  13,  15,  17,  19,  23, 27, 31,
-      35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0,  0};
-  static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
-                                         1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
-                                         4, 4, 5, 5, 5, 5, 0, 0, 0};
-  static const int s_dist_base[32] = {
-      1,    2,    3,    4,    5,    7,     9,     13,    17,  25,   33,
-      49,   65,   97,   129,  193,  257,   385,   513,   769, 1025, 1537,
-      2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0};
-  static const int s_dist_extra[32] = {0, 0, 0,  0,  1,  1,  2,  2,  3,  3,
-                                       4, 4, 5,  5,  6,  6,  7,  7,  8,  8,
-                                       9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
-  static const mz_uint8 s_length_dezigzag[19] = {
-      16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-  static const int s_min_table_sizes[3] = {257, 1, 4};
-
-  tinfl_status status = TINFL_STATUS_FAILED;
-  mz_uint32 num_bits, dist, counter, num_extra;
-  tinfl_bit_buf_t bit_buf;
-  const mz_uint8 *pIn_buf_cur = pIn_buf_next,
-                 *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
-  mz_uint8 *pOut_buf_cur = pOut_buf_next,
-           *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
-  size_t out_buf_size_mask =
-             (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)
-                 ? (size_t)-1
-                 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1,
-         dist_from_out_buf_start;
-
-  // Ensure the output buffer's size is a power of 2, unless the output buffer
-  // is large enough to hold the entire output file (in which case it doesn't
-  // matter).
-  if (((out_buf_size_mask + 1) & out_buf_size_mask) ||
-      (pOut_buf_next < pOut_buf_start)) {
-    *pIn_buf_size = *pOut_buf_size = 0;
-    return TINFL_STATUS_BAD_PARAM;
-  }
-
-  num_bits = r->m_num_bits;
-  bit_buf = r->m_bit_buf;
-  dist = r->m_dist;
-  counter = r->m_counter;
-  num_extra = r->m_num_extra;
-  dist_from_out_buf_start = r->m_dist_from_out_buf_start;
-  TINFL_CR_BEGIN
-
-  bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0;
-  r->m_z_adler32 = r->m_check_adler32 = 1;
-  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) {
-    TINFL_GET_BYTE(1, r->m_zhdr0);
-    TINFL_GET_BYTE(2, r->m_zhdr1);
-    counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) ||
-               (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
-    if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
-      counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) ||
-                  ((out_buf_size_mask + 1) <
-                   (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4)))));
-    if (counter) {
-      TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED);
-    }
-  }
-
-  do {
-    TINFL_GET_BITS(3, r->m_final, 3);
-    r->m_type = r->m_final >> 1;
-    if (r->m_type == 0) {
-      TINFL_SKIP_BITS(5, num_bits & 7);
-      for (counter = 0; counter < 4; ++counter) {
-        if (num_bits)
-          TINFL_GET_BITS(6, r->m_raw_header[counter], 8);
-        else
-          TINFL_GET_BYTE(7, r->m_raw_header[counter]);
-      }
-      if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) !=
-          (mz_uint)(0xFFFF ^
-                    (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) {
-        TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED);
-      }
-      while ((counter) && (num_bits)) {
-        TINFL_GET_BITS(51, dist, 8);
-        while (pOut_buf_cur >= pOut_buf_end) {
-          TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT);
-        }
-        *pOut_buf_cur++ = (mz_uint8)dist;
-        counter--;
-      }
-      while (counter) {
-        size_t n;
-        while (pOut_buf_cur >= pOut_buf_end) {
-          TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT);
-        }
-        while (pIn_buf_cur >= pIn_buf_end) {
-          if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) {
-            TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT);
-          } else {
-            TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED);
-          }
-        }
-        n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur),
-                          (size_t)(pIn_buf_end - pIn_buf_cur)),
-                   counter);
-        TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n);
-        pIn_buf_cur += n;
-        pOut_buf_cur += n;
-        counter -= (mz_uint)n;
-      }
-    } else if (r->m_type == 3) {
-      TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
-    } else {
-      if (r->m_type == 1) {
-        mz_uint8 *p = r->m_tables[0].m_code_size;
-        mz_uint i;
-        r->m_table_sizes[0] = 288;
-        r->m_table_sizes[1] = 32;
-        TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
-        for (i = 0; i <= 143; ++i) *p++ = 8;
-        for (; i <= 255; ++i) *p++ = 9;
-        for (; i <= 279; ++i) *p++ = 7;
-        for (; i <= 287; ++i) *p++ = 8;
-      } else {
-        for (counter = 0; counter < 3; counter++) {
-          TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]);
-          r->m_table_sizes[counter] += s_min_table_sizes[counter];
-        }
-        MZ_CLEAR_OBJ(r->m_tables[2].m_code_size);
-        for (counter = 0; counter < r->m_table_sizes[2]; counter++) {
-          mz_uint s;
-          TINFL_GET_BITS(14, s, 3);
-          r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s;
-        }
-        r->m_table_sizes[2] = 19;
-      }
-      for (; (int)r->m_type >= 0; r->m_type--) {
-        int tree_next, tree_cur;
-        tinfl_huff_table *pTable;
-        mz_uint i, j, used_syms, total, sym_index, next_code[17],
-            total_syms[16];
-        pTable = &r->m_tables[r->m_type];
-        MZ_CLEAR_OBJ(total_syms);
-        MZ_CLEAR_OBJ(pTable->m_look_up);
-        MZ_CLEAR_OBJ(pTable->m_tree);
-        for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
-          total_syms[pTable->m_code_size[i]]++;
-        used_syms = 0, total = 0;
-        next_code[0] = next_code[1] = 0;
-        for (i = 1; i <= 15; ++i) {
-          used_syms += total_syms[i];
-          next_code[i + 1] = (total = ((total + total_syms[i]) << 1));
-        }
-        if ((65536 != total) && (used_syms > 1)) {
-          TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
-        }
-        for (tree_next = -1, sym_index = 0;
-             sym_index < r->m_table_sizes[r->m_type]; ++sym_index) {
-          mz_uint rev_code = 0, l, cur_code,
-                  code_size = pTable->m_code_size[sym_index];
-          if (!code_size) continue;
-          cur_code = next_code[code_size]++;
-          for (l = code_size; l > 0; l--, cur_code >>= 1)
-            rev_code = (rev_code << 1) | (cur_code & 1);
-          if (code_size <= TINFL_FAST_LOOKUP_BITS) {
-            mz_int16 k = (mz_int16)((code_size << 9) | sym_index);
-            while (rev_code < TINFL_FAST_LOOKUP_SIZE) {
-              pTable->m_look_up[rev_code] = k;
-              rev_code += (1 << code_size);
-            }
-            continue;
-          }
-          if (0 ==
-              (tree_cur = pTable->m_look_up[rev_code &
-                                            (TINFL_FAST_LOOKUP_SIZE - 1)])) {
-            pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] =
-                (mz_int16)tree_next;
-            tree_cur = tree_next;
-            tree_next -= 2;
-          }
-          rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
-          for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) {
-            tree_cur -= ((rev_code >>= 1) & 1);
-            if (!pTable->m_tree[-tree_cur - 1]) {
-              pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next;
-              tree_cur = tree_next;
-              tree_next -= 2;
-            } else
-              tree_cur = pTable->m_tree[-tree_cur - 1];
-          }
-          tree_cur -= ((rev_code >>= 1) & 1);
-          pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
-        }
-        if (r->m_type == 2) {
-          for (counter = 0;
-               counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) {
-            mz_uint s;
-            TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]);
-            if (dist < 16) {
-              r->m_len_codes[counter++] = (mz_uint8)dist;
-              continue;
-            }
-            if ((dist == 16) && (!counter)) {
-              TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
-            }
-            num_extra = "\02\03\07"[dist - 16];
-            TINFL_GET_BITS(18, s, num_extra);
-            s += "\03\03\013"[dist - 16];
-            TINFL_MEMSET(r->m_len_codes + counter,
-                         (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
-            counter += s;
-          }
-          if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) {
-            TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
-          }
-          TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes,
-                       r->m_table_sizes[0]);
-          TINFL_MEMCPY(r->m_tables[1].m_code_size,
-                       r->m_len_codes + r->m_table_sizes[0],
-                       r->m_table_sizes[1]);
-        }
-      }
-      for (;;) {
-        mz_uint8 *pSrc;
-        for (;;) {
-          if (((pIn_buf_end - pIn_buf_cur) < 4) ||
-              ((pOut_buf_end - pOut_buf_cur) < 2)) {
-            TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]);
-            if (counter >= 256) break;
-            while (pOut_buf_cur >= pOut_buf_end) {
-              TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT);
-            }
-            *pOut_buf_cur++ = (mz_uint8)counter;
-          } else {
-            int sym2;
-            mz_uint code_len;
-#if TINFL_USE_64BIT_BITBUF
-            if (num_bits < 30) {
-              bit_buf |=
-                  (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits);
-              pIn_buf_cur += 4;
-              num_bits += 32;
-            }
-#else
-            if (num_bits < 15) {
-              bit_buf |=
-                  (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
-              pIn_buf_cur += 2;
-              num_bits += 16;
-            }
-#endif
-            if ((sym2 =
-                     r->m_tables[0]
-                         .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >=
-                0)
-              code_len = sym2 >> 9;
-            else {
-              code_len = TINFL_FAST_LOOKUP_BITS;
-              do {
-                sym2 = r->m_tables[0]
-                           .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)];
-              } while (sym2 < 0);
-            }
-            counter = sym2;
-            bit_buf >>= code_len;
-            num_bits -= code_len;
-            if (counter & 256) break;
-
-#if !TINFL_USE_64BIT_BITBUF
-            if (num_bits < 15) {
-              bit_buf |=
-                  (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
-              pIn_buf_cur += 2;
-              num_bits += 16;
-            }
-#endif
-            if ((sym2 =
-                     r->m_tables[0]
-                         .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >=
-                0)
-              code_len = sym2 >> 9;
-            else {
-              code_len = TINFL_FAST_LOOKUP_BITS;
-              do {
-                sym2 = r->m_tables[0]
-                           .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)];
-              } while (sym2 < 0);
-            }
-            bit_buf >>= code_len;
-            num_bits -= code_len;
-
-            pOut_buf_cur[0] = (mz_uint8)counter;
-            if (sym2 & 256) {
-              pOut_buf_cur++;
-              counter = sym2;
-              break;
-            }
-            pOut_buf_cur[1] = (mz_uint8)sym2;
-            pOut_buf_cur += 2;
-          }
-        }
-        if ((counter &= 511) == 256) break;
-
-        num_extra = s_length_extra[counter - 257];
-        counter = s_length_base[counter - 257];
-        if (num_extra) {
-          mz_uint extra_bits;
-          TINFL_GET_BITS(25, extra_bits, num_extra);
-          counter += extra_bits;
-        }
-
-        TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]);
-        num_extra = s_dist_extra[dist];
-        dist = s_dist_base[dist];
-        if (num_extra) {
-          mz_uint extra_bits;
-          TINFL_GET_BITS(27, extra_bits, num_extra);
-          dist += extra_bits;
-        }
-
-        dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
-        if ((dist > dist_from_out_buf_start) &&
-            (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) {
-          TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
-        }
-
-        pSrc = pOut_buf_start +
-               ((dist_from_out_buf_start - dist) & out_buf_size_mask);
-
-        if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) {
-          while (counter--) {
-            while (pOut_buf_cur >= pOut_buf_end) {
-              TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT);
-            }
-            *pOut_buf_cur++ =
-                pOut_buf_start[(dist_from_out_buf_start++ - dist) &
-                               out_buf_size_mask];
-          }
-          continue;
-        }
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
-        else if ((counter >= 9) && (counter <= dist)) {
-          const mz_uint8 *pSrc_end = pSrc + (counter & ~7);
-          do {
-            ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0];
-            ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1];
-            pOut_buf_cur += 8;
-          } while ((pSrc += 8) < pSrc_end);
-          if ((counter &= 7) < 3) {
-            if (counter) {
-              pOut_buf_cur[0] = pSrc[0];
-              if (counter > 1) pOut_buf_cur[1] = pSrc[1];
-              pOut_buf_cur += counter;
-            }
-            continue;
-          }
-        }
-#endif
-        do {
-          pOut_buf_cur[0] = pSrc[0];
-          pOut_buf_cur[1] = pSrc[1];
-          pOut_buf_cur[2] = pSrc[2];
-          pOut_buf_cur += 3;
-          pSrc += 3;
-        } while ((int)(counter -= 3) > 2);
-        if ((int)counter > 0) {
-          pOut_buf_cur[0] = pSrc[0];
-          if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1];
-          pOut_buf_cur += counter;
-        }
-      }
-    }
-  } while (!(r->m_final & 1));
-  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) {
-    TINFL_SKIP_BITS(32, num_bits & 7);
-    for (counter = 0; counter < 4; ++counter) {
-      mz_uint s;
-      if (num_bits)
-        TINFL_GET_BITS(41, s, 8);
-      else
-        TINFL_GET_BYTE(42, s);
-      r->m_z_adler32 = (r->m_z_adler32 << 8) | s;
-    }
-  }
-  TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);
-  TINFL_CR_FINISH
-
-common_exit:
-  r->m_num_bits = num_bits;
-  r->m_bit_buf = bit_buf;
-  r->m_dist = dist;
-  r->m_counter = counter;
-  r->m_num_extra = num_extra;
-  r->m_dist_from_out_buf_start = dist_from_out_buf_start;
-  *pIn_buf_size = pIn_buf_cur - pIn_buf_next;
-  *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
-  if ((decomp_flags &
-       (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) &&
-      (status >= 0)) {
-    const mz_uint8 *ptr = pOut_buf_next;
-    size_t buf_len = *pOut_buf_size;
-    mz_uint32 i, s1 = r->m_check_adler32 & 0xffff,
-                 s2 = r->m_check_adler32 >> 16;
-    size_t block_len = buf_len % 5552;
-    while (buf_len) {
-      for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
-        s1 += ptr[0], s2 += s1;
-        s1 += ptr[1], s2 += s1;
-        s1 += ptr[2], s2 += s1;
-        s1 += ptr[3], s2 += s1;
-        s1 += ptr[4], s2 += s1;
-        s1 += ptr[5], s2 += s1;
-        s1 += ptr[6], s2 += s1;
-        s1 += ptr[7], s2 += s1;
-      }
-      for (; i < block_len; ++i) s1 += *ptr++, s2 += s1;
-      s1 %= 65521U, s2 %= 65521U;
-      buf_len -= block_len;
-      block_len = 5552;
-    }
-    r->m_check_adler32 = (s2 << 16) + s1;
-    if ((status == TINFL_STATUS_DONE) &&
-        (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) &&
-        (r->m_check_adler32 != r->m_z_adler32))
-      status = TINFL_STATUS_ADLER32_MISMATCH;
-  }
-  return status;
-}
-
-// Higher level helper functions.
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                   size_t *pOut_len, int flags) {
-  tinfl_decompressor decomp;
-  void *pBuf = NULL, *pNew_buf;
-  size_t src_buf_ofs = 0, out_buf_capacity = 0;
-  *pOut_len = 0;
-  tinfl_init(&decomp);
-  for (;;) {
-    size_t src_buf_size = src_buf_len - src_buf_ofs,
-           dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
-    tinfl_status status = tinfl_decompress(
-        &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size,
-        (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL,
-        &dst_buf_size, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) |
-                           TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
-    if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) {
-      MZ_FREE(pBuf);
-      *pOut_len = 0;
-      return NULL;
-    }
-    src_buf_ofs += src_buf_size;
-    *pOut_len += dst_buf_size;
-    if (status == TINFL_STATUS_DONE) break;
-    new_out_buf_capacity = out_buf_capacity * 2;
-    if (new_out_buf_capacity < 128) new_out_buf_capacity = 128;
-    pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
-    if (!pNew_buf) {
-      MZ_FREE(pBuf);
-      *pOut_len = 0;
-      return NULL;
-    }
-    pBuf = pNew_buf;
-    out_buf_capacity = new_out_buf_capacity;
-  }
-  return pBuf;
-}
-
-size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                   const void *pSrc_buf, size_t src_buf_len,
-                                   int flags) {
-  tinfl_decompressor decomp;
-  tinfl_status status;
-  tinfl_init(&decomp);
-  status =
-      tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len,
-                       (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len,
-                       (flags & ~TINFL_FLAG_HAS_MORE_INPUT) |
-                           TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
-  return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED
-                                       : out_buf_len;
-}
-
-int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size,
-                                     tinfl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags) {
-  int result = 0;
-  tinfl_decompressor decomp;
-  mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE);
-  size_t in_buf_ofs = 0, dict_ofs = 0;
-  if (!pDict) return TINFL_STATUS_FAILED;
-  tinfl_init(&decomp);
-  for (;;) {
-    size_t in_buf_size = *pIn_buf_size - in_buf_ofs,
-           dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
-    tinfl_status status =
-        tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs,
-                         &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
-                         (flags &
-                          ~(TINFL_FLAG_HAS_MORE_INPUT |
-                            TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
-    in_buf_ofs += in_buf_size;
-    if ((dst_buf_size) &&
-        (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
-      break;
-    if (status != TINFL_STATUS_HAS_MORE_OUTPUT) {
-      result = (status == TINFL_STATUS_DONE);
-      break;
-    }
-    dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
-  }
-  MZ_FREE(pDict);
-  *pIn_buf_size = in_buf_ofs;
-  return result;
-}
-
-// ------------------- Low-level Compression (independent from all decompression
-// API's)
-
-// Purposely making these tables static for faster init and thread safety.
-static const mz_uint16 s_tdefl_len_sym[256] = {
-    257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268,
-    268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272,
-    272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274,
-    274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276,
-    276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
-    277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
-    278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279,
-    279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280,
-    280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284,
-    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
-    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
-    285};
-
-static const mz_uint8 s_tdefl_len_extra[256] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0};
-
-static const mz_uint8 s_tdefl_small_dist_sym[512] = {
-    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,
-    8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};
-
-static const mz_uint8 s_tdefl_small_dist_extra[512] = {
-    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
-
-static const mz_uint8 s_tdefl_large_dist_sym[128] = {
-    0,  0,  18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24,
-    24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26,
-    26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-    27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-    28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29};
-
-static const mz_uint8 s_tdefl_large_dist_extra[128] = {
-    0,  0,  8,  8,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13};
-
-// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted
-// values.
-typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq;
-static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms,
-                                             tdefl_sym_freq *pSyms0,
-                                             tdefl_sym_freq *pSyms1) {
-  mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2];
-  tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;
-  MZ_CLEAR_OBJ(hist);
-  for (i = 0; i < num_syms; i++) {
-    mz_uint freq = pSyms0[i].m_key;
-    hist[freq & 0xFF]++;
-    hist[256 + ((freq >> 8) & 0xFF)]++;
-  }
-  while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))
-    total_passes--;
-  for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) {
-    const mz_uint32 *pHist = &hist[pass << 8];
-    mz_uint offsets[256], cur_ofs = 0;
-    for (i = 0; i < 256; i++) {
-      offsets[i] = cur_ofs;
-      cur_ofs += pHist[i];
-    }
-    for (i = 0; i < num_syms; i++)
-      pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] =
-          pCur_syms[i];
-    {
-      tdefl_sym_freq *t = pCur_syms;
-      pCur_syms = pNew_syms;
-      pNew_syms = t;
-    }
-  }
-  return pCur_syms;
-}
-
-// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat,
-// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
-static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) {
-  int root, leaf, next, avbl, used, dpth;
-  if (n == 0)
-    return;
-  else if (n == 1) {
-    A[0].m_key = 1;
-    return;
-  }
-  A[0].m_key += A[1].m_key;
-  root = 0;
-  leaf = 2;
-  for (next = 1; next < n - 1; next++) {
-    if (leaf >= n || A[root].m_key < A[leaf].m_key) {
-      A[next].m_key = A[root].m_key;
-      A[root++].m_key = (mz_uint16)next;
-    } else
-      A[next].m_key = A[leaf++].m_key;
-    if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) {
-      A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
-      A[root++].m_key = (mz_uint16)next;
-    } else
-      A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
-  }
-  A[n - 2].m_key = 0;
-  for (next = n - 3; next >= 0; next--)
-    A[next].m_key = A[A[next].m_key].m_key + 1;
-  avbl = 1;
-  used = dpth = 0;
-  root = n - 2;
-  next = n - 1;
-  while (avbl > 0) {
-    while (root >= 0 && (int)A[root].m_key == dpth) {
-      used++;
-      root--;
-    }
-    while (avbl > used) {
-      A[next--].m_key = (mz_uint16)(dpth);
-      avbl--;
-    }
-    avbl = 2 * used;
-    dpth++;
-    used = 0;
-  }
-}
-
-// Limits canonical Huffman code table's max code size.
-enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };
-static void tdefl_huffman_enforce_max_code_size(int *pNum_codes,
-                                                int code_list_len,
-                                                int max_code_size) {
-  int i;
-  mz_uint32 total = 0;
-  if (code_list_len <= 1) return;
-  for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++)
-    pNum_codes[max_code_size] += pNum_codes[i];
-  for (i = max_code_size; i > 0; i--)
-    total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i));
-  while (total != (1UL << max_code_size)) {
-    pNum_codes[max_code_size]--;
-    for (i = max_code_size - 1; i > 0; i--)
-      if (pNum_codes[i]) {
-        pNum_codes[i]--;
-        pNum_codes[i + 1] += 2;
-        break;
-      }
-    total--;
-  }
-}
-
-static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num,
-                                         int table_len, int code_size_limit,
-                                         int static_table) {
-  int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE];
-  mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1];
-  MZ_CLEAR_OBJ(num_codes);
-  if (static_table) {
-    for (i = 0; i < table_len; i++)
-      num_codes[d->m_huff_code_sizes[table_num][i]]++;
-  } else {
-    tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS],
-        *pSyms;
-    int num_used_syms = 0;
-    const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
-    for (i = 0; i < table_len; i++)
-      if (pSym_count[i]) {
-        syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i];
-        syms0[num_used_syms++].m_sym_index = (mz_uint16)i;
-      }
-
-    pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1);
-    tdefl_calculate_minimum_redundancy(pSyms, num_used_syms);
-
-    for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
-
-    tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms,
-                                        code_size_limit);
-
-    MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]);
-    MZ_CLEAR_OBJ(d->m_huff_codes[table_num]);
-    for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
-      for (l = num_codes[i]; l > 0; l--)
-        d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i);
-  }
-
-  next_code[1] = 0;
-  for (j = 0, i = 2; i <= code_size_limit; i++)
-    next_code[i] = j = ((j + num_codes[i - 1]) << 1);
-
-  for (i = 0; i < table_len; i++) {
-    mz_uint rev_code = 0, code, code_size;
-    if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue;
-    code = next_code[code_size]++;
-    for (l = code_size; l > 0; l--, code >>= 1)
-      rev_code = (rev_code << 1) | (code & 1);
-    d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
-  }
-}
-
-#define TDEFL_PUT_BITS(b, l)                               \
-  do {                                                     \
-    mz_uint bits = b;                                      \
-    mz_uint len = l;                                       \
-    MZ_ASSERT(bits <= ((1U << len) - 1U));                 \
-    d->m_bit_buffer |= (bits << d->m_bits_in);             \
-    d->m_bits_in += len;                                   \
-    while (d->m_bits_in >= 8) {                            \
-      if (d->m_pOutput_buf < d->m_pOutput_buf_end)         \
-        *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
-      d->m_bit_buffer >>= 8;                               \
-      d->m_bits_in -= 8;                                   \
-    }                                                      \
-  }                                                        \
-  MZ_MACRO_END
-
-#define TDEFL_RLE_PREV_CODE_SIZE()                                        \
-  {                                                                       \
-    if (rle_repeat_count) {                                               \
-      if (rle_repeat_count < 3) {                                         \
-        d->m_huff_count[2][prev_code_size] = (mz_uint16)(                 \
-            d->m_huff_count[2][prev_code_size] + rle_repeat_count);       \
-        while (rle_repeat_count--)                                        \
-          packed_code_sizes[num_packed_code_sizes++] = prev_code_size;    \
-      } else {                                                            \
-        d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \
-        packed_code_sizes[num_packed_code_sizes++] = 16;                  \
-        packed_code_sizes[num_packed_code_sizes++] =                      \
-            (mz_uint8)(rle_repeat_count - 3);                             \
-      }                                                                   \
-      rle_repeat_count = 0;                                               \
-    }                                                                     \
-  }
-
-#define TDEFL_RLE_ZERO_CODE_SIZE()                                            \
-  {                                                                           \
-    if (rle_z_count) {                                                        \
-      if (rle_z_count < 3) {                                                  \
-        d->m_huff_count[2][0] =                                               \
-            (mz_uint16)(d->m_huff_count[2][0] + rle_z_count);                 \
-        while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
-      } else if (rle_z_count <= 10) {                                         \
-        d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1);     \
-        packed_code_sizes[num_packed_code_sizes++] = 17;                      \
-        packed_code_sizes[num_packed_code_sizes++] =                          \
-            (mz_uint8)(rle_z_count - 3);                                      \
-      } else {                                                                \
-        d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1);     \
-        packed_code_sizes[num_packed_code_sizes++] = 18;                      \
-        packed_code_sizes[num_packed_code_sizes++] =                          \
-            (mz_uint8)(rle_z_count - 11);                                     \
-      }                                                                       \
-      rle_z_count = 0;                                                        \
-    }                                                                         \
-  }
-
-static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = {
-    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-static void tdefl_start_dynamic_block(tdefl_compressor *d) {
-  int num_lit_codes, num_dist_codes, num_bit_lengths;
-  mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count,
-      rle_repeat_count, packed_code_sizes_index;
-  mz_uint8
-      code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1],
-      packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1],
-      prev_code_size = 0xFF;
-
-  d->m_huff_count[0][256] = 1;
-
-  tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
-  tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);
-
-  for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--)
-    if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break;
-  for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--)
-    if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break;
-
-  memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
-  memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0],
-         num_dist_codes);
-  total_code_sizes_to_pack = num_lit_codes + num_dist_codes;
-  num_packed_code_sizes = 0;
-  rle_z_count = 0;
-  rle_repeat_count = 0;
-
-  memset(&d->m_huff_count[2][0], 0,
-         sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
-  for (i = 0; i < total_code_sizes_to_pack; i++) {
-    mz_uint8 code_size = code_sizes_to_pack[i];
-    if (!code_size) {
-      TDEFL_RLE_PREV_CODE_SIZE();
-      if (++rle_z_count == 138) {
-        TDEFL_RLE_ZERO_CODE_SIZE();
-      }
-    } else {
-      TDEFL_RLE_ZERO_CODE_SIZE();
-      if (code_size != prev_code_size) {
-        TDEFL_RLE_PREV_CODE_SIZE();
-        d->m_huff_count[2][code_size] =
-            (mz_uint16)(d->m_huff_count[2][code_size] + 1);
-        packed_code_sizes[num_packed_code_sizes++] = code_size;
-      } else if (++rle_repeat_count == 6) {
-        TDEFL_RLE_PREV_CODE_SIZE();
-      }
-    }
-    prev_code_size = code_size;
-  }
-  if (rle_repeat_count) {
-    TDEFL_RLE_PREV_CODE_SIZE();
-  } else {
-    TDEFL_RLE_ZERO_CODE_SIZE();
-  }
-
-  tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);
-
-  TDEFL_PUT_BITS(2, 2);
-
-  TDEFL_PUT_BITS(num_lit_codes - 257, 5);
-  TDEFL_PUT_BITS(num_dist_codes - 1, 5);
-
-  for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--)
-    if (d->m_huff_code_sizes
-            [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]])
-      break;
-  num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1));
-  TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
-  for (i = 0; (int)i < num_bit_lengths; i++)
-    TDEFL_PUT_BITS(
-        d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);
-
-  for (packed_code_sizes_index = 0;
-       packed_code_sizes_index < num_packed_code_sizes;) {
-    mz_uint code = packed_code_sizes[packed_code_sizes_index++];
-    MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
-    TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
-    if (code >= 16)
-      TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++],
-                     "\02\03\07"[code - 16]);
-  }
-}
-
-static void tdefl_start_static_block(tdefl_compressor *d) {
-  mz_uint i;
-  mz_uint8 *p = &d->m_huff_code_sizes[0][0];
-
-  for (i = 0; i <= 143; ++i) *p++ = 8;
-  for (; i <= 255; ++i) *p++ = 9;
-  for (; i <= 279; ++i) *p++ = 7;
-  for (; i <= 287; ++i) *p++ = 8;
-
-  memset(d->m_huff_code_sizes[1], 5, 32);
-
-  tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
-  tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);
-
-  TDEFL_PUT_BITS(1, 2);
-}
-
-static const mz_uint mz_bitmasks[17] = {
-    0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
-    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF};
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \
-    MINIZ_HAS_64BIT_REGISTERS
-static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) {
-  mz_uint flags;
-  mz_uint8 *pLZ_codes;
-  mz_uint8 *pOutput_buf = d->m_pOutput_buf;
-  mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
-  mz_uint64 bit_buffer = d->m_bit_buffer;
-  mz_uint bits_in = d->m_bits_in;
-
-#define TDEFL_PUT_BITS_FAST(b, l)                \
-  {                                              \
-    bit_buffer |= (((mz_uint64)(b)) << bits_in); \
-    bits_in += (l);                              \
-  }
-
-  flags = 1;
-  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end;
-       flags >>= 1) {
-    if (flags == 1) flags = *pLZ_codes++ | 0x100;
-
-    if (flags & 1) {
-      mz_uint s0, s1, n0, n1, sym, num_extra_bits;
-      mz_uint match_len = pLZ_codes[0],
-              match_dist = *(const mz_uint16 *)(pLZ_codes + 1);
-      pLZ_codes += 3;
-
-      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]],
-                          d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]],
-                          s_tdefl_len_extra[match_len]);
-
-      // This sequence coaxes MSVC into using cmov's vs. jmp's.
-      s0 = s_tdefl_small_dist_sym[match_dist & 511];
-      n0 = s_tdefl_small_dist_extra[match_dist & 511];
-      s1 = s_tdefl_large_dist_sym[match_dist >> 8];
-      n1 = s_tdefl_large_dist_extra[match_dist >> 8];
-      sym = (match_dist < 512) ? s0 : s1;
-      num_extra_bits = (match_dist < 512) ? n0 : n1;
-
-      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym],
-                          d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits],
-                          num_extra_bits);
-    } else {
-      mz_uint lit = *pLZ_codes++;
-      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit],
-                          d->m_huff_code_sizes[0][lit]);
-
-      if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) {
-        flags >>= 1;
-        lit = *pLZ_codes++;
-        MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-        TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit],
-                            d->m_huff_code_sizes[0][lit]);
-
-        if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) {
-          flags >>= 1;
-          lit = *pLZ_codes++;
-          MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-          TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit],
-                              d->m_huff_code_sizes[0][lit]);
-        }
-      }
-    }
-
-    if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE;
-
-    *(mz_uint64 *)pOutput_buf = bit_buffer;
-    pOutput_buf += (bits_in >> 3);
-    bit_buffer >>= (bits_in & ~7);
-    bits_in &= 7;
-  }
-
-#undef TDEFL_PUT_BITS_FAST
-
-  d->m_pOutput_buf = pOutput_buf;
-  d->m_bits_in = 0;
-  d->m_bit_buffer = 0;
-
-  while (bits_in) {
-    mz_uint32 n = MZ_MIN(bits_in, 16);
-    TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
-    bit_buffer >>= n;
-    bits_in -= n;
-  }
-
-  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
-
-  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
-}
-#else
-static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) {
-  mz_uint flags;
-  mz_uint8 *pLZ_codes;
-
-  flags = 1;
-  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf;
-       flags >>= 1) {
-    if (flags == 1) flags = *pLZ_codes++ | 0x100;
-    if (flags & 1) {
-      mz_uint sym, num_extra_bits;
-      mz_uint match_len = pLZ_codes[0],
-              match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8));
-      pLZ_codes += 3;
-
-      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]],
-                     d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]],
-                     s_tdefl_len_extra[match_len]);
-
-      if (match_dist < 512) {
-        sym = s_tdefl_small_dist_sym[match_dist];
-        num_extra_bits = s_tdefl_small_dist_extra[match_dist];
-      } else {
-        sym = s_tdefl_large_dist_sym[match_dist >> 8];
-        num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
-      }
-      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
-    } else {
-      mz_uint lit = *pLZ_codes++;
-      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-      TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
-    }
-  }
-
-  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
-
-  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
-}
-#endif  // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN &&
-        // MINIZ_HAS_64BIT_REGISTERS
-
-static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) {
-  if (static_block)
-    tdefl_start_static_block(d);
-  else
-    tdefl_start_dynamic_block(d);
-  return tdefl_compress_lz_codes(d);
-}
-
-static int tdefl_flush_block(tdefl_compressor *d, int flush) {
-  mz_uint saved_bit_buf, saved_bits_in;
-  mz_uint8 *pSaved_output_buf;
-  mz_bool comp_block_succeeded = MZ_FALSE;
-  int n, use_raw_block =
-             ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) &&
-             (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
-  mz_uint8 *pOutput_buf_start =
-      ((d->m_pPut_buf_func == NULL) &&
-       ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE))
-          ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs)
-          : d->m_output_buf;
-
-  d->m_pOutput_buf = pOutput_buf_start;
-  d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;
-
-  MZ_ASSERT(!d->m_output_flush_remaining);
-  d->m_output_flush_ofs = 0;
-  d->m_output_flush_remaining = 0;
-
-  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left);
-  d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);
-
-  if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) {
-    TDEFL_PUT_BITS(0x78, 8);
-    TDEFL_PUT_BITS(0x01, 8);
-  }
-
-  TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1);
-
-  pSaved_output_buf = d->m_pOutput_buf;
-  saved_bit_buf = d->m_bit_buffer;
-  saved_bits_in = d->m_bits_in;
-
-  if (!use_raw_block)
-    comp_block_succeeded =
-        tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) ||
-                                    (d->m_total_lz_bytes < 48));
-
-  // If the block gets expanded, forget the current contents of the output
-  // buffer and send a raw block instead.
-  if (((use_raw_block) ||
-       ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >=
-                                  d->m_total_lz_bytes))) &&
-      ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) {
-    mz_uint i;
-    d->m_pOutput_buf = pSaved_output_buf;
-    d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
-    TDEFL_PUT_BITS(0, 2);
-    if (d->m_bits_in) {
-      TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
-    }
-    for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) {
-      TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
-    }
-    for (i = 0; i < d->m_total_lz_bytes; ++i) {
-      TDEFL_PUT_BITS(
-          d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK],
-          8);
-    }
-  }
-  // Check for the extremely unlikely (if not impossible) case of the compressed
-  // block not fitting into the output buffer when using dynamic codes.
-  else if (!comp_block_succeeded) {
-    d->m_pOutput_buf = pSaved_output_buf;
-    d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
-    tdefl_compress_block(d, MZ_TRUE);
-  }
-
-  if (flush) {
-    if (flush == TDEFL_FINISH) {
-      if (d->m_bits_in) {
-        TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
-      }
-      if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) {
-        mz_uint i, a = d->m_adler32;
-        for (i = 0; i < 4; i++) {
-          TDEFL_PUT_BITS((a >> 24) & 0xFF, 8);
-          a <<= 8;
-        }
-      }
-    } else {
-      mz_uint i, z = 0;
-      TDEFL_PUT_BITS(0, 3);
-      if (d->m_bits_in) {
-        TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
-      }
-      for (i = 2; i; --i, z ^= 0xFFFF) {
-        TDEFL_PUT_BITS(z & 0xFFFF, 16);
-      }
-    }
-  }
-
-  MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);
-
-  memset(&d->m_huff_count[0][0], 0,
-         sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
-  memset(&d->m_huff_count[1][0], 0,
-         sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
-
-  d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
-  d->m_pLZ_flags = d->m_lz_code_buf;
-  d->m_num_flags_left = 8;
-  d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes;
-  d->m_total_lz_bytes = 0;
-  d->m_block_index++;
-
-  if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) {
-    if (d->m_pPut_buf_func) {
-      *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
-      if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
-        return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
-    } else if (pOutput_buf_start == d->m_output_buf) {
-      int bytes_to_copy = (int)MZ_MIN(
-          (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
-      memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf,
-             bytes_to_copy);
-      d->m_out_buf_ofs += bytes_to_copy;
-      if ((n -= bytes_to_copy) != 0) {
-        d->m_output_flush_ofs = bytes_to_copy;
-        d->m_output_flush_remaining = n;
-      }
-    } else {
-      d->m_out_buf_ofs += n;
-    }
-  }
-
-  return d->m_output_flush_remaining;
-}
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
-#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p)
-static MZ_FORCEINLINE void tdefl_find_match(
-    tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist,
-    mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) {
-  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK,
-                match_len = *pMatch_len, probe_pos = pos, next_probe_pos,
-                probe_len;
-  mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
-  const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q;
-  mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]),
-            s01 = TDEFL_READ_UNALIGNED_WORD(s);
-  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
-  if (max_match_len <= match_len) return;
-  for (;;) {
-    for (;;) {
-      if (--num_probes_left == 0) return;
-#define TDEFL_PROBE                                                            \
-  next_probe_pos = d->m_next[probe_pos];                                       \
-  if ((!next_probe_pos) ||                                                     \
-      ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist))       \
-    return;                                                                    \
-  probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                        \
-  if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \
-    break;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-    }
-    if (!dist) break;
-    q = (const mz_uint16 *)(d->m_dict + probe_pos);
-    if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue;
-    p = s;
-    probe_len = 32;
-    do {
-    } while (
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (--probe_len > 0));
-    if (!probe_len) {
-      *pMatch_dist = dist;
-      *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN);
-      break;
-    } else if ((probe_len = ((mz_uint)(p - s) * 2) +
-                            (mz_uint)(*(const mz_uint8 *)p ==
-                                      *(const mz_uint8 *)q)) > match_len) {
-      *pMatch_dist = dist;
-      if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) ==
-          max_match_len)
-        break;
-      c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]);
-    }
-  }
-}
-#else
-static MZ_FORCEINLINE void tdefl_find_match(
-    tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist,
-    mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) {
-  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK,
-                match_len = *pMatch_len, probe_pos = pos, next_probe_pos,
-                probe_len;
-  mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
-  const mz_uint8 *s = d->m_dict + pos, *p, *q;
-  mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
-  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
-  if (max_match_len <= match_len) return;
-  for (;;) {
-    for (;;) {
-      if (--num_probes_left == 0) return;
-#define TDEFL_PROBE                                                      \
-  next_probe_pos = d->m_next[probe_pos];                                 \
-  if ((!next_probe_pos) ||                                               \
-      ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \
-    return;                                                              \
-  probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                  \
-  if ((d->m_dict[probe_pos + match_len] == c0) &&                        \
-      (d->m_dict[probe_pos + match_len - 1] == c1))                      \
-    break;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-    }
-    if (!dist) break;
-    p = s;
-    q = d->m_dict + probe_pos;
-    for (probe_len = 0; probe_len < max_match_len; probe_len++)
-      if (*p++ != *q++) break;
-    if (probe_len > match_len) {
-      *pMatch_dist = dist;
-      if ((*pMatch_len = match_len = probe_len) == max_match_len) return;
-      c0 = d->m_dict[pos + match_len];
-      c1 = d->m_dict[pos + match_len - 1];
-    }
-  }
-}
-#endif  // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-static mz_bool tdefl_compress_fast(tdefl_compressor *d) {
-  // Faster, minimally featured LZRW1-style match+parse loop with better
-  // register utilization. Intended for applications where raw throughput is
-  // valued more highly than ratio.
-  mz_uint lookahead_pos = d->m_lookahead_pos,
-          lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size,
-          total_lz_bytes = d->m_total_lz_bytes,
-          num_flags_left = d->m_num_flags_left;
-  mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
-  mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
-
-  while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) {
-    const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
-    mz_uint dst_pos =
-        (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
-    mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(
-        d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size);
-    d->m_src_buf_left -= num_bytes_to_process;
-    lookahead_size += num_bytes_to_process;
-
-    while (num_bytes_to_process) {
-      mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process);
-      memcpy(d->m_dict + dst_pos, d->m_pSrc, n);
-      if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
-        memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc,
-               MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos));
-      d->m_pSrc += n;
-      dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK;
-      num_bytes_to_process -= n;
-    }
-
-    dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size);
-    if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE))
-      break;
-
-    while (lookahead_size >= 4) {
-      mz_uint cur_match_dist, cur_match_len = 1;
-      mz_uint8 *pCur_dict = d->m_dict + cur_pos;
-      mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF;
-      mz_uint hash =
-          (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) &
-          TDEFL_LEVEL1_HASH_SIZE_MASK;
-      mz_uint probe_pos = d->m_hash[hash];
-      d->m_hash[hash] = (mz_uint16)lookahead_pos;
-
-      if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <=
-           dict_size) &&
-          ((*(const mz_uint32 *)(d->m_dict +
-                                 (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) &
-            0xFFFFFF) == first_trigram)) {
-        const mz_uint16 *p = (const mz_uint16 *)pCur_dict;
-        const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos);
-        mz_uint32 probe_len = 32;
-        do {
-        } while ((TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (--probe_len > 0));
-        cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) +
-                        (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q);
-        if (!probe_len)
-          cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0;
-
-        if ((cur_match_len < TDEFL_MIN_MATCH_LEN) ||
-            ((cur_match_len == TDEFL_MIN_MATCH_LEN) &&
-             (cur_match_dist >= 8U * 1024U))) {
-          cur_match_len = 1;
-          *pLZ_code_buf++ = (mz_uint8)first_trigram;
-          *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
-          d->m_huff_count[0][(mz_uint8)first_trigram]++;
-        } else {
-          mz_uint32 s0, s1;
-          cur_match_len = MZ_MIN(cur_match_len, lookahead_size);
-
-          MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) &&
-                    (cur_match_dist >= 1) &&
-                    (cur_match_dist <= TDEFL_LZ_DICT_SIZE));
-
-          cur_match_dist--;
-
-          pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN);
-          *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist;
-          pLZ_code_buf += 3;
-          *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80);
-
-          s0 = s_tdefl_small_dist_sym[cur_match_dist & 511];
-          s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8];
-          d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++;
-
-          d->m_huff_count[0][s_tdefl_len_sym[cur_match_len -
-                                             TDEFL_MIN_MATCH_LEN]]++;
-        }
-      } else {
-        *pLZ_code_buf++ = (mz_uint8)first_trigram;
-        *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
-        d->m_huff_count[0][(mz_uint8)first_trigram]++;
-      }
-
-      if (--num_flags_left == 0) {
-        num_flags_left = 8;
-        pLZ_flags = pLZ_code_buf++;
-      }
-
-      total_lz_bytes += cur_match_len;
-      lookahead_pos += cur_match_len;
-      dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE);
-      cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
-      MZ_ASSERT(lookahead_size >= cur_match_len);
-      lookahead_size -= cur_match_len;
-
-      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) {
-        int n;
-        d->m_lookahead_pos = lookahead_pos;
-        d->m_lookahead_size = lookahead_size;
-        d->m_dict_size = dict_size;
-        d->m_total_lz_bytes = total_lz_bytes;
-        d->m_pLZ_code_buf = pLZ_code_buf;
-        d->m_pLZ_flags = pLZ_flags;
-        d->m_num_flags_left = num_flags_left;
-        if ((n = tdefl_flush_block(d, 0)) != 0)
-          return (n < 0) ? MZ_FALSE : MZ_TRUE;
-        total_lz_bytes = d->m_total_lz_bytes;
-        pLZ_code_buf = d->m_pLZ_code_buf;
-        pLZ_flags = d->m_pLZ_flags;
-        num_flags_left = d->m_num_flags_left;
-      }
-    }
-
-    while (lookahead_size) {
-      mz_uint8 lit = d->m_dict[cur_pos];
-
-      total_lz_bytes++;
-      *pLZ_code_buf++ = lit;
-      *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
-      if (--num_flags_left == 0) {
-        num_flags_left = 8;
-        pLZ_flags = pLZ_code_buf++;
-      }
-
-      d->m_huff_count[0][lit]++;
-
-      lookahead_pos++;
-      dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE);
-      cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
-      lookahead_size--;
-
-      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) {
-        int n;
-        d->m_lookahead_pos = lookahead_pos;
-        d->m_lookahead_size = lookahead_size;
-        d->m_dict_size = dict_size;
-        d->m_total_lz_bytes = total_lz_bytes;
-        d->m_pLZ_code_buf = pLZ_code_buf;
-        d->m_pLZ_flags = pLZ_flags;
-        d->m_num_flags_left = num_flags_left;
-        if ((n = tdefl_flush_block(d, 0)) != 0)
-          return (n < 0) ? MZ_FALSE : MZ_TRUE;
-        total_lz_bytes = d->m_total_lz_bytes;
-        pLZ_code_buf = d->m_pLZ_code_buf;
-        pLZ_flags = d->m_pLZ_flags;
-        num_flags_left = d->m_num_flags_left;
-      }
-    }
-  }
-
-  d->m_lookahead_pos = lookahead_pos;
-  d->m_lookahead_size = lookahead_size;
-  d->m_dict_size = dict_size;
-  d->m_total_lz_bytes = total_lz_bytes;
-  d->m_pLZ_code_buf = pLZ_code_buf;
-  d->m_pLZ_flags = pLZ_flags;
-  d->m_num_flags_left = num_flags_left;
-  return MZ_TRUE;
-}
-#endif  // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-
-static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d,
-                                                mz_uint8 lit) {
-  d->m_total_lz_bytes++;
-  *d->m_pLZ_code_buf++ = lit;
-  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1);
-  if (--d->m_num_flags_left == 0) {
-    d->m_num_flags_left = 8;
-    d->m_pLZ_flags = d->m_pLZ_code_buf++;
-  }
-  d->m_huff_count[0][lit]++;
-}
-
-static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d,
-                                              mz_uint match_len,
-                                              mz_uint match_dist) {
-  mz_uint32 s0, s1;
-
-  MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) &&
-            (match_dist <= TDEFL_LZ_DICT_SIZE));
-
-  d->m_total_lz_bytes += match_len;
-
-  d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);
-
-  match_dist -= 1;
-  d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF);
-  d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8);
-  d->m_pLZ_code_buf += 3;
-
-  *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80);
-  if (--d->m_num_flags_left == 0) {
-    d->m_num_flags_left = 8;
-    d->m_pLZ_flags = d->m_pLZ_code_buf++;
-  }
-
-  s0 = s_tdefl_small_dist_sym[match_dist & 511];
-  s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
-  d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;
-
-  if (match_len >= TDEFL_MIN_MATCH_LEN)
-    d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
-}
-
-static mz_bool tdefl_compress_normal(tdefl_compressor *d) {
-  const mz_uint8 *pSrc = d->m_pSrc;
-  size_t src_buf_left = d->m_src_buf_left;
-  tdefl_flush flush = d->m_flush;
-
-  while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) {
-    mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
-    // Update dictionary and hash chains. Keeps the lookahead size equal to
-    // TDEFL_MAX_MATCH_LEN.
-    if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) {
-      mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) &
-                        TDEFL_LZ_DICT_SIZE_MASK,
-              ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
-      mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK]
-                      << TDEFL_LZ_HASH_SHIFT) ^
-                     d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
-      mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(
-          src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
-      const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
-      src_buf_left -= num_bytes_to_process;
-      d->m_lookahead_size += num_bytes_to_process;
-      while (pSrc != pSrc_end) {
-        mz_uint8 c = *pSrc++;
-        d->m_dict[dst_pos] = c;
-        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
-          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
-        hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
-        d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
-        d->m_hash[hash] = (mz_uint16)(ins_pos);
-        dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
-        ins_pos++;
-      }
-    } else {
-      while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) {
-        mz_uint8 c = *pSrc++;
-        mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) &
-                          TDEFL_LZ_DICT_SIZE_MASK;
-        src_buf_left--;
-        d->m_dict[dst_pos] = c;
-        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
-          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
-        if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) {
-          mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
-          mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK]
-                           << (TDEFL_LZ_HASH_SHIFT * 2)) ^
-                          (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]
-                           << TDEFL_LZ_HASH_SHIFT) ^
-                          c) &
-                         (TDEFL_LZ_HASH_SIZE - 1);
-          d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
-          d->m_hash[hash] = (mz_uint16)(ins_pos);
-        }
-      }
-    }
-    d->m_dict_size =
-        MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
-    if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) break;
-
-    // Simple lazy/greedy parsing state machine.
-    len_to_move = 1;
-    cur_match_dist = 0;
-    cur_match_len =
-        d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1);
-    cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
-    if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) {
-      if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) {
-        mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
-        cur_match_len = 0;
-        while (cur_match_len < d->m_lookahead_size) {
-          if (d->m_dict[cur_pos + cur_match_len] != c) break;
-          cur_match_len++;
-        }
-        if (cur_match_len < TDEFL_MIN_MATCH_LEN)
-          cur_match_len = 0;
-        else
-          cur_match_dist = 1;
-      }
-    } else {
-      tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size,
-                       d->m_lookahead_size, &cur_match_dist, &cur_match_len);
-    }
-    if (((cur_match_len == TDEFL_MIN_MATCH_LEN) &&
-         (cur_match_dist >= 8U * 1024U)) ||
-        (cur_pos == cur_match_dist) ||
-        ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) {
-      cur_match_dist = cur_match_len = 0;
-    }
-    if (d->m_saved_match_len) {
-      if (cur_match_len > d->m_saved_match_len) {
-        tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
-        if (cur_match_len >= 128) {
-          tdefl_record_match(d, cur_match_len, cur_match_dist);
-          d->m_saved_match_len = 0;
-          len_to_move = cur_match_len;
-        } else {
-          d->m_saved_lit = d->m_dict[cur_pos];
-          d->m_saved_match_dist = cur_match_dist;
-          d->m_saved_match_len = cur_match_len;
-        }
-      } else {
-        tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
-        len_to_move = d->m_saved_match_len - 1;
-        d->m_saved_match_len = 0;
-      }
-    } else if (!cur_match_dist)
-      tdefl_record_literal(d,
-                           d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
-    else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) ||
-             (cur_match_len >= 128)) {
-      tdefl_record_match(d, cur_match_len, cur_match_dist);
-      len_to_move = cur_match_len;
-    } else {
-      d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)];
-      d->m_saved_match_dist = cur_match_dist;
-      d->m_saved_match_len = cur_match_len;
-    }
-    // Move the lookahead forward by len_to_move bytes.
-    d->m_lookahead_pos += len_to_move;
-    MZ_ASSERT(d->m_lookahead_size >= len_to_move);
-    d->m_lookahead_size -= len_to_move;
-    d->m_dict_size =
-        MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE);
-    // Check if it's time to flush the current LZ codes to the internal output
-    // buffer.
-    if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
-        ((d->m_total_lz_bytes > 31 * 1024) &&
-         (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >=
-           d->m_total_lz_bytes) ||
-          (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) {
-      int n;
-      d->m_pSrc = pSrc;
-      d->m_src_buf_left = src_buf_left;
-      if ((n = tdefl_flush_block(d, 0)) != 0)
-        return (n < 0) ? MZ_FALSE : MZ_TRUE;
-    }
-  }
-
-  d->m_pSrc = pSrc;
-  d->m_src_buf_left = src_buf_left;
-  return MZ_TRUE;
-}
-
-static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) {
-  if (d->m_pIn_buf_size) {
-    *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
-  }
-
-  if (d->m_pOut_buf_size) {
-    size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs,
-                      d->m_output_flush_remaining);
-    memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs,
-           d->m_output_buf + d->m_output_flush_ofs, n);
-    d->m_output_flush_ofs += (mz_uint)n;
-    d->m_output_flush_remaining -= (mz_uint)n;
-    d->m_out_buf_ofs += n;
-
-    *d->m_pOut_buf_size = d->m_out_buf_ofs;
-  }
-
-  return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE
-                                                         : TDEFL_STATUS_OKAY;
-}
-
-tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf,
-                            size_t *pIn_buf_size, void *pOut_buf,
-                            size_t *pOut_buf_size, tdefl_flush flush) {
-  if (!d) {
-    if (pIn_buf_size) *pIn_buf_size = 0;
-    if (pOut_buf_size) *pOut_buf_size = 0;
-    return TDEFL_STATUS_BAD_PARAM;
-  }
-
-  d->m_pIn_buf = pIn_buf;
-  d->m_pIn_buf_size = pIn_buf_size;
-  d->m_pOut_buf = pOut_buf;
-  d->m_pOut_buf_size = pOut_buf_size;
-  d->m_pSrc = (const mz_uint8 *)(pIn_buf);
-  d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
-  d->m_out_buf_ofs = 0;
-  d->m_flush = flush;
-
-  if (((d->m_pPut_buf_func != NULL) ==
-       ((pOut_buf != NULL) || (pOut_buf_size != NULL))) ||
-      (d->m_prev_return_status != TDEFL_STATUS_OKAY) ||
-      (d->m_wants_to_finish && (flush != TDEFL_FINISH)) ||
-      (pIn_buf_size && *pIn_buf_size && !pIn_buf) ||
-      (pOut_buf_size && *pOut_buf_size && !pOut_buf)) {
-    if (pIn_buf_size) *pIn_buf_size = 0;
-    if (pOut_buf_size) *pOut_buf_size = 0;
-    return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM);
-  }
-  d->m_wants_to_finish |= (flush == TDEFL_FINISH);
-
-  if ((d->m_output_flush_remaining) || (d->m_finished))
-    return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-  if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) &&
-      ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) &&
-      ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS |
-                      TDEFL_RLE_MATCHES)) == 0)) {
-    if (!tdefl_compress_fast(d)) return d->m_prev_return_status;
-  } else
-#endif  // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-  {
-    if (!tdefl_compress_normal(d)) return d->m_prev_return_status;
-  }
-
-  if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) &&
-      (pIn_buf))
-    d->m_adler32 =
-        (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf,
-                              d->m_pSrc - (const mz_uint8 *)pIn_buf);
-
-  if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) &&
-      (!d->m_output_flush_remaining)) {
-    if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status;
-    d->m_finished = (flush == TDEFL_FINISH);
-    if (flush == TDEFL_FULL_FLUSH) {
-      MZ_CLEAR_OBJ(d->m_hash);
-      MZ_CLEAR_OBJ(d->m_next);
-      d->m_dict_size = 0;
-    }
-  }
-
-  return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
-}
-
-tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf,
-                                   size_t in_buf_size, tdefl_flush flush) {
-  MZ_ASSERT(d->m_pPut_buf_func);
-  return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush);
-}
-
-tdefl_status tdefl_init(tdefl_compressor *d,
-                        tdefl_put_buf_func_ptr pPut_buf_func,
-                        void *pPut_buf_user, int flags) {
-  d->m_pPut_buf_func = pPut_buf_func;
-  d->m_pPut_buf_user = pPut_buf_user;
-  d->m_flags = (mz_uint)(flags);
-  d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3;
-  d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0;
-  d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3;
-  if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash);
-  d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size =
-      d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
-  d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished =
-      d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
-  d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
-  d->m_pLZ_flags = d->m_lz_code_buf;
-  d->m_num_flags_left = 8;
-  d->m_pOutput_buf = d->m_output_buf;
-  d->m_pOutput_buf_end = d->m_output_buf;
-  d->m_prev_return_status = TDEFL_STATUS_OKAY;
-  d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0;
-  d->m_adler32 = 1;
-  d->m_pIn_buf = NULL;
-  d->m_pOut_buf = NULL;
-  d->m_pIn_buf_size = NULL;
-  d->m_pOut_buf_size = NULL;
-  d->m_flush = TDEFL_NO_FLUSH;
-  d->m_pSrc = NULL;
-  d->m_src_buf_left = 0;
-  d->m_out_buf_ofs = 0;
-  memset(&d->m_huff_count[0][0], 0,
-         sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
-  memset(&d->m_huff_count[1][0], 0,
-         sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
-  return TDEFL_STATUS_OKAY;
-}
-
-tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) {
-  return d->m_prev_return_status;
-}
-
-mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; }
-
-mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len,
-                                     tdefl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags) {
-  tdefl_compressor *pComp;
-  mz_bool succeeded;
-  if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE;
-  pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor));
-  if (!pComp) return MZ_FALSE;
-  succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) ==
-               TDEFL_STATUS_OKAY);
-  succeeded =
-      succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) ==
-                    TDEFL_STATUS_DONE);
-  MZ_FREE(pComp);
-  return succeeded;
-}
-
-typedef struct {
-  size_t m_size, m_capacity;
-  mz_uint8 *m_pBuf;
-  mz_bool m_expandable;
-} tdefl_output_buffer;
-
-static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len,
-                                          void *pUser) {
-  tdefl_output_buffer *p = (tdefl_output_buffer *)pUser;
-  size_t new_size = p->m_size + len;
-  if (new_size > p->m_capacity) {
-    size_t new_capacity = p->m_capacity;
-    mz_uint8 *pNew_buf;
-    if (!p->m_expandable) return MZ_FALSE;
-    do {
-      new_capacity = MZ_MAX(128U, new_capacity << 1U);
-    } while (new_size > new_capacity);
-    pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity);
-    if (!pNew_buf) return MZ_FALSE;
-    p->m_pBuf = pNew_buf;
-    p->m_capacity = new_capacity;
-  }
-  memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len);
-  p->m_size = new_size;
-  return MZ_TRUE;
-}
-
-void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                 size_t *pOut_len, int flags) {
-  tdefl_output_buffer out_buf;
-  MZ_CLEAR_OBJ(out_buf);
-  if (!pOut_len)
-    return MZ_FALSE;
-  else
-    *pOut_len = 0;
-  out_buf.m_expandable = MZ_TRUE;
-  if (!tdefl_compress_mem_to_output(
-          pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags))
-    return NULL;
-  *pOut_len = out_buf.m_size;
-  return out_buf.m_pBuf;
-}
-
-size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                 const void *pSrc_buf, size_t src_buf_len,
-                                 int flags) {
-  tdefl_output_buffer out_buf;
-  MZ_CLEAR_OBJ(out_buf);
-  if (!pOut_buf) return 0;
-  out_buf.m_pBuf = (mz_uint8 *)pOut_buf;
-  out_buf.m_capacity = out_buf_len;
-  if (!tdefl_compress_mem_to_output(
-          pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags))
-    return 0;
-  return out_buf.m_size;
-}
-
-#ifndef MINIZ_NO_ZLIB_APIS
-static const mz_uint s_tdefl_num_probes[11] = {0,   1,   6,   32,  16,  32,
-                                               128, 256, 512, 768, 1500};
-
-// level may actually range from [0,10] (10 is a "hidden" max level, where we
-// want a bit more compression and it's fine if throughput to fall off a cliff
-// on some files).
-mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits,
-                                                int strategy) {
-  mz_uint comp_flags =
-      s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] |
-      ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0);
-  if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER;
-
-  if (!level)
-    comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS;
-  else if (strategy == MZ_FILTERED)
-    comp_flags |= TDEFL_FILTER_MATCHES;
-  else if (strategy == MZ_HUFFMAN_ONLY)
-    comp_flags &= ~TDEFL_MAX_PROBES_MASK;
-  else if (strategy == MZ_FIXED)
-    comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
-  else if (strategy == MZ_RLE)
-    comp_flags |= TDEFL_RLE_MATCHES;
-
-  return comp_flags;
-}
-#endif  // MINIZ_NO_ZLIB_APIS
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4204)  // nonstandard extension used : non-constant
-                                 // aggregate initializer (also supported by GNU
-                                 // C and C99, so no big deal)
-#pragma warning(disable : 4244)  // 'initializing': conversion from '__int64' to
-                                 // 'int', possible loss of data
-#pragma warning( \
-    disable : 4267)  // 'argument': conversion from '__int64' to 'int',
-                     // possible loss of data
-#pragma warning(disable : 4996)  // 'strdup': The POSIX name for this item is
-                                 // deprecated. Instead, use the ISO C and C++
-                                 // conformant name: _strdup.
-#endif
-
-// Simple PNG writer function by Alex Evans, 2011. Released into the public
-// domain: https://gist.github.com/908299, more context at
-// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
-// This is actually a modification of Alex's original code so PNG files
-// generated by this function pass pngcheck.
-void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w,
-                                                 int h, int num_chans,
-                                                 size_t *pLen_out,
-                                                 mz_uint level, mz_bool flip) {
-  // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was
-  // defined.
-  static const mz_uint s_tdefl_png_num_probes[11] = {
-      0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500};
-  tdefl_compressor *pComp =
-      (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor));
-  tdefl_output_buffer out_buf;
-  int i, bpl = w * num_chans, y, z;
-  mz_uint32 c;
-  *pLen_out = 0;
-  if (!pComp) return NULL;
-  MZ_CLEAR_OBJ(out_buf);
-  out_buf.m_expandable = MZ_TRUE;
-  out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h);
-  if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) {
-    MZ_FREE(pComp);
-    return NULL;
-  }
-  // write dummy header
-  for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf);
-  // compress image data
-  tdefl_init(
-      pComp, tdefl_output_buffer_putter, &out_buf,
-      s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER);
-  for (y = 0; y < h; ++y) {
-    tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH);
-    tdefl_compress_buffer(pComp,
-                          (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl,
-                          bpl, TDEFL_NO_FLUSH);
-  }
-  if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) !=
-      TDEFL_STATUS_DONE) {
-    MZ_FREE(pComp);
-    MZ_FREE(out_buf.m_pBuf);
-    return NULL;
-  }
-  // write real header
-  *pLen_out = out_buf.m_size - 41;
-  {
-    static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06};
-    mz_uint8 pnghdr[41] = {0x89,
-                           0x50,
-                           0x4e,
-                           0x47,
-                           0x0d,
-                           0x0a,
-                           0x1a,
-                           0x0a,
-                           0x00,
-                           0x00,
-                           0x00,
-                           0x0d,
-                           0x49,
-                           0x48,
-                           0x44,
-                           0x52,
-                           0,
-                           0,
-                           (mz_uint8)(w >> 8),
-                           (mz_uint8)w,
-                           0,
-                           0,
-                           (mz_uint8)(h >> 8),
-                           (mz_uint8)h,
-                           8,
-                           chans[num_chans],
-                           0,
-                           0,
-                           0,
-                           0,
-                           0,
-                           0,
-                           0,
-                           (mz_uint8)(*pLen_out >> 24),
-                           (mz_uint8)(*pLen_out >> 16),
-                           (mz_uint8)(*pLen_out >> 8),
-                           (mz_uint8)*pLen_out,
-                           0x49,
-                           0x44,
-                           0x41,
-                           0x54};
-    c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17);
-    for (i = 0; i < 4; ++i, c <<= 8)
-      ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24);
-    memcpy(out_buf.m_pBuf, pnghdr, 41);
-  }
-  // write footer (IDAT CRC-32, followed by IEND chunk)
-  if (!tdefl_output_buffer_putter(
-          "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) {
-    *pLen_out = 0;
-    MZ_FREE(pComp);
-    MZ_FREE(out_buf.m_pBuf);
-    return NULL;
-  }
-  c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4,
-                          *pLen_out + 4);
-  for (i = 0; i < 4; ++i, c <<= 8)
-    (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24);
-  // compute final size of file, grab compressed data buffer and return
-  *pLen_out += 57;
-  MZ_FREE(pComp);
-  return out_buf.m_pBuf;
-}
-void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h,
-                                              int num_chans, size_t *pLen_out) {
-  // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we
-  // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's
-  // where #defined out)
-  return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans,
-                                                    pLen_out, 6, MZ_FALSE);
-}
-
-// ------------------- .ZIP archive reading
-
-#ifndef MINIZ_NO_ARCHIVE_APIS
-
-#ifdef MINIZ_NO_STDIO
-#define MZ_FILE void *
-#else
-#include <stdio.h>
-#include <sys/stat.h>
-
-#if defined(_MSC_VER) || defined(__MINGW64__)
-static FILE *mz_fopen(const char *pFilename, const char *pMode) {
-  FILE *pFile = NULL;
-  fopen_s(&pFile, pFilename, pMode);
-  return pFile;
-}
-static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) {
-  FILE *pFile = NULL;
-  if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL;
-  return pFile;
-}
-#ifndef MINIZ_NO_TIME
-#include <sys/utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN mz_fopen
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 _ftelli64
-#define MZ_FSEEK64 _fseeki64
-#define MZ_FILE_STAT_STRUCT _stat
-#define MZ_FILE_STAT _stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN mz_freopen
-#define MZ_DELETE_FILE remove
-#elif defined(__MINGW32__)
-#ifndef MINIZ_NO_TIME
-#include <sys/utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftello64
-#define MZ_FSEEK64 fseeko64
-#define MZ_FILE_STAT_STRUCT _stat
-#define MZ_FILE_STAT _stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
-#define MZ_DELETE_FILE remove
-#elif defined(__TINYC__)
-#ifndef MINIZ_NO_TIME
-#include <sys/utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftell
-#define MZ_FSEEK64 fseek
-#define MZ_FILE_STAT_STRUCT stat
-#define MZ_FILE_STAT stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
-#define MZ_DELETE_FILE remove
-#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE
-#ifndef MINIZ_NO_TIME
-#include <utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen64(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftello64
-#define MZ_FSEEK64 fseeko64
-#define MZ_FILE_STAT_STRUCT stat64
-#define MZ_FILE_STAT stat64
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(p, m, s) freopen64(p, m, s)
-#define MZ_DELETE_FILE remove
-#else
-#ifndef MINIZ_NO_TIME
-#include <utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftello
-#define MZ_FSEEK64 fseeko
-#define MZ_FILE_STAT_STRUCT stat
-#define MZ_FILE_STAT stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
-#define MZ_DELETE_FILE remove
-#endif  // #ifdef _MSC_VER
-#endif  // #ifdef MINIZ_NO_STDIO
-
-#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c))
-
-// Various ZIP archive enums. To completely avoid cross platform compiler
-// alignment and platform endian issues, miniz.c doesn't use structs for any of
-// this stuff.
-enum {
-  // ZIP archive identifiers and record sizes
-  MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50,
-  MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50,
-  MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
-  MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30,
-  MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46,
-  MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,
-  // Central directory header record offsets
-  MZ_ZIP_CDH_SIG_OFS = 0,
-  MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4,
-  MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6,
-  MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
-  MZ_ZIP_CDH_METHOD_OFS = 10,
-  MZ_ZIP_CDH_FILE_TIME_OFS = 12,
-  MZ_ZIP_CDH_FILE_DATE_OFS = 14,
-  MZ_ZIP_CDH_CRC32_OFS = 16,
-  MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20,
-  MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24,
-  MZ_ZIP_CDH_FILENAME_LEN_OFS = 28,
-  MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
-  MZ_ZIP_CDH_COMMENT_LEN_OFS = 32,
-  MZ_ZIP_CDH_DISK_START_OFS = 34,
-  MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36,
-  MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38,
-  MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,
-  // Local directory header offsets
-  MZ_ZIP_LDH_SIG_OFS = 0,
-  MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4,
-  MZ_ZIP_LDH_BIT_FLAG_OFS = 6,
-  MZ_ZIP_LDH_METHOD_OFS = 8,
-  MZ_ZIP_LDH_FILE_TIME_OFS = 10,
-  MZ_ZIP_LDH_FILE_DATE_OFS = 12,
-  MZ_ZIP_LDH_CRC32_OFS = 14,
-  MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18,
-  MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
-  MZ_ZIP_LDH_FILENAME_LEN_OFS = 26,
-  MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
-  // End of central directory offsets
-  MZ_ZIP_ECDH_SIG_OFS = 0,
-  MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4,
-  MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6,
-  MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
-  MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10,
-  MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12,
-  MZ_ZIP_ECDH_CDIR_OFS_OFS = 16,
-  MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,
-};
-
-typedef struct {
-  void *m_p;
-  size_t m_size, m_capacity;
-  mz_uint m_element_size;
-} mz_zip_array;
-
-struct mz_zip_internal_state_tag {
-  mz_zip_array m_central_dir;
-  mz_zip_array m_central_dir_offsets;
-  mz_zip_array m_sorted_central_dir_offsets;
-  MZ_FILE *m_pFile;
-  void *m_pMem;
-  size_t m_mem_size;
-  size_t m_mem_capacity;
-};
-
-#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \
-  (array_ptr)->m_element_size = element_size
-#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \
-  ((element_type *)((array_ptr)->m_p))[index]
-
-static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip,
-                                              mz_zip_array *pArray) {
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p);
-  memset(pArray, 0, sizeof(mz_zip_array));
-}
-
-static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip,
-                                            mz_zip_array *pArray,
-                                            size_t min_new_capacity,
-                                            mz_uint growing) {
-  void *pNew_p;
-  size_t new_capacity = min_new_capacity;
-  MZ_ASSERT(pArray->m_element_size);
-  if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE;
-  if (growing) {
-    new_capacity = MZ_MAX(1, pArray->m_capacity);
-    while (new_capacity < min_new_capacity) new_capacity *= 2;
-  }
-  if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p,
-                                         pArray->m_element_size, new_capacity)))
-    return MZ_FALSE;
-  pArray->m_p = pNew_p;
-  pArray->m_capacity = new_capacity;
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip,
-                                                   mz_zip_array *pArray,
-                                                   size_t new_capacity,
-                                                   mz_uint growing) {
-  if (new_capacity > pArray->m_capacity) {
-    if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing))
-      return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip,
-                                                  mz_zip_array *pArray,
-                                                  size_t new_size,
-                                                  mz_uint growing) {
-  if (new_size > pArray->m_capacity) {
-    if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing))
-      return MZ_FALSE;
-  }
-  pArray->m_size = new_size;
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip,
-                                                       mz_zip_array *pArray,
-                                                       size_t n) {
-  return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE);
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip,
-                                                     mz_zip_array *pArray,
-                                                     const void *pElements,
-                                                     size_t n) {
-  size_t orig_size = pArray->m_size;
-  if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE))
-    return MZ_FALSE;
-  memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size,
-         pElements, n * pArray->m_element_size);
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_TIME
-static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) {
-  struct tm tm;
-  memset(&tm, 0, sizeof(tm));
-  tm.tm_isdst = -1;
-  tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900;
-  tm.tm_mon = ((dos_date >> 5) & 15) - 1;
-  tm.tm_mday = dos_date & 31;
-  tm.tm_hour = (dos_time >> 11) & 31;
-  tm.tm_min = (dos_time >> 5) & 63;
-  tm.tm_sec = (dos_time << 1) & 62;
-  return mktime(&tm);
-}
-
-static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time,
-                                    mz_uint16 *pDOS_date) {
-#ifdef _MSC_VER
-  struct tm tm_struct;
-  struct tm *tm = &tm_struct;
-  errno_t err = localtime_s(tm, &time);
-  if (err) {
-    *pDOS_date = 0;
-    *pDOS_time = 0;
-    return;
-  }
-#else
-  struct tm *tm = localtime(&time);
-#endif
-  *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) +
-                           ((tm->tm_sec) >> 1));
-  *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) +
-                           ((tm->tm_mon + 1) << 5) + tm->tm_mday);
-}
-#endif
-
-#ifndef MINIZ_NO_STDIO
-static mz_bool mz_zip_get_file_modified_time(const char *pFilename,
-                                             mz_uint16 *pDOS_time,
-                                             mz_uint16 *pDOS_date) {
-#ifdef MINIZ_NO_TIME
-  (void)pFilename;
-  *pDOS_date = *pDOS_time = 0;
-#else
-  struct MZ_FILE_STAT_STRUCT file_stat;
-  // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000
-  // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh.
-  if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE;
-  mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date);
-#endif  // #ifdef MINIZ_NO_TIME
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_TIME
-static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time,
-                                     time_t modified_time) {
-  struct utimbuf t;
-  t.actime = access_time;
-  t.modtime = modified_time;
-  return !utime(pFilename, &t);
-}
-#endif  // #ifndef MINIZ_NO_TIME
-#endif  // #ifndef MINIZ_NO_STDIO
-
-static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip,
-                                           mz_uint32 flags) {
-  (void)flags;
-  if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
-    return MZ_FALSE;
-
-  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
-  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
-  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
-
-  pZip->m_zip_mode = MZ_ZIP_MODE_READING;
-  pZip->m_archive_size = 0;
-  pZip->m_central_directory_file_ofs = 0;
-  pZip->m_total_files = 0;
-
-  if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(
-                   pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
-    return MZ_FALSE;
-  memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir,
-                                sizeof(mz_uint8));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets,
-                                sizeof(mz_uint32));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets,
-                                sizeof(mz_uint32));
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool
-mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array,
-                            const mz_zip_array *pCentral_dir_offsets,
-                            mz_uint l_index, mz_uint r_index) {
-  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(
-                     pCentral_dir_array, mz_uint8,
-                     MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32,
-                                          l_index)),
-                 *pE;
-  const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(
-      pCentral_dir_array, mz_uint8,
-      MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index));
-  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS),
-          r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  mz_uint8 l = 0, r = 0;
-  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-  pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-  pE = pL + MZ_MIN(l_len, r_len);
-  while (pL < pE) {
-    if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break;
-    pL++;
-    pR++;
-  }
-  return (pL == pE) ? (l_len < r_len) : (l < r);
-}
-
-#define MZ_SWAP_UINT32(a, b) \
-  do {                       \
-    mz_uint32 t = a;         \
-    a = b;                   \
-    b = t;                   \
-  }                          \
-  MZ_MACRO_END
-
-// Heap sort of lowercased filenames, used to help accelerate plain central
-// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(),
-// but it could allocate memory.)
-static void mz_zip_reader_sort_central_dir_offsets_by_filename(
-    mz_zip_archive *pZip) {
-  mz_zip_internal_state *pState = pZip->m_pState;
-  const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
-  const mz_zip_array *pCentral_dir = &pState->m_central_dir;
-  mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(
-      &pState->m_sorted_central_dir_offsets, mz_uint32, 0);
-  const int size = pZip->m_total_files;
-  int start = (size - 2) >> 1, end;
-  while (start >= 0) {
-    int child, root = start;
-    for (;;) {
-      if ((child = (root << 1) + 1) >= size) break;
-      child +=
-          (((child + 1) < size) &&
-           (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                        pIndices[child], pIndices[child + 1])));
-      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                       pIndices[root], pIndices[child]))
-        break;
-      MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
-      root = child;
-    }
-    start--;
-  }
-
-  end = size - 1;
-  while (end > 0) {
-    int child, root = 0;
-    MZ_SWAP_UINT32(pIndices[end], pIndices[0]);
-    for (;;) {
-      if ((child = (root << 1) + 1) >= end) break;
-      child +=
-          (((child + 1) < end) &&
-           mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                       pIndices[child], pIndices[child + 1]));
-      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                       pIndices[root], pIndices[child]))
-        break;
-      MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
-      root = child;
-    }
-    end--;
-  }
-}
-
-static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip,
-                                              mz_uint32 flags) {
-  mz_uint cdir_size, num_this_disk, cdir_disk_index;
-  mz_uint64 cdir_ofs;
-  mz_int64 cur_file_ofs;
-  const mz_uint8 *p;
-  mz_uint32 buf_u32[4096 / sizeof(mz_uint32)];
-  mz_uint8 *pBuf = (mz_uint8 *)buf_u32;
-  mz_bool sort_central_dir =
-      ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0);
-  // Basic sanity checks - reject files which are too small, and check the first
-  // 4 bytes of the file to make sure a local header is there.
-  if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  // Find the end of central directory record by scanning the file from the end
-  // towards the beginning.
-  cur_file_ofs =
-      MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0);
-  for (;;) {
-    int i,
-        n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs);
-    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n)
-      return MZ_FALSE;
-    for (i = n - 4; i >= 0; --i)
-      if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break;
-    if (i >= 0) {
-      cur_file_ofs += i;
-      break;
-    }
-    if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >=
-                            (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)))
-      return MZ_FALSE;
-    cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0);
-  }
-  // Read and verify the end of central directory record.
-  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf,
-                    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) !=
-       MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) ||
-      ((pZip->m_total_files =
-            MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) !=
-       MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS)))
-    return MZ_FALSE;
-
-  num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS);
-  cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS);
-  if (((num_this_disk | cdir_disk_index) != 0) &&
-      ((num_this_disk != 1) || (cdir_disk_index != 1)))
-    return MZ_FALSE;
-
-  if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) <
-      pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-
-  cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS);
-  if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE;
-
-  pZip->m_central_directory_file_ofs = cdir_ofs;
-
-  if (pZip->m_total_files) {
-    mz_uint i, n;
-
-    // Read the entire central directory into a heap block, and allocate another
-    // heap block to hold the unsorted central dir file record offsets, and
-    // another to hold the sorted indices.
-    if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size,
-                              MZ_FALSE)) ||
-        (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets,
-                              pZip->m_total_files, MZ_FALSE)))
-      return MZ_FALSE;
-
-    if (sort_central_dir) {
-      if (!mz_zip_array_resize(pZip,
-                               &pZip->m_pState->m_sorted_central_dir_offsets,
-                               pZip->m_total_files, MZ_FALSE))
-        return MZ_FALSE;
-    }
-
-    if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs,
-                      pZip->m_pState->m_central_dir.m_p,
-                      cdir_size) != cdir_size)
-      return MZ_FALSE;
-
-    // Now create an index into the central directory file records, do some
-    // basic sanity checking on each record, and check for zip64 entries (which
-    // are not yet supported).
-    p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p;
-    for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) {
-      mz_uint total_header_size, comp_size, decomp_size, disk_index;
-      if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) ||
-          (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG))
-        return MZ_FALSE;
-      MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32,
-                           i) =
-          (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p);
-      if (sort_central_dir)
-        MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets,
-                             mz_uint32, i) = i;
-      comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-      decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
-      if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) &&
-           (decomp_size != comp_size)) ||
-          (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) ||
-          (comp_size == 0xFFFFFFFF))
-        return MZ_FALSE;
-      disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS);
-      if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE;
-      if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) +
-           MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size)
-        return MZ_FALSE;
-      if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) >
-          n)
-        return MZ_FALSE;
-      n -= total_header_size;
-      p += total_header_size;
-    }
-  }
-
-  if (sort_central_dir)
-    mz_zip_reader_sort_central_dir_offsets_by_filename(pZip);
-
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size,
-                           mz_uint32 flags) {
-  if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE;
-  if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE;
-  pZip->m_archive_size = size;
-  if (!mz_zip_reader_read_central_dir(pZip, flags)) {
-    mz_zip_reader_end(pZip);
-    return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-
-static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs,
-                                   void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  size_t s = (file_ofs >= pZip->m_archive_size)
-                 ? 0
-                 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n);
-  memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s);
-  return s;
-}
-
-mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem,
-                               size_t size, mz_uint32 flags) {
-  if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE;
-  pZip->m_archive_size = size;
-  pZip->m_pRead = mz_zip_mem_read_func;
-  pZip->m_pIO_opaque = pZip;
-#ifdef __cplusplus
-  pZip->m_pState->m_pMem = const_cast<void *>(pMem);
-#else
-  pZip->m_pState->m_pMem = (void *)pMem;
-#endif
-  pZip->m_pState->m_mem_size = size;
-  if (!mz_zip_reader_read_central_dir(pZip, flags)) {
-    mz_zip_reader_end(pZip);
-    return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs,
-                                    void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
-  if (((mz_int64)file_ofs < 0) ||
-      (((cur_ofs != (mz_int64)file_ofs)) &&
-       (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
-    return 0;
-  return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile);
-}
-
-mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint32 flags) {
-  mz_uint64 file_size;
-  MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb");
-  if (!pFile) return MZ_FALSE;
-  if (MZ_FSEEK64(pFile, 0, SEEK_END)) {
-    MZ_FCLOSE(pFile);
-    return MZ_FALSE;
-  }
-  file_size = MZ_FTELL64(pFile);
-  if (!mz_zip_reader_init_internal(pZip, flags)) {
-    MZ_FCLOSE(pFile);
-    return MZ_FALSE;
-  }
-  pZip->m_pRead = mz_zip_file_read_func;
-  pZip->m_pIO_opaque = pZip;
-  pZip->m_pState->m_pFile = pFile;
-  pZip->m_archive_size = file_size;
-  if (!mz_zip_reader_read_central_dir(pZip, flags)) {
-    mz_zip_reader_end(pZip);
-    return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) {
-  return pZip ? pZip->m_total_files : 0;
-}
-
-static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(
-    mz_zip_archive *pZip, mz_uint file_index) {
-  if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return NULL;
-  return &MZ_ZIP_ARRAY_ELEMENT(
-      &pZip->m_pState->m_central_dir, mz_uint8,
-      MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32,
-                           file_index));
-}
-
-mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip,
-                                        mz_uint file_index) {
-  mz_uint m_bit_flag;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if (!p) return MZ_FALSE;
-  m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
-  return (m_bit_flag & 1);
-}
-
-mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip,
-                                          mz_uint file_index) {
-  mz_uint filename_len, external_attr;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if (!p) return MZ_FALSE;
-
-  // First see if the filename ends with a '/' character.
-  filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  if (filename_len) {
-    if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/')
-      return MZ_TRUE;
-  }
-
-  // Bugfix: This code was also checking if the internal attribute was non-zero,
-  // which wasn't correct.
-  // Most/all zip writers (hopefully) set DOS file/directory attributes in the
-  // low 16-bits, so check for the DOS directory flag and ignore the source OS
-  // ID in the created by field.
-  // FIXME: Remove this check? Is it necessary - we already check the filename.
-  external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
-  if ((external_attr & 0x10) != 0) return MZ_TRUE;
-
-  return MZ_FALSE;
-}
-
-mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index,
-                                mz_zip_archive_file_stat *pStat) {
-  mz_uint n;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if ((!p) || (!pStat)) return MZ_FALSE;
-
-  // Unpack the central directory record.
-  pStat->m_file_index = file_index;
-  pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(
-      &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index);
-  pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS);
-  pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS);
-  pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
-  pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS);
-#ifndef MINIZ_NO_TIME
-  pStat->m_time =
-      mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS),
-                           MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS));
-#endif
-  pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS);
-  pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-  pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
-  pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS);
-  pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
-  pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
-
-  // Copy as much of the filename and comment as possible.
-  n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1);
-  memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
-  pStat->m_filename[n] = '\0';
-
-  n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS);
-  n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1);
-  pStat->m_comment_size = n;
-  memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS),
-         n);
-  pStat->m_comment[n] = '\0';
-
-  return MZ_TRUE;
-}
-
-mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index,
-                                   char *pFilename, mz_uint filename_buf_size) {
-  mz_uint n;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if (!p) {
-    if (filename_buf_size) pFilename[0] = '\0';
-    return 0;
-  }
-  n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  if (filename_buf_size) {
-    n = MZ_MIN(n, filename_buf_size - 1);
-    memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
-    pFilename[n] = '\0';
-  }
-  return n + 1;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA,
-                                                         const char *pB,
-                                                         mz_uint len,
-                                                         mz_uint flags) {
-  mz_uint i;
-  if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len);
-  for (i = 0; i < len; ++i)
-    if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE;
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE int mz_zip_reader_filename_compare(
-    const mz_zip_array *pCentral_dir_array,
-    const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR,
-    mz_uint r_len) {
-  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(
-                     pCentral_dir_array, mz_uint8,
-                     MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32,
-                                          l_index)),
-                 *pE;
-  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  mz_uint8 l = 0, r = 0;
-  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-  pE = pL + MZ_MIN(l_len, r_len);
-  while (pL < pE) {
-    if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break;
-    pL++;
-    pR++;
-  }
-  return (pL == pE) ? (int)(l_len - r_len) : (l - r);
-}
-
-static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip,
-                                                   const char *pFilename) {
-  mz_zip_internal_state *pState = pZip->m_pState;
-  const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
-  const mz_zip_array *pCentral_dir = &pState->m_central_dir;
-  mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(
-      &pState->m_sorted_central_dir_offsets, mz_uint32, 0);
-  const int size = pZip->m_total_files;
-  const mz_uint filename_len = (mz_uint)strlen(pFilename);
-  int l = 0, h = size - 1;
-  while (l <= h) {
-    int m = (l + h) >> 1, file_index = pIndices[m],
-        comp =
-            mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets,
-                                           file_index, pFilename, filename_len);
-    if (!comp)
-      return file_index;
-    else if (comp < 0)
-      l = m + 1;
-    else
-      h = m - 1;
-  }
-  return -1;
-}
-
-int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName,
-                              const char *pComment, mz_uint flags) {
-  mz_uint file_index;
-  size_t name_len, comment_len;
-  if ((!pZip) || (!pZip->m_pState) || (!pName) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return -1;
-  if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) &&
-      (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size))
-    return mz_zip_reader_locate_file_binary_search(pZip, pName);
-  name_len = strlen(pName);
-  if (name_len > 0xFFFF) return -1;
-  comment_len = pComment ? strlen(pComment) : 0;
-  if (comment_len > 0xFFFF) return -1;
-  for (file_index = 0; file_index < pZip->m_total_files; file_index++) {
-    const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(
-        &pZip->m_pState->m_central_dir, mz_uint8,
-        MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32,
-                             file_index));
-    mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-    const char *pFilename =
-        (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-    if (filename_len < name_len) continue;
-    if (comment_len) {
-      mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS),
-              file_comment_len =
-                  MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS);
-      const char *pFile_comment = pFilename + filename_len + file_extra_len;
-      if ((file_comment_len != comment_len) ||
-          (!mz_zip_reader_string_equal(pComment, pFile_comment,
-                                       file_comment_len, flags)))
-        continue;
-    }
-    if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) {
-      int ofs = filename_len - 1;
-      do {
-        if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') ||
-            (pFilename[ofs] == ':'))
-          break;
-      } while (--ofs >= 0);
-      ofs++;
-      pFilename += ofs;
-      filename_len -= ofs;
-    }
-    if ((filename_len == name_len) &&
-        (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags)))
-      return file_index;
-  }
-  return -1;
-}
-
-mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip,
-                                              mz_uint file_index, void *pBuf,
-                                              size_t buf_size, mz_uint flags,
-                                              void *pUser_read_buf,
-                                              size_t user_read_buf_size) {
-  int status = TINFL_STATUS_DONE;
-  mz_uint64 needed_size, cur_file_ofs, comp_remaining,
-      out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail;
-  mz_zip_archive_file_stat file_stat;
-  void *pRead_buf;
-  mz_uint32
-      local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) /
-                       sizeof(mz_uint32)];
-  mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
-  tinfl_decompressor inflator;
-
-  if ((buf_size) && (!pBuf)) return MZ_FALSE;
-
-  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE;
-
-  // Empty file, or a directory (but not always a directory - I've seen odd zips
-  // with directories that have compressed data which inflates to 0 bytes)
-  if (!file_stat.m_comp_size) return MZ_TRUE;
-
-  // Entry is a subdirectory (I've seen old zips with dir entries which have
-  // compressed deflate data which inflates to 0 bytes, but these entries claim
-  // to uncompress to 512 bytes in the headers).
-  // I'm torn how to handle this case - should it fail instead?
-  if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE;
-
-  // Encryption and patch files are not supported.
-  if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE;
-
-  // This function only supports stored and deflate.
-  if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) &&
-      (file_stat.m_method != MZ_DEFLATED))
-    return MZ_FALSE;
-
-  // Ensure supplied output buffer is large enough.
-  needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size
-                                                      : file_stat.m_uncomp_size;
-  if (buf_size < needed_size) return MZ_FALSE;
-
-  // Read and parse the local directory entry.
-  cur_file_ofs = file_stat.m_local_header_ofs;
-  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header,
-                    MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
-    return MZ_FALSE;
-
-  cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
-  if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
-    return MZ_FALSE;
-
-  if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) {
-    // The file is stored or the caller has requested the compressed data.
-    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf,
-                      (size_t)needed_size) != needed_size)
-      return MZ_FALSE;
-    return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) ||
-           (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf,
-                     (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32);
-  }
-
-  // Decompress the file either directly from memory or from a file input
-  // buffer.
-  tinfl_init(&inflator);
-
-  if (pZip->m_pState->m_pMem) {
-    // Read directly from the archive in memory.
-    pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
-    read_buf_size = read_buf_avail = file_stat.m_comp_size;
-    comp_remaining = 0;
-  } else if (pUser_read_buf) {
-    // Use a user provided read buffer.
-    if (!user_read_buf_size) return MZ_FALSE;
-    pRead_buf = (mz_uint8 *)pUser_read_buf;
-    read_buf_size = user_read_buf_size;
-    read_buf_avail = 0;
-    comp_remaining = file_stat.m_comp_size;
-  } else {
-    // Temporarily allocate a read buffer.
-    read_buf_size =
-        MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE);
-#ifdef _MSC_VER
-    if (((0, sizeof(size_t) == sizeof(mz_uint32))) &&
-        (read_buf_size > 0x7FFFFFFF))
-#else
-    if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
-#endif
-      return MZ_FALSE;
-    if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1,
-                                            (size_t)read_buf_size)))
-      return MZ_FALSE;
-    read_buf_avail = 0;
-    comp_remaining = file_stat.m_comp_size;
-  }
-
-  do {
-    size_t in_buf_size,
-        out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs);
-    if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) {
-      read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
-      if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf,
-                        (size_t)read_buf_avail) != read_buf_avail) {
-        status = TINFL_STATUS_FAILED;
-        break;
-      }
-      cur_file_ofs += read_buf_avail;
-      comp_remaining -= read_buf_avail;
-      read_buf_ofs = 0;
-    }
-    in_buf_size = (size_t)read_buf_avail;
-    status = tinfl_decompress(
-        &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size,
-        (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size,
-        TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF |
-            (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0));
-    read_buf_avail -= in_buf_size;
-    read_buf_ofs += in_buf_size;
-    out_buf_ofs += out_buf_size;
-  } while (status == TINFL_STATUS_NEEDS_MORE_INPUT);
-
-  if (status == TINFL_STATUS_DONE) {
-    // Make sure the entire file was decompressed, and check its CRC.
-    if ((out_buf_ofs != file_stat.m_uncomp_size) ||
-        (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf,
-                  (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32))
-      status = TINFL_STATUS_FAILED;
-  }
-
-  if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf))
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-
-  return status == TINFL_STATUS_DONE;
-}
-
-mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(
-    mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size,
-    mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) {
-  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
-  if (file_index < 0) return MZ_FALSE;
-  return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size,
-                                               flags, pUser_read_buf,
-                                               user_read_buf_size);
-}
-
-mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index,
-                                     void *pBuf, size_t buf_size,
-                                     mz_uint flags) {
-  return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size,
-                                               flags, NULL, 0);
-}
-
-mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip,
-                                          const char *pFilename, void *pBuf,
-                                          size_t buf_size, mz_uint flags) {
-  return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf,
-                                                    buf_size, flags, NULL, 0);
-}
-
-void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index,
-                                    size_t *pSize, mz_uint flags) {
-  mz_uint64 comp_size, uncomp_size, alloc_size;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  void *pBuf;
-
-  if (pSize) *pSize = 0;
-  if (!p) return NULL;
-
-  comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-  uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
-
-  alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size;
-#ifdef _MSC_VER
-  if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
-#else
-  if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
-#endif
-    return NULL;
-  if (NULL ==
-      (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size)))
-    return NULL;
-
-  if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size,
-                                    flags)) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-    return NULL;
-  }
-
-  if (pSize) *pSize = (size_t)alloc_size;
-  return pBuf;
-}
-
-void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip,
-                                         const char *pFilename, size_t *pSize,
-                                         mz_uint flags) {
-  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
-  if (file_index < 0) {
-    if (pSize) *pSize = 0;
-    return MZ_FALSE;
-  }
-  return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags);
-}
-
-mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip,
-                                          mz_uint file_index,
-                                          mz_file_write_func pCallback,
-                                          void *pOpaque, mz_uint flags) {
-  int status = TINFL_STATUS_DONE;
-  mz_uint file_crc32 = MZ_CRC32_INIT;
-  mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining,
-                           out_buf_ofs = 0, cur_file_ofs;
-  mz_zip_archive_file_stat file_stat;
-  void *pRead_buf = NULL;
-  void *pWrite_buf = NULL;
-  mz_uint32
-      local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) /
-                       sizeof(mz_uint32)];
-  mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
-
-  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE;
-
-  // Empty file, or a directory (but not always a directory - I've seen odd zips
-  // with directories that have compressed data which inflates to 0 bytes)
-  if (!file_stat.m_comp_size) return MZ_TRUE;
-
-  // Entry is a subdirectory (I've seen old zips with dir entries which have
-  // compressed deflate data which inflates to 0 bytes, but these entries claim
-  // to uncompress to 512 bytes in the headers).
-  // I'm torn how to handle this case - should it fail instead?
-  if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE;
-
-  // Encryption and patch files are not supported.
-  if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE;
-
-  // This function only supports stored and deflate.
-  if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) &&
-      (file_stat.m_method != MZ_DEFLATED))
-    return MZ_FALSE;
-
-  // Read and parse the local directory entry.
-  cur_file_ofs = file_stat.m_local_header_ofs;
-  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header,
-                    MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
-    return MZ_FALSE;
-
-  cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
-  if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
-    return MZ_FALSE;
-
-  // Decompress the file either directly from memory or from a file input
-  // buffer.
-  if (pZip->m_pState->m_pMem) {
-    pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
-    read_buf_size = read_buf_avail = file_stat.m_comp_size;
-    comp_remaining = 0;
-  } else {
-    read_buf_size =
-        MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE);
-    if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1,
-                                            (size_t)read_buf_size)))
-      return MZ_FALSE;
-    read_buf_avail = 0;
-    comp_remaining = file_stat.m_comp_size;
-  }
-
-  if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) {
-    // The file is stored or the caller has requested the compressed data.
-    if (pZip->m_pState->m_pMem) {
-#ifdef _MSC_VER
-      if (((0, sizeof(size_t) == sizeof(mz_uint32))) &&
-          (file_stat.m_comp_size > 0xFFFFFFFF))
-#else
-      if (((sizeof(size_t) == sizeof(mz_uint32))) &&
-          (file_stat.m_comp_size > 0xFFFFFFFF))
-#endif
-        return MZ_FALSE;
-      if (pCallback(pOpaque, out_buf_ofs, pRead_buf,
-                    (size_t)file_stat.m_comp_size) != file_stat.m_comp_size)
-        status = TINFL_STATUS_FAILED;
-      else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
-        file_crc32 =
-            (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf,
-                                (size_t)file_stat.m_comp_size);
-      cur_file_ofs += file_stat.m_comp_size;
-      out_buf_ofs += file_stat.m_comp_size;
-      comp_remaining = 0;
-    } else {
-      while (comp_remaining) {
-        read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
-        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf,
-                          (size_t)read_buf_avail) != read_buf_avail) {
-          status = TINFL_STATUS_FAILED;
-          break;
-        }
-
-        if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
-          file_crc32 = (mz_uint32)mz_crc32(
-              file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail);
-
-        if (pCallback(pOpaque, out_buf_ofs, pRead_buf,
-                      (size_t)read_buf_avail) != read_buf_avail) {
-          status = TINFL_STATUS_FAILED;
-          break;
-        }
-        cur_file_ofs += read_buf_avail;
-        out_buf_ofs += read_buf_avail;
-        comp_remaining -= read_buf_avail;
-      }
-    }
-  } else {
-    tinfl_decompressor inflator;
-    tinfl_init(&inflator);
-
-    if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1,
-                                             TINFL_LZ_DICT_SIZE)))
-      status = TINFL_STATUS_FAILED;
-    else {
-      do {
-        mz_uint8 *pWrite_buf_cur =
-            (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
-        size_t in_buf_size,
-            out_buf_size =
-                TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
-        if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) {
-          read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
-          if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf,
-                            (size_t)read_buf_avail) != read_buf_avail) {
-            status = TINFL_STATUS_FAILED;
-            break;
-          }
-          cur_file_ofs += read_buf_avail;
-          comp_remaining -= read_buf_avail;
-          read_buf_ofs = 0;
-        }
-
-        in_buf_size = (size_t)read_buf_avail;
-        status = tinfl_decompress(
-            &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size,
-            (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size,
-            comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
-        read_buf_avail -= in_buf_size;
-        read_buf_ofs += in_buf_size;
-
-        if (out_buf_size) {
-          if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) !=
-              out_buf_size) {
-            status = TINFL_STATUS_FAILED;
-            break;
-          }
-          file_crc32 =
-              (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size);
-          if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) {
-            status = TINFL_STATUS_FAILED;
-            break;
-          }
-        }
-      } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) ||
-               (status == TINFL_STATUS_HAS_MORE_OUTPUT));
-    }
-  }
-
-  if ((status == TINFL_STATUS_DONE) &&
-      (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) {
-    // Make sure the entire file was decompressed, and check its CRC.
-    if ((out_buf_ofs != file_stat.m_uncomp_size) ||
-        (file_crc32 != file_stat.m_crc32))
-      status = TINFL_STATUS_FAILED;
-  }
-
-  if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-  if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf);
-
-  return status == TINFL_STATUS_DONE;
-}
-
-mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip,
-                                               const char *pFilename,
-                                               mz_file_write_func pCallback,
-                                               void *pOpaque, mz_uint flags) {
-  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
-  if (file_index < 0) return MZ_FALSE;
-  return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque,
-                                           flags);
-}
-
-#ifndef MINIZ_NO_STDIO
-static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs,
-                                         const void *pBuf, size_t n) {
-  (void)ofs;
-  return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque);
-}
-
-mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index,
-                                      const char *pDst_filename,
-                                      mz_uint flags) {
-  mz_bool status;
-  mz_zip_archive_file_stat file_stat;
-  MZ_FILE *pFile;
-  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE;
-  pFile = MZ_FOPEN(pDst_filename, "wb");
-  if (!pFile) return MZ_FALSE;
-  status = mz_zip_reader_extract_to_callback(
-      pZip, file_index, mz_zip_file_write_callback, pFile, flags);
-  if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE;
-#ifndef MINIZ_NO_TIME
-  if (status)
-    mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time);
-#endif
-  return status;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_bool mz_zip_reader_end(mz_zip_archive *pZip) {
-  if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return MZ_FALSE;
-
-  if (pZip->m_pState) {
-    mz_zip_internal_state *pState = pZip->m_pState;
-    pZip->m_pState = NULL;
-    mz_zip_array_clear(pZip, &pState->m_central_dir);
-    mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
-    mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
-
-#ifndef MINIZ_NO_STDIO
-    if (pState->m_pFile) {
-      MZ_FCLOSE(pState->m_pFile);
-      pState->m_pFile = NULL;
-    }
-#endif  // #ifndef MINIZ_NO_STDIO
-
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
-  }
-  pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
-
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip,
-                                           const char *pArchive_filename,
-                                           const char *pDst_filename,
-                                           mz_uint flags) {
-  int file_index =
-      mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags);
-  if (file_index < 0) return MZ_FALSE;
-  return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags);
-}
-#endif
-
-// ------------------- .ZIP archive writing
-
-#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-static void mz_write_le16(mz_uint8 *p, mz_uint16 v) {
-  p[0] = (mz_uint8)v;
-  p[1] = (mz_uint8)(v >> 8);
-}
-static void mz_write_le32(mz_uint8 *p, mz_uint32 v) {
-  p[0] = (mz_uint8)v;
-  p[1] = (mz_uint8)(v >> 8);
-  p[2] = (mz_uint8)(v >> 16);
-  p[3] = (mz_uint8)(v >> 24);
-}
-#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v))
-#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v))
-
-mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) {
-  if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
-    return MZ_FALSE;
-
-  if (pZip->m_file_offset_alignment) {
-    // Ensure user specified file offset alignment is a power of 2.
-    if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1))
-      return MZ_FALSE;
-  }
-
-  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
-  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
-  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
-
-  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
-  pZip->m_archive_size = existing_size;
-  pZip->m_central_directory_file_ofs = 0;
-  pZip->m_total_files = 0;
-
-  if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(
-                   pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
-    return MZ_FALSE;
-  memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir,
-                                sizeof(mz_uint8));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets,
-                                sizeof(mz_uint32));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets,
-                                sizeof(mz_uint32));
-  return MZ_TRUE;
-}
-
-static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs,
-                                     const void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  mz_zip_internal_state *pState = pZip->m_pState;
-  mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size);
-#ifdef _MSC_VER
-  if ((!n) ||
-      ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)))
-#else
-  if ((!n) ||
-      ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)))
-#endif
-    return 0;
-  if (new_size > pState->m_mem_capacity) {
-    void *pNew_block;
-    size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity);
-    while (new_capacity < new_size) new_capacity *= 2;
-    if (NULL == (pNew_block = pZip->m_pRealloc(
-                     pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity)))
-      return 0;
-    pState->m_pMem = pNew_block;
-    pState->m_mem_capacity = new_capacity;
-  }
-  memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n);
-  pState->m_mem_size = (size_t)new_size;
-  return n;
-}
-
-mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip,
-                                size_t size_to_reserve_at_beginning,
-                                size_t initial_allocation_size) {
-  pZip->m_pWrite = mz_zip_heap_write_func;
-  pZip->m_pIO_opaque = pZip;
-  if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE;
-  if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size,
-                                             size_to_reserve_at_beginning))) {
-    if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(
-                     pZip->m_pAlloc_opaque, 1, initial_allocation_size))) {
-      mz_zip_writer_end(pZip);
-      return MZ_FALSE;
-    }
-    pZip->m_pState->m_mem_capacity = initial_allocation_size;
-  }
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs,
-                                     const void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
-  if (((mz_int64)file_ofs < 0) ||
-      (((cur_ofs != (mz_int64)file_ofs)) &&
-       (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
-    return 0;
-  return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile);
-}
-
-mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint64 size_to_reserve_at_beginning) {
-  MZ_FILE *pFile;
-  pZip->m_pWrite = mz_zip_file_write_func;
-  pZip->m_pIO_opaque = pZip;
-  if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE;
-  if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) {
-    mz_zip_writer_end(pZip);
-    return MZ_FALSE;
-  }
-  pZip->m_pState->m_pFile = pFile;
-  if (size_to_reserve_at_beginning) {
-    mz_uint64 cur_ofs = 0;
-    char buf[4096];
-    MZ_CLEAR_OBJ(buf);
-    do {
-      size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning);
-      if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) {
-        mz_zip_writer_end(pZip);
-        return MZ_FALSE;
-      }
-      cur_ofs += n;
-      size_to_reserve_at_beginning -= n;
-    } while (size_to_reserve_at_beginning);
-  }
-  return MZ_TRUE;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip,
-                                       const char *pFilename) {
-  mz_zip_internal_state *pState;
-  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return MZ_FALSE;
-  // No sense in trying to write to an archive that's already at the support max
-  // size
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-
-  if (pState->m_pFile) {
-#ifdef MINIZ_NO_STDIO
-    pFilename;
-    return MZ_FALSE;
-#else
-    // Archive is being read from stdio - try to reopen as writable.
-    if (pZip->m_pIO_opaque != pZip) return MZ_FALSE;
-    if (!pFilename) return MZ_FALSE;
-    pZip->m_pWrite = mz_zip_file_write_func;
-    if (NULL ==
-        (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) {
-      // The mz_zip_archive is now in a bogus state because pState->m_pFile is
-      // NULL, so just close it.
-      mz_zip_reader_end(pZip);
-      return MZ_FALSE;
-    }
-#endif  // #ifdef MINIZ_NO_STDIO
-  } else if (pState->m_pMem) {
-    // Archive lives in a memory block. Assume it's from the heap that we can
-    // resize using the realloc callback.
-    if (pZip->m_pIO_opaque != pZip) return MZ_FALSE;
-    pState->m_mem_capacity = pState->m_mem_size;
-    pZip->m_pWrite = mz_zip_heap_write_func;
-  }
-  // Archive is being read via a user provided read function - make sure the
-  // user has specified a write function too.
-  else if (!pZip->m_pWrite)
-    return MZ_FALSE;
-
-  // Start writing new files at the archive's current central directory
-  // location.
-  pZip->m_archive_size = pZip->m_central_directory_file_ofs;
-  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
-  pZip->m_central_directory_file_ofs = 0;
-
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name,
-                              const void *pBuf, size_t buf_size,
-                              mz_uint level_and_flags) {
-  return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0,
-                                  level_and_flags, 0, 0);
-}
-
-typedef struct {
-  mz_zip_archive *m_pZip;
-  mz_uint64 m_cur_archive_file_ofs;
-  mz_uint64 m_comp_size;
-} mz_zip_writer_add_state;
-
-static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len,
-                                                  void *pUser) {
-  mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser;
-  if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque,
-                                    pState->m_cur_archive_file_ofs, pBuf,
-                                    len) != len)
-    return MZ_FALSE;
-  pState->m_cur_archive_file_ofs += len;
-  pState->m_comp_size += len;
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_create_local_dir_header(
-    mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size,
-    mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size,
-    mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags,
-    mz_uint16 dos_time, mz_uint16 dos_date) {
-  (void)pZip;
-  memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size);
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_create_central_dir_header(
-    mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size,
-    mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size,
-    mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method,
-    mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date,
-    mz_uint64 local_header_ofs, mz_uint32 ext_attributes) {
-  (void)pZip;
-  memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs);
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_add_to_central_dir(
-    mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size,
-    const void *pExtra, mz_uint16 extra_size, const void *pComment,
-    mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size,
-    mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags,
-    mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs,
-    mz_uint32 ext_attributes) {
-  mz_zip_internal_state *pState = pZip->m_pState;
-  mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size;
-  size_t orig_central_dir_size = pState->m_central_dir.m_size;
-  mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
-
-  // No zip64 support yet
-  if ((local_header_ofs > 0xFFFFFFFF) ||
-      (((mz_uint64)pState->m_central_dir.m_size +
-        MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size +
-        comment_size) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_create_central_dir_header(
-          pZip, central_dir_header, filename_size, extra_size, comment_size,
-          uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time,
-          dos_date, local_header_ofs, ext_attributes))
-    return MZ_FALSE;
-
-  if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header,
-                               MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename,
-                               filename_size)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra,
-                               extra_size)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment,
-                               comment_size)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets,
-                               &central_dir_ofs, 1))) {
-    // Try to push the central directory array back into its original state.
-    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size,
-                        MZ_FALSE);
-    return MZ_FALSE;
-  }
-
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) {
-  // Basic ZIP archive filename validity checks: Valid filenames cannot start
-  // with a forward slash, cannot contain a drive letter, and cannot use
-  // DOS-style backward slashes.
-  if (*pArchive_name == '/') return MZ_FALSE;
-  while (*pArchive_name) {
-    if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE;
-    pArchive_name++;
-  }
-  return MZ_TRUE;
-}
-
-static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(
-    mz_zip_archive *pZip) {
-  mz_uint32 n;
-  if (!pZip->m_file_offset_alignment) return 0;
-  n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1));
-  return (pZip->m_file_offset_alignment - n) &
-         (pZip->m_file_offset_alignment - 1);
-}
-
-static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip,
-                                         mz_uint64 cur_file_ofs, mz_uint32 n) {
-  char buf[4096];
-  memset(buf, 0, MZ_MIN(sizeof(buf), n));
-  while (n) {
-    mz_uint32 s = MZ_MIN(sizeof(buf), n);
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s)
-      return MZ_FALSE;
-    cur_file_ofs += s;
-    n -= s;
-  }
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip,
-                                 const char *pArchive_name, const void *pBuf,
-                                 size_t buf_size, const void *pComment,
-                                 mz_uint16 comment_size,
-                                 mz_uint level_and_flags, mz_uint64 uncomp_size,
-                                 mz_uint32 uncomp_crc32) {
-  mz_uint16 method = 0, dos_time = 0, dos_date = 0;
-  mz_uint level, ext_attributes = 0, num_alignment_padding_bytes;
-  mz_uint64 local_dir_header_ofs = pZip->m_archive_size,
-            cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0;
-  size_t archive_name_size;
-  mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
-  tdefl_compressor *pComp = NULL;
-  mz_bool store_data_uncompressed;
-  mz_zip_internal_state *pState;
-
-  if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL;
-  level = level_and_flags & 0xF;
-  store_data_uncompressed =
-      ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA));
-
-  if ((!pZip) || (!pZip->m_pState) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) ||
-      (!pArchive_name) || ((comment_size) && (!pComment)) ||
-      (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-
-  if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size))
-    return MZ_FALSE;
-  // No zip64 support yet
-  if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE;
-  if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE;
-
-#ifndef MINIZ_NO_TIME
-  {
-    time_t cur_time;
-    time(&cur_time);
-    mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date);
-  }
-#endif  // #ifndef MINIZ_NO_TIME
-
-  archive_name_size = strlen(pArchive_name);
-  if (archive_name_size > 0xFFFF) return MZ_FALSE;
-
-  num_alignment_padding_bytes =
-      mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
-
-  // no zip64 support yet
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + num_alignment_padding_bytes +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-        comment_size + archive_name_size) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) {
-    // Set DOS Subdirectory attribute bit.
-    ext_attributes |= 0x10;
-    // Subdirectories cannot contain data.
-    if ((buf_size) || (uncomp_size)) return MZ_FALSE;
-  }
-
-  // Try to do any allocations before writing to the archive, so if an
-  // allocation fails the file remains unmodified. (A good idea if we're doing
-  // an in-place modification.)
-  if ((!mz_zip_array_ensure_room(
-          pZip, &pState->m_central_dir,
-          MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) ||
-      (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1)))
-    return MZ_FALSE;
-
-  if ((!store_data_uncompressed) && (buf_size)) {
-    if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(
-                     pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor))))
-      return MZ_FALSE;
-  }
-
-  if (!mz_zip_writer_write_zeros(
-          pZip, cur_archive_file_ofs,
-          num_alignment_padding_bytes + sizeof(local_dir_header))) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-    return MZ_FALSE;
-  }
-  local_dir_header_ofs += num_alignment_padding_bytes;
-  if (pZip->m_file_offset_alignment) {
-    MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) ==
-              0);
-  }
-  cur_archive_file_ofs +=
-      num_alignment_padding_bytes + sizeof(local_dir_header);
-
-  MZ_CLEAR_OBJ(local_dir_header);
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name,
-                     archive_name_size) != archive_name_size) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-    return MZ_FALSE;
-  }
-  cur_archive_file_ofs += archive_name_size;
-
-  if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) {
-    uncomp_crc32 =
-        (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size);
-    uncomp_size = buf_size;
-    if (uncomp_size <= 3) {
-      level = 0;
-      store_data_uncompressed = MZ_TRUE;
-    }
-  }
-
-  if (store_data_uncompressed) {
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf,
-                       buf_size) != buf_size) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-      return MZ_FALSE;
-    }
-
-    cur_archive_file_ofs += buf_size;
-    comp_size = buf_size;
-
-    if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED;
-  } else if (buf_size) {
-    mz_zip_writer_add_state state;
-
-    state.m_pZip = pZip;
-    state.m_cur_archive_file_ofs = cur_archive_file_ofs;
-    state.m_comp_size = 0;
-
-    if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state,
-                    tdefl_create_comp_flags_from_zip_params(
-                        level, -15, MZ_DEFAULT_STRATEGY)) !=
-         TDEFL_STATUS_OKAY) ||
-        (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) !=
-         TDEFL_STATUS_DONE)) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-      return MZ_FALSE;
-    }
-
-    comp_size = state.m_comp_size;
-    cur_archive_file_ofs = state.m_cur_archive_file_ofs;
-
-    method = MZ_DEFLATED;
-  }
-
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-  pComp = NULL;
-
-  // no zip64 support yet
-  if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_create_local_dir_header(
-          pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size,
-          comp_size, uncomp_crc32, method, 0, dos_time, dos_date))
-    return MZ_FALSE;
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header,
-                     sizeof(local_dir_header)) != sizeof(local_dir_header))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_add_to_central_dir(
-          pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment,
-          comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0,
-          dos_time, dos_date, local_dir_header_ofs, ext_attributes))
-    return MZ_FALSE;
-
-  pZip->m_total_files++;
-  pZip->m_archive_size = cur_archive_file_ofs;
-
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name,
-                               const char *pSrc_filename, const void *pComment,
-                               mz_uint16 comment_size,
-                               mz_uint level_and_flags) {
-  mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes;
-  mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0;
-  mz_uint64 local_dir_header_ofs = pZip->m_archive_size,
-            cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0,
-            comp_size = 0;
-  size_t archive_name_size;
-  mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
-  MZ_FILE *pSrc_file = NULL;
-
-  if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL;
-  level = level_and_flags & 0xF;
-
-  if ((!pZip) || (!pZip->m_pState) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) ||
-      ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION))
-    return MZ_FALSE;
-  if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE;
-  if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE;
-
-  archive_name_size = strlen(pArchive_name);
-  if (archive_name_size > 0xFFFF) return MZ_FALSE;
-
-  num_alignment_padding_bytes =
-      mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
-
-  // no zip64 support yet
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + num_alignment_padding_bytes +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-        comment_size + archive_name_size) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date))
-    return MZ_FALSE;
-
-  pSrc_file = MZ_FOPEN(pSrc_filename, "rb");
-  if (!pSrc_file) return MZ_FALSE;
-  MZ_FSEEK64(pSrc_file, 0, SEEK_END);
-  uncomp_size = MZ_FTELL64(pSrc_file);
-  MZ_FSEEK64(pSrc_file, 0, SEEK_SET);
-
-  if (uncomp_size > 0xFFFFFFFF) {
-    // No zip64 support yet
-    MZ_FCLOSE(pSrc_file);
-    return MZ_FALSE;
-  }
-  if (uncomp_size <= 3) level = 0;
-
-  if (!mz_zip_writer_write_zeros(
-          pZip, cur_archive_file_ofs,
-          num_alignment_padding_bytes + sizeof(local_dir_header))) {
-    MZ_FCLOSE(pSrc_file);
-    return MZ_FALSE;
-  }
-  local_dir_header_ofs += num_alignment_padding_bytes;
-  if (pZip->m_file_offset_alignment) {
-    MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) ==
-              0);
-  }
-  cur_archive_file_ofs +=
-      num_alignment_padding_bytes + sizeof(local_dir_header);
-
-  MZ_CLEAR_OBJ(local_dir_header);
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name,
-                     archive_name_size) != archive_name_size) {
-    MZ_FCLOSE(pSrc_file);
-    return MZ_FALSE;
-  }
-  cur_archive_file_ofs += archive_name_size;
-
-  if (uncomp_size) {
-    mz_uint64 uncomp_remaining = uncomp_size;
-    void *pRead_buf =
-        pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE);
-    if (!pRead_buf) {
-      MZ_FCLOSE(pSrc_file);
-      return MZ_FALSE;
-    }
-
-    if (!level) {
-      while (uncomp_remaining) {
-        mz_uint n =
-            (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining);
-        if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) ||
-            (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf,
-                            n) != n)) {
-          pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-          MZ_FCLOSE(pSrc_file);
-          return MZ_FALSE;
-        }
-        uncomp_crc32 =
-            (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n);
-        uncomp_remaining -= n;
-        cur_archive_file_ofs += n;
-      }
-      comp_size = uncomp_size;
-    } else {
-      mz_bool result = MZ_FALSE;
-      mz_zip_writer_add_state state;
-      tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(
-          pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor));
-      if (!pComp) {
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-        MZ_FCLOSE(pSrc_file);
-        return MZ_FALSE;
-      }
-
-      state.m_pZip = pZip;
-      state.m_cur_archive_file_ofs = cur_archive_file_ofs;
-      state.m_comp_size = 0;
-
-      if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state,
-                     tdefl_create_comp_flags_from_zip_params(
-                         level, -15, MZ_DEFAULT_STRATEGY)) !=
-          TDEFL_STATUS_OKAY) {
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-        MZ_FCLOSE(pSrc_file);
-        return MZ_FALSE;
-      }
-
-      for (;;) {
-        size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining,
-                                               (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE);
-        tdefl_status status;
-
-        if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size)
-          break;
-
-        uncomp_crc32 = (mz_uint32)mz_crc32(
-            uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size);
-        uncomp_remaining -= in_buf_size;
-
-        status = tdefl_compress_buffer(
-            pComp, pRead_buf, in_buf_size,
-            uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH);
-        if (status == TDEFL_STATUS_DONE) {
-          result = MZ_TRUE;
-          break;
-        } else if (status != TDEFL_STATUS_OKAY)
-          break;
-      }
-
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-
-      if (!result) {
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-        MZ_FCLOSE(pSrc_file);
-        return MZ_FALSE;
-      }
-
-      comp_size = state.m_comp_size;
-      cur_archive_file_ofs = state.m_cur_archive_file_ofs;
-
-      method = MZ_DEFLATED;
-    }
-
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-  }
-
-  MZ_FCLOSE(pSrc_file);
-  pSrc_file = NULL;
-
-  // no zip64 support yet
-  if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_create_local_dir_header(
-          pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size,
-          comp_size, uncomp_crc32, method, 0, dos_time, dos_date))
-    return MZ_FALSE;
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header,
-                     sizeof(local_dir_header)) != sizeof(local_dir_header))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_add_to_central_dir(
-          pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment,
-          comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0,
-          dos_time, dos_date, local_dir_header_ofs, ext_attributes))
-    return MZ_FALSE;
-
-  pZip->m_total_files++;
-  pZip->m_archive_size = cur_archive_file_ofs;
-
-  return MZ_TRUE;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip,
-                                          mz_zip_archive *pSource_zip,
-                                          mz_uint file_index) {
-  mz_uint n, bit_flags, num_alignment_padding_bytes;
-  mz_uint64 comp_bytes_remaining, local_dir_header_ofs;
-  mz_uint64 cur_src_file_ofs, cur_dst_file_ofs;
-  mz_uint32
-      local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) /
-                       sizeof(mz_uint32)];
-  mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
-  mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
-  size_t orig_central_dir_size;
-  mz_zip_internal_state *pState;
-  void *pBuf;
-  const mz_uint8 *pSrc_central_header;
-
-  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
-    return MZ_FALSE;
-  if (NULL ==
-      (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index)))
-    return MZ_FALSE;
-  pState = pZip->m_pState;
-
-  num_alignment_padding_bytes =
-      mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
-
-  // no zip64 support yet
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + num_alignment_padding_bytes +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) >
-       0xFFFFFFFF))
-    return MZ_FALSE;
-
-  cur_src_file_ofs =
-      MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
-  cur_dst_file_ofs = pZip->m_archive_size;
-
-  if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs,
-                           pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
-    return MZ_FALSE;
-  cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
-
-  if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs,
-                                 num_alignment_padding_bytes))
-    return MZ_FALSE;
-  cur_dst_file_ofs += num_alignment_padding_bytes;
-  local_dir_header_ofs = cur_dst_file_ofs;
-  if (pZip->m_file_offset_alignment) {
-    MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) ==
-              0);
-  }
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header,
-                     MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
-
-  n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) +
-      MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
-  comp_bytes_remaining =
-      n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-
-  if (NULL == (pBuf = pZip->m_pAlloc(
-                   pZip->m_pAlloc_opaque, 1,
-                   (size_t)MZ_MAX(sizeof(mz_uint32) * 4,
-                                  MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE,
-                                         comp_bytes_remaining)))))
-    return MZ_FALSE;
-
-  while (comp_bytes_remaining) {
-    n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining);
-    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf,
-                             n) != n) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-    cur_src_file_ofs += n;
-
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-    cur_dst_file_ofs += n;
-
-    comp_bytes_remaining -= n;
-  }
-
-  bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS);
-  if (bit_flags & 8) {
-    // Copy data descriptor
-    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf,
-                             sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-
-    n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3);
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-
-    cur_src_file_ofs += n;
-    cur_dst_file_ofs += n;
-  }
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-
-  // no zip64 support yet
-  if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE;
-
-  orig_central_dir_size = pState->m_central_dir.m_size;
-
-  memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
-  MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS,
-                local_dir_header_ofs);
-  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header,
-                              MZ_ZIP_CENTRAL_DIR_HEADER_SIZE))
-    return MZ_FALSE;
-
-  n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) +
-      MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) +
-      MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS);
-  if (!mz_zip_array_push_back(
-          pZip, &pState->m_central_dir,
-          pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) {
-    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size,
-                        MZ_FALSE);
-    return MZ_FALSE;
-  }
-
-  if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE;
-  n = (mz_uint32)orig_central_dir_size;
-  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) {
-    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size,
-                        MZ_FALSE);
-    return MZ_FALSE;
-  }
-
-  pZip->m_total_files++;
-  pZip->m_archive_size = cur_dst_file_ofs;
-
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) {
-  mz_zip_internal_state *pState;
-  mz_uint64 central_dir_ofs, central_dir_size;
-  mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE];
-
-  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-
-  // no zip64 support yet
-  if ((pZip->m_total_files > 0xFFFF) ||
-      ((pZip->m_archive_size + pState->m_central_dir.m_size +
-        MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  central_dir_ofs = 0;
-  central_dir_size = 0;
-  if (pZip->m_total_files) {
-    // Write central directory
-    central_dir_ofs = pZip->m_archive_size;
-    central_dir_size = pState->m_central_dir.m_size;
-    pZip->m_central_directory_file_ofs = central_dir_ofs;
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs,
-                       pState->m_central_dir.m_p,
-                       (size_t)central_dir_size) != central_dir_size)
-      return MZ_FALSE;
-    pZip->m_archive_size += central_dir_size;
-  }
-
-  // Write end of central directory record
-  MZ_CLEAR_OBJ(hdr);
-  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS,
-                MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG);
-  MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS,
-                pZip->m_total_files);
-  MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files);
-  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size);
-  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs);
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr,
-                     sizeof(hdr)) != sizeof(hdr))
-    return MZ_FALSE;
-#ifndef MINIZ_NO_STDIO
-  if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE;
-#endif  // #ifndef MINIZ_NO_STDIO
-
-  pZip->m_archive_size += sizeof(hdr);
-
-  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED;
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf,
-                                            size_t *pSize) {
-  if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE;
-  if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE;
-  if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE;
-
-  *pBuf = pZip->m_pState->m_pMem;
-  *pSize = pZip->m_pState->m_mem_size;
-  pZip->m_pState->m_pMem = NULL;
-  pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0;
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_end(mz_zip_archive *pZip) {
-  mz_zip_internal_state *pState;
-  mz_bool status = MZ_TRUE;
-  if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) ||
-      ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) &&
-       (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-  pZip->m_pState = NULL;
-  mz_zip_array_clear(pZip, &pState->m_central_dir);
-  mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
-  mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
-
-#ifndef MINIZ_NO_STDIO
-  if (pState->m_pFile) {
-    MZ_FCLOSE(pState->m_pFile);
-    pState->m_pFile = NULL;
-  }
-#endif  // #ifndef MINIZ_NO_STDIO
-
-  if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem);
-    pState->m_pMem = NULL;
-  }
-
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
-  pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
-  return status;
-}
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_add_mem_to_archive_file_in_place(
-    const char *pZip_filename, const char *pArchive_name, const void *pBuf,
-    size_t buf_size, const void *pComment, mz_uint16 comment_size,
-    mz_uint level_and_flags) {
-  mz_bool status, created_new_archive = MZ_FALSE;
-  mz_zip_archive zip_archive;
-  struct MZ_FILE_STAT_STRUCT file_stat;
-  MZ_CLEAR_OBJ(zip_archive);
-  if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL;
-  if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) ||
-      ((comment_size) && (!pComment)) ||
-      ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION))
-    return MZ_FALSE;
-  if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE;
-  if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) {
-    // Create a new archive.
-    if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0))
-      return MZ_FALSE;
-    created_new_archive = MZ_TRUE;
-  } else {
-    // Append to an existing archive.
-    if (!mz_zip_reader_init_file(
-            &zip_archive, pZip_filename,
-            level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY))
-      return MZ_FALSE;
-    if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) {
-      mz_zip_reader_end(&zip_archive);
-      return MZ_FALSE;
-    }
-  }
-  status =
-      mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size,
-                               pComment, comment_size, level_and_flags, 0, 0);
-  // Always finalize, even if adding failed for some reason, so we have a valid
-  // central directory. (This may not always succeed, but we can try.)
-  if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE;
-  if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE;
-  if ((!status) && (created_new_archive)) {
-    // It's a new archive and something went wrong, so just delete it.
-    int ignoredStatus = MZ_DELETE_FILE(pZip_filename);
-    (void)ignoredStatus;
-  }
-  return status;
-}
-
-void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename,
-                                          const char *pArchive_name,
-                                          size_t *pSize, mz_uint flags) {
-  int file_index;
-  mz_zip_archive zip_archive;
-  void *p = NULL;
-
-  if (pSize) *pSize = 0;
-
-  if ((!pZip_filename) || (!pArchive_name)) return NULL;
-
-  MZ_CLEAR_OBJ(zip_archive);
-  if (!mz_zip_reader_init_file(
-          &zip_archive, pZip_filename,
-          flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY))
-    return NULL;
-
-  if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL,
-                                              flags)) >= 0)
-    p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags);
-
-  mz_zip_reader_end(&zip_archive);
-  return p;
-}
-
-#endif  // #ifndef MINIZ_NO_STDIO
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_APIS
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINIZ_HEADER_FILE_ONLY
-
-/*
-  This is free and unencumbered software released into the public domain.
-
-  Anyone is free to copy, modify, publish, use, compile, sell, or
-  distribute this software, either in source code form or as a compiled
-  binary, for any purpose, commercial or non-commercial, and by any
-  means.
-
-  In jurisdictions that recognize copyright laws, the author or authors
-  of this software dedicate any and all copyright interest in the
-  software to the public domain. We make this dedication for the benefit
-  of the public at large and to the detriment of our heirs and
-  successors. We intend this dedication to be an overt act of
-  relinquishment in perpetuity of all present and future rights to this
-  software under copyright law.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-  OTHER DEALINGS IN THE SOFTWARE.
-
-  For more information, please refer to <http://unlicense.org/>
-*/
-
-// ---------------------- end of miniz ----------------------------------------
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-}
-#else
-
-// Reuse MINIZ_LITTE_ENDIAN macro
-
-#if defined(__sparcv9)
-// Big endian
-#else
-#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
-// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
-#define MINIZ_LITTLE_ENDIAN 1
-#endif
-#endif
-
-#endif  // TINYEXR_USE_MINIZ
-
-// static bool IsBigEndian(void) {
-//  union {
-//    unsigned int i;
-//    char c[4];
-//  } bint = {0x01020304};
-//
-//  return bint.c[0] == 1;
-//}
-
-static const int kEXRVersionSize = 8;
-
-static void swap2(unsigned short *val) {
-#ifdef MINIZ_LITTLE_ENDIAN
-  (void)val;
-#else
-  unsigned short tmp = *val;
-  unsigned char *dst = reinterpret_cast<unsigned char *>(val);
-  unsigned char *src = reinterpret_cast<unsigned char *>(&tmp);
-
-  dst[0] = src[1];
-  dst[1] = src[0];
-#endif
-}
-
-static void swap4(unsigned int *val) {
-#ifdef MINIZ_LITTLE_ENDIAN
-  (void)val;
-#else
-  unsigned int tmp = *val;
-  unsigned char *dst = reinterpret_cast<unsigned char *>(val);
-  unsigned char *src = reinterpret_cast<unsigned char *>(&tmp);
-
-  dst[0] = src[3];
-  dst[1] = src[2];
-  dst[2] = src[1];
-  dst[3] = src[0];
-#endif
-}
-
-static void swap8(tinyexr::tinyexr_uint64 *val) {
-#ifdef MINIZ_LITTLE_ENDIAN
-  (void)val;
-#else
-  tinyexr::tinyexr_uint64 tmp = (*val);
-  unsigned char *dst = reinterpret_cast<unsigned char *>(val);
-  unsigned char *src = reinterpret_cast<unsigned char *>(&tmp);
-
-  dst[0] = src[7];
-  dst[1] = src[6];
-  dst[2] = src[5];
-  dst[3] = src[4];
-  dst[4] = src[3];
-  dst[5] = src[2];
-  dst[6] = src[1];
-  dst[7] = src[0];
-#endif
-}
-
-// https://gist.github.com/rygorous/2156668
-// Reuse MINIZ_LITTLE_ENDIAN flag from miniz.
-union FP32 {
-  unsigned int u;
-  float f;
-  struct {
-#if MINIZ_LITTLE_ENDIAN
-    unsigned int Mantissa : 23;
-    unsigned int Exponent : 8;
-    unsigned int Sign : 1;
-#else
-    unsigned int Sign : 1;
-    unsigned int Exponent : 8;
-    unsigned int Mantissa : 23;
-#endif
-  } s;
-};
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wpadded"
-#endif
-
-union FP16 {
-  unsigned short u;
-  struct {
-#if MINIZ_LITTLE_ENDIAN
-    unsigned int Mantissa : 10;
-    unsigned int Exponent : 5;
-    unsigned int Sign : 1;
-#else
-    unsigned int Sign : 1;
-    unsigned int Exponent : 5;
-    unsigned int Mantissa : 10;
-#endif
-  } s;
-};
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-static FP32 half_to_float(FP16 h) {
-  static const FP32 magic = {113 << 23};
-  static const unsigned int shifted_exp = 0x7c00
-                                          << 13;  // exponent mask after shift
-  FP32 o;
-
-  o.u = (h.u & 0x7fffU) << 13U;           // exponent/mantissa bits
-  unsigned int exp_ = shifted_exp & o.u;  // just the exponent
-  o.u += (127 - 15) << 23;                // exponent adjust
-
-  // handle exponent special cases
-  if (exp_ == shifted_exp)    // Inf/NaN?
-    o.u += (128 - 16) << 23;  // extra exp adjust
-  else if (exp_ == 0)         // Zero/Denormal?
-  {
-    o.u += 1 << 23;  // extra exp adjust
-    o.f -= magic.f;  // renormalize
-  }
-
-  o.u |= (h.u & 0x8000U) << 16U;  // sign bit
-  return o;
-}
-
-static FP16 float_to_half_full(FP32 f) {
-  FP16 o = {0};
-
-  // Based on ISPC reference code (with minor modifications)
-  if (f.s.Exponent == 0)  // Signed zero/denormal (which will underflow)
-    o.s.Exponent = 0;
-  else if (f.s.Exponent == 255)  // Inf or NaN (all exponent bits set)
-  {
-    o.s.Exponent = 31;
-    o.s.Mantissa = f.s.Mantissa ? 0x200 : 0;  // NaN->qNaN and Inf->Inf
-  } else                                      // Normalized number
-  {
-    // Exponent unbias the single, then bias the halfp
-    int newexp = f.s.Exponent - 127 + 15;
-    if (newexp >= 31)  // Overflow, return signed infinity
-      o.s.Exponent = 31;
-    else if (newexp <= 0)  // Underflow
-    {
-      if ((14 - newexp) <= 24)  // Mantissa might be non-zero
-      {
-        unsigned int mant = f.s.Mantissa | 0x800000;  // Hidden 1 bit
-        o.s.Mantissa = mant >> (14 - newexp);
-        if ((mant >> (13 - newexp)) & 1)  // Check for rounding
-          o.u++;  // Round, might overflow into exp bit, but this is OK
-      }
-    } else {
-      o.s.Exponent = static_cast<unsigned int>(newexp);
-      o.s.Mantissa = f.s.Mantissa >> 13;
-      if (f.s.Mantissa & 0x1000)  // Check for rounding
-        o.u++;                    // Round, might overflow to inf, this is OK
-    }
-  }
-
-  o.s.Sign = f.s.Sign;
-  return o;
-}
-
-// NOTE: From OpenEXR code
-// #define IMF_INCREASING_Y  0
-// #define IMF_DECREASING_Y  1
-// #define IMF_RAMDOM_Y    2
-//
-// #define IMF_NO_COMPRESSION  0
-// #define IMF_RLE_COMPRESSION 1
-// #define IMF_ZIPS_COMPRESSION  2
-// #define IMF_ZIP_COMPRESSION 3
-// #define IMF_PIZ_COMPRESSION 4
-// #define IMF_PXR24_COMPRESSION 5
-// #define IMF_B44_COMPRESSION 6
-// #define IMF_B44A_COMPRESSION  7
-
-static const char *ReadString(std::string *s, const char *ptr) {
-  // Read untile NULL(\0).
-  const char *p = ptr;
-  const char *q = ptr;
-  while ((*q) != 0) q++;
-
-  (*s) = std::string(p, q);
-
-  return q + 1;  // skip '\0'
-}
-
-static bool ReadAttribute(std::string *name, std::string *type,
-                          std::vector<unsigned char> *data, size_t *marker_size,
-                          const char *marker, size_t size) {
-  size_t name_len = strnlen(marker, size);
-  if (name_len == size) {
-    // String does not have a terminating character.
-    return false;
-  }
-  *name = std::string(marker, name_len);
-
-  marker += name_len + 1;
-  size -= name_len + 1;
-
-  size_t type_len = strnlen(marker, size);
-  if (type_len == size) {
-    return false;
-  }
-  *type = std::string(marker, type_len);
-
-  marker += type_len + 1;
-  size -= type_len + 1;
-
-  if (size < sizeof(uint32_t)) {
-    return false;
-  }
-
-  uint32_t data_len;
-  memcpy(&data_len, marker, sizeof(uint32_t));
-  tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-  marker += sizeof(uint32_t);
-  size -= sizeof(uint32_t);
-
-  if (size < data_len) {
-    return false;
-  }
-
-  data->resize(static_cast<size_t>(data_len));
-  memcpy(&data->at(0), marker, static_cast<size_t>(data_len));
-
-  *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len;
-  return true;
-}
-
-static void WriteAttributeToMemory(std::vector<unsigned char> *out,
-                                   const char *name, const char *type,
-                                   const unsigned char *data, int len) {
-  out->insert(out->end(), name, name + strlen(name) + 1);
-  out->insert(out->end(), type, type + strlen(type) + 1);
-
-  int outLen = len;
-  tinyexr::swap4(reinterpret_cast<unsigned int *>(&outLen));
-  out->insert(out->end(), reinterpret_cast<unsigned char *>(&outLen),
-              reinterpret_cast<unsigned char *>(&outLen) + sizeof(int));
-  out->insert(out->end(), data, data + len);
-}
-
-typedef struct {
-  std::string name;  // less than 255 bytes long
-  int pixel_type;
-  int x_sampling;
-  int y_sampling;
-  unsigned char p_linear;
-  unsigned char pad[3];
-} ChannelInfo;
-
-typedef struct {
-  std::vector<tinyexr::ChannelInfo> channels;
-  std::vector<EXRAttribute> attributes;
-
-  int data_window[4];
-  int line_order;
-  int display_window[4];
-  float screen_window_center[2];
-  float screen_window_width;
-  float pixel_aspect_ratio;
-
-  int chunk_count;
-
-  // Tiled format
-  int tile_size_x;
-  int tile_size_y;
-  int tile_level_mode;
-  int tile_rounding_mode;
-
-  unsigned int header_len;
-
-  int compression_type;
-
-  void clear() {
-    channels.clear();
-    attributes.clear();
-
-    data_window[0] = 0;
-    data_window[1] = 0;
-    data_window[2] = 0;
-    data_window[3] = 0;
-    line_order = 0;
-    display_window[0] = 0;
-    display_window[1] = 0;
-    display_window[2] = 0;
-    display_window[3] = 0;
-    screen_window_center[0] = 0.0f;
-    screen_window_center[1] = 0.0f;
-    screen_window_width = 0.0f;
-    pixel_aspect_ratio = 0.0f;
-
-    chunk_count = 0;
-
-    // Tiled format
-    tile_size_x = 0;
-    tile_size_y = 0;
-    tile_level_mode = 0;
-    tile_rounding_mode = 0;
-
-    header_len = 0;
-    compression_type = 0;
-  }
-} HeaderInfo;
-
-static void ReadChannelInfo(std::vector<ChannelInfo> &channels,
-                            const std::vector<unsigned char> &data) {
-  const char *p = reinterpret_cast<const char *>(&data.at(0));
-
-  for (;;) {
-    if ((*p) == 0) {
-      break;
-    }
-    ChannelInfo info;
-    p = ReadString(&info.name, p);
-
-    memcpy(&info.pixel_type, p, sizeof(int));
-    p += 4;
-    info.p_linear = static_cast<unsigned char>(p[0]);  // uchar
-    p += 1 + 3;                                        // reserved: uchar[3]
-    memcpy(&info.x_sampling, p, sizeof(int));          // int
-    p += 4;
-    memcpy(&info.y_sampling, p, sizeof(int));  // int
-    p += 4;
-
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.pixel_type));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.x_sampling));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.y_sampling));
-
-    channels.push_back(info);
-  }
-}
-
-static void WriteChannelInfo(std::vector<unsigned char> &data,
-                             const std::vector<ChannelInfo> &channels) {
-  size_t sz = 0;
-
-  // Calculate total size.
-  for (size_t c = 0; c < channels.size(); c++) {
-    sz += strlen(channels[c].name.c_str()) + 1;  // +1 for \0
-    sz += 16;                                    // 4 * int
-  }
-  data.resize(sz + 1);
-
-  unsigned char *p = &data.at(0);
-
-  for (size_t c = 0; c < channels.size(); c++) {
-    memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str()));
-    p += strlen(channels[c].name.c_str());
-    (*p) = '\0';
-    p++;
-
-    int pixel_type = channels[c].pixel_type;
-    int x_sampling = channels[c].x_sampling;
-    int y_sampling = channels[c].y_sampling;
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&pixel_type));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&x_sampling));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&y_sampling));
-
-    memcpy(p, &pixel_type, sizeof(int));
-    p += sizeof(int);
-
-    (*p) = channels[c].p_linear;
-    p += 4;
-
-    memcpy(p, &x_sampling, sizeof(int));
-    p += sizeof(int);
-
-    memcpy(p, &y_sampling, sizeof(int));
-    p += sizeof(int);
-  }
-
-  (*p) = '\0';
-}
-
-static void CompressZip(unsigned char *dst,
-                        tinyexr::tinyexr_uint64 &compressedSize,
-                        const unsigned char *src, unsigned long src_size) {
-  std::vector<unsigned char> tmpBuf(src_size);
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfZipCompressor.cpp
-  //
-
-  //
-  // Reorder the pixel data.
-  //
-
-  const char *srcPtr = reinterpret_cast<const char *>(src);
-
-  {
-    char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0));
-    char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2;
-    const char *stop = srcPtr + src_size;
-
-    for (;;) {
-      if (srcPtr < stop)
-        *(t1++) = *(srcPtr++);
-      else
-        break;
-
-      if (srcPtr < stop)
-        *(t2++) = *(srcPtr++);
-      else
-        break;
-    }
-  }
-
-  //
-  // Predictor.
-  //
-
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + src_size;
-    int p = t[-1];
-
-    while (t < stop) {
-      int d = int(t[0]) - p + (128 + 256);
-      p = t[0];
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-#if TINYEXR_USE_MINIZ
-  //
-  // Compress the data using miniz
-  //
-
-  miniz::mz_ulong outSize = miniz::mz_compressBound(src_size);
-  int ret = miniz::mz_compress(
-      dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)),
-      src_size);
-  assert(ret == miniz::MZ_OK);
-  (void)ret;
-
-  compressedSize = outSize;
-#else
-  uLong outSize = compressBound(static_cast<uLong>(src_size));
-  int ret = compress(dst, &outSize, static_cast<const Bytef *>(&tmpBuf.at(0)),
-                     src_size);
-  assert(ret == Z_OK);
-
-  compressedSize = outSize;
-#endif
-}
-
-static void DecompressZip(unsigned char *dst,
-                          unsigned long *uncompressed_size /* inout */,
-                          const unsigned char *src, unsigned long src_size) {
-  std::vector<unsigned char> tmpBuf(*uncompressed_size);
-
-#if TINYEXR_USE_MINIZ
-  int ret =
-      miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
-  assert(ret == miniz::MZ_OK);
-  (void)ret;
-#else
-  int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
-  assert(ret == Z_OK);
-  (void)ret;
-#endif
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfZipCompressor.cpp
-  //
-
-  // Predictor.
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size);
-
-    while (t < stop) {
-      int d = int(t[-1]) + int(t[0]) - 128;
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-  // Reorder the pixel data.
-  {
-    const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0));
-    const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) +
-                     (*uncompressed_size + 1) / 2;
-    char *s = reinterpret_cast<char *>(dst);
-    char *stop = s + (*uncompressed_size);
-
-    for (;;) {
-      if (s < stop)
-        *(s++) = *(t1++);
-      else
-        break;
-
-      if (s < stop)
-        *(s++) = *(t2++);
-      else
-        break;
-    }
-  }
-}
-
-// RLE code from OpenEXR --------------------------------------
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wsign-conversion"
-#endif
-
-const int MIN_RUN_LENGTH = 3;
-const int MAX_RUN_LENGTH = 127;
-
-//
-// Compress an array of bytes, using run-length encoding,
-// and return the length of the compressed data.
-//
-
-static int rleCompress(int inLength, const char in[], signed char out[]) {
-  const char *inEnd = in + inLength;
-  const char *runStart = in;
-  const char *runEnd = in + 1;
-  signed char *outWrite = out;
-
-  while (runStart < inEnd) {
-    while (runEnd < inEnd && *runStart == *runEnd &&
-           runEnd - runStart - 1 < MAX_RUN_LENGTH) {
-      ++runEnd;
-    }
-
-    if (runEnd - runStart >= MIN_RUN_LENGTH) {
-      //
-      // Compressable run
-      //
-
-      *outWrite++ = static_cast<char>(runEnd - runStart) - 1;
-      *outWrite++ = *(reinterpret_cast<const signed char *>(runStart));
-      runStart = runEnd;
-    } else {
-      //
-      // Uncompressable run
-      //
-
-      while (runEnd < inEnd &&
-             ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) ||
-              (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) &&
-             runEnd - runStart < MAX_RUN_LENGTH) {
-        ++runEnd;
-      }
-
-      *outWrite++ = static_cast<char>(runStart - runEnd);
-
-      while (runStart < runEnd) {
-        *outWrite++ = *(reinterpret_cast<const signed char *>(runStart++));
-      }
-    }
-
-    ++runEnd;
-  }
-
-  return static_cast<int>(outWrite - out);
-}
-
-//
-// Uncompress an array of bytes compressed with rleCompress().
-// Returns the length of the oncompressed data, or 0 if the
-// length of the uncompressed data would be more than maxLength.
-//
-
-static int rleUncompress(int inLength, int maxLength, const signed char in[],
-                         char out[]) {
-  char *outStart = out;
-
-  while (inLength > 0) {
-    if (*in < 0) {
-      int count = -(static_cast<int>(*in++));
-      inLength -= count + 1;
-
-      if (0 > (maxLength -= count)) return 0;
-
-      memcpy(out, in, count);
-      out += count;
-      in += count;
-    } else {
-      int count = *in++;
-      inLength -= 2;
-
-      if (0 > (maxLength -= count + 1)) return 0;
-
-      memset(out, *reinterpret_cast<const char *>(in), count + 1);
-      out += count + 1;
-
-      in++;
-    }
-  }
-
-  return static_cast<int>(out - outStart);
-}
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-// End of RLE code from OpenEXR -----------------------------------
-
-static void CompressRle(unsigned char *dst,
-                        tinyexr::tinyexr_uint64 &compressedSize,
-                        const unsigned char *src, unsigned long src_size) {
-  std::vector<unsigned char> tmpBuf(src_size);
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfRleCompressor.cpp
-  //
-
-  //
-  // Reorder the pixel data.
-  //
-
-  const char *srcPtr = reinterpret_cast<const char *>(src);
-
-  {
-    char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0));
-    char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2;
-    const char *stop = srcPtr + src_size;
-
-    for (;;) {
-      if (srcPtr < stop)
-        *(t1++) = *(srcPtr++);
-      else
-        break;
-
-      if (srcPtr < stop)
-        *(t2++) = *(srcPtr++);
-      else
-        break;
-    }
-  }
-
-  //
-  // Predictor.
-  //
-
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + src_size;
-    int p = t[-1];
-
-    while (t < stop) {
-      int d = int(t[0]) - p + (128 + 256);
-      p = t[0];
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-  // outSize will be (srcSiz * 3) / 2 at max.
-  int outSize = rleCompress(static_cast<int>(src_size),
-                            reinterpret_cast<const char *>(&tmpBuf.at(0)),
-                            reinterpret_cast<signed char *>(dst));
-  assert(outSize > 0);
-
-  compressedSize = static_cast<tinyexr::tinyexr_uint64>(outSize);
-}
-
-static void DecompressRle(unsigned char *dst,
-                          const unsigned long uncompressed_size,
-                          const unsigned char *src, unsigned long src_size) {
-  std::vector<unsigned char> tmpBuf(uncompressed_size);
-
-  int ret = rleUncompress(static_cast<int>(src_size),
-                          static_cast<int>(uncompressed_size),
-                          reinterpret_cast<const signed char *>(src),
-                          reinterpret_cast<char *>(&tmpBuf.at(0)));
-  assert(ret == static_cast<int>(uncompressed_size));
-  (void)ret;
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfRleCompressor.cpp
-  //
-
-  // Predictor.
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + uncompressed_size;
-
-    while (t < stop) {
-      int d = int(t[-1]) + int(t[0]) - 128;
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-  // Reorder the pixel data.
-  {
-    const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0));
-    const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) +
-                     (uncompressed_size + 1) / 2;
-    char *s = reinterpret_cast<char *>(dst);
-    char *stop = s + uncompressed_size;
-
-    for (;;) {
-      if (s < stop)
-        *(s++) = *(t1++);
-      else
-        break;
-
-      if (s < stop)
-        *(s++) = *(t2++);
-      else
-        break;
-    }
-  }
-}
-
-#if TINYEXR_USE_PIZ
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wc++11-long-long"
-#pragma clang diagnostic ignored "-Wold-style-cast"
-#pragma clang diagnostic ignored "-Wpadded"
-#pragma clang diagnostic ignored "-Wsign-conversion"
-#pragma clang diagnostic ignored "-Wc++11-extensions"
-#pragma clang diagnostic ignored "-Wconversion"
-#endif
-
-//
-// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp
-//
-// -----------------------------------------------------------------
-// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas
-// Digital Ltd. LLC)
-// (3 clause BSD license)
-//
-
-struct PIZChannelData {
-  unsigned short *start;
-  unsigned short *end;
-  int nx;
-  int ny;
-  int ys;
-  int size;
-};
-
-//-----------------------------------------------------------------------------
-//
-//  16-bit Haar Wavelet encoding and decoding
-//
-//  The source code in this file is derived from the encoding
-//  and decoding routines written by Christian Rouet for his
-//  PIZ image file format.
-//
-//-----------------------------------------------------------------------------
-
-//
-// Wavelet basis functions without modulo arithmetic; they produce
-// the best compression ratios when the wavelet-transformed data are
-// Huffman-encoded, but the wavelet transform works only for 14-bit
-// data (untransformed data values must be less than (1 << 14)).
-//
-
-inline void wenc14(unsigned short a, unsigned short b, unsigned short &l,
-                   unsigned short &h) {
-  short as = static_cast<short>(a);
-  short bs = static_cast<short>(b);
-
-  short ms = (as + bs) >> 1;
-  short ds = as - bs;
-
-  l = static_cast<unsigned short>(ms);
-  h = static_cast<unsigned short>(ds);
-}
-
-inline void wdec14(unsigned short l, unsigned short h, unsigned short &a,
-                   unsigned short &b) {
-  short ls = static_cast<short>(l);
-  short hs = static_cast<short>(h);
-
-  int hi = hs;
-  int ai = ls + (hi & 1) + (hi >> 1);
-
-  short as = static_cast<short>(ai);
-  short bs = static_cast<short>(ai - hi);
-
-  a = static_cast<unsigned short>(as);
-  b = static_cast<unsigned short>(bs);
-}
-
-//
-// Wavelet basis functions with modulo arithmetic; they work with full
-// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't
-// compress the data quite as well.
-//
-
-const int NBITS = 16;
-const int A_OFFSET = 1 << (NBITS - 1);
-const int M_OFFSET = 1 << (NBITS - 1);
-const int MOD_MASK = (1 << NBITS) - 1;
-
-inline void wenc16(unsigned short a, unsigned short b, unsigned short &l,
-                   unsigned short &h) {
-  int ao = (a + A_OFFSET) & MOD_MASK;
-  int m = ((ao + b) >> 1);
-  int d = ao - b;
-
-  if (d < 0) m = (m + M_OFFSET) & MOD_MASK;
-
-  d &= MOD_MASK;
-
-  l = static_cast<unsigned short>(m);
-  h = static_cast<unsigned short>(d);
-}
-
-inline void wdec16(unsigned short l, unsigned short h, unsigned short &a,
-                   unsigned short &b) {
-  int m = l;
-  int d = h;
-  int bb = (m - (d >> 1)) & MOD_MASK;
-  int aa = (d + bb - A_OFFSET) & MOD_MASK;
-  b = static_cast<unsigned short>(bb);
-  a = static_cast<unsigned short>(aa);
-}
-
-//
-// 2D Wavelet encoding:
-//
-
-static void wav2Encode(
-    unsigned short *in,  // io: values are transformed in place
-    int nx,              // i : x size
-    int ox,              // i : x offset
-    int ny,              // i : y size
-    int oy,              // i : y offset
-    unsigned short mx)   // i : maximum in[x][y] value
-{
-  bool w14 = (mx < (1 << 14));
-  int n = (nx > ny) ? ny : nx;
-  int p = 1;   // == 1 <<  level
-  int p2 = 2;  // == 1 << (level+1)
-
-  //
-  // Hierachical loop on smaller dimension n
-  //
-
-  while (p2 <= n) {
-    unsigned short *py = in;
-    unsigned short *ey = in + oy * (ny - p2);
-    int oy1 = oy * p;
-    int oy2 = oy * p2;
-    int ox1 = ox * p;
-    int ox2 = ox * p2;
-    unsigned short i00, i01, i10, i11;
-
-    //
-    // Y loop
-    //
-
-    for (; py <= ey; py += oy2) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      //
-      // X loop
-      //
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-        unsigned short *p10 = px + oy1;
-        unsigned short *p11 = p10 + ox1;
-
-        //
-        // 2D wavelet encoding
-        //
-
-        if (w14) {
-          wenc14(*px, *p01, i00, i01);
-          wenc14(*p10, *p11, i10, i11);
-          wenc14(i00, i10, *px, *p10);
-          wenc14(i01, i11, *p01, *p11);
-        } else {
-          wenc16(*px, *p01, i00, i01);
-          wenc16(*p10, *p11, i10, i11);
-          wenc16(i00, i10, *px, *p10);
-          wenc16(i01, i11, *p01, *p11);
-        }
-      }
-
-      //
-      // Encode (1D) odd column (still in Y loop)
-      //
-
-      if (nx & p) {
-        unsigned short *p10 = px + oy1;
-
-        if (w14)
-          wenc14(*px, *p10, i00, *p10);
-        else
-          wenc16(*px, *p10, i00, *p10);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Encode (1D) odd line (must loop in X)
-    //
-
-    if (ny & p) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-
-        if (w14)
-          wenc14(*px, *p01, i00, *p01);
-        else
-          wenc16(*px, *p01, i00, *p01);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Next level
-    //
-
-    p = p2;
-    p2 <<= 1;
-  }
-}
-
-//
-// 2D Wavelet decoding:
-//
-
-static void wav2Decode(
-    unsigned short *in,  // io: values are transformed in place
-    int nx,              // i : x size
-    int ox,              // i : x offset
-    int ny,              // i : y size
-    int oy,              // i : y offset
-    unsigned short mx)   // i : maximum in[x][y] value
-{
-  bool w14 = (mx < (1 << 14));
-  int n = (nx > ny) ? ny : nx;
-  int p = 1;
-  int p2;
-
-  //
-  // Search max level
-  //
-
-  while (p <= n) p <<= 1;
-
-  p >>= 1;
-  p2 = p;
-  p >>= 1;
-
-  //
-  // Hierarchical loop on smaller dimension n
-  //
-
-  while (p >= 1) {
-    unsigned short *py = in;
-    unsigned short *ey = in + oy * (ny - p2);
-    int oy1 = oy * p;
-    int oy2 = oy * p2;
-    int ox1 = ox * p;
-    int ox2 = ox * p2;
-    unsigned short i00, i01, i10, i11;
-
-    //
-    // Y loop
-    //
-
-    for (; py <= ey; py += oy2) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      //
-      // X loop
-      //
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-        unsigned short *p10 = px + oy1;
-        unsigned short *p11 = p10 + ox1;
-
-        //
-        // 2D wavelet decoding
-        //
-
-        if (w14) {
-          wdec14(*px, *p10, i00, i10);
-          wdec14(*p01, *p11, i01, i11);
-          wdec14(i00, i01, *px, *p01);
-          wdec14(i10, i11, *p10, *p11);
-        } else {
-          wdec16(*px, *p10, i00, i10);
-          wdec16(*p01, *p11, i01, i11);
-          wdec16(i00, i01, *px, *p01);
-          wdec16(i10, i11, *p10, *p11);
-        }
-      }
-
-      //
-      // Decode (1D) odd column (still in Y loop)
-      //
-
-      if (nx & p) {
-        unsigned short *p10 = px + oy1;
-
-        if (w14)
-          wdec14(*px, *p10, i00, *p10);
-        else
-          wdec16(*px, *p10, i00, *p10);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Decode (1D) odd line (must loop in X)
-    //
-
-    if (ny & p) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-
-        if (w14)
-          wdec14(*px, *p01, i00, *p01);
-        else
-          wdec16(*px, *p01, i00, *p01);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Next level
-    //
-
-    p2 = p;
-    p >>= 1;
-  }
-}
-
-//-----------------------------------------------------------------------------
-//
-//  16-bit Huffman compression and decompression.
-//
-//  The source code in this file is derived from the 8-bit
-//  Huffman compression and decompression routines written
-//  by Christian Rouet for his PIZ image file format.
-//
-//-----------------------------------------------------------------------------
-
-// Adds some modification for tinyexr.
-
-const int HUF_ENCBITS = 16;  // literal (value) bit length
-const int HUF_DECBITS = 14;  // decoding bit size (>= 8)
-
-const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1;  // encoding table size
-const int HUF_DECSIZE = 1 << HUF_DECBITS;        // decoding table size
-const int HUF_DECMASK = HUF_DECSIZE - 1;
-
-struct HufDec {  // short code    long code
-  //-------------------------------
-  int len : 8;   // code length    0
-  int lit : 24;  // lit      p size
-  int *p;        // 0      lits
-};
-
-inline long long hufLength(long long code) { return code & 63; }
-
-inline long long hufCode(long long code) { return code >> 6; }
-
-inline void outputBits(int nBits, long long bits, long long &c, int &lc,
-                       char *&out) {
-  c <<= nBits;
-  lc += nBits;
-
-  c |= bits;
-
-  while (lc >= 8) *out++ = static_cast<char>((c >> (lc -= 8)));
-}
-
-inline long long getBits(int nBits, long long &c, int &lc, const char *&in) {
-  while (lc < nBits) {
-    c = (c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
-    lc += 8;
-  }
-
-  lc -= nBits;
-  return (c >> lc) & ((1 << nBits) - 1);
-}
-
-//
-// ENCODING TABLE BUILDING & (UN)PACKING
-//
-
-//
-// Build a "canonical" Huffman code table:
-//  - for each (uncompressed) symbol, hcode contains the length
-//    of the corresponding code (in the compressed data)
-//  - canonical codes are computed and stored in hcode
-//  - the rules for constructing canonical codes are as follows:
-//    * shorter codes (if filled with zeroes to the right)
-//      have a numerically higher value than longer codes
-//    * for codes with the same length, numerical values
-//      increase with numerical symbol values
-//  - because the canonical code table can be constructed from
-//    symbol lengths alone, the code table can be transmitted
-//    without sending the actual code values
-//  - see http://www.compressconsult.com/huffman/
-//
-
-static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) {
-  long long n[59];
-
-  //
-  // For each i from 0 through 58, count the
-  // number of different codes of length i, and
-  // store the count in n[i].
-  //
-
-  for (int i = 0; i <= 58; ++i) n[i] = 0;
-
-  for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1;
-
-  //
-  // For each i from 58 through 1, compute the
-  // numerically lowest code with length i, and
-  // store that code in n[i].
-  //
-
-  long long c = 0;
-
-  for (int i = 58; i > 0; --i) {
-    long long nc = ((c + n[i]) >> 1);
-    n[i] = c;
-    c = nc;
-  }
-
-  //
-  // hcode[i] contains the length, l, of the
-  // code for symbol i.  Assign the next available
-  // code of length l to the symbol and store both
-  // l and the code in hcode[i].
-  //
-
-  for (int i = 0; i < HUF_ENCSIZE; ++i) {
-    int l = static_cast<int>(hcode[i]);
-
-    if (l > 0) hcode[i] = l | (n[l]++ << 6);
-  }
-}
-
-//
-// Compute Huffman codes (based on frq input) and store them in frq:
-//  - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
-//  - max code length is 58 bits;
-//  - codes outside the range [im-iM] have a null length (unused values);
-//  - original frequencies are destroyed;
-//  - encoding tables are used by hufEncode() and hufBuildDecTable();
-//
-
-struct FHeapCompare {
-  bool operator()(long long *a, long long *b) { return *a > *b; }
-};
-
-static void hufBuildEncTable(
-    long long *frq,  // io: input frequencies [HUF_ENCSIZE], output table
-    int *im,         //  o: min frq index
-    int *iM)         //  o: max frq index
-{
-  //
-  // This function assumes that when it is called, array frq
-  // indicates the frequency of all possible symbols in the data
-  // that are to be Huffman-encoded.  (frq[i] contains the number
-  // of occurrences of symbol i in the data.)
-  //
-  // The loop below does three things:
-  //
-  // 1) Finds the minimum and maximum indices that point
-  //    to non-zero entries in frq:
-  //
-  //     frq[im] != 0, and frq[i] == 0 for all i < im
-  //     frq[iM] != 0, and frq[i] == 0 for all i > iM
-  //
-  // 2) Fills array fHeap with pointers to all non-zero
-  //    entries in frq.
-  //
-  // 3) Initializes array hlink such that hlink[i] == i
-  //    for all array entries.
-  //
-
-  int hlink[HUF_ENCSIZE];
-  long long *fHeap[HUF_ENCSIZE];
-
-  *im = 0;
-
-  while (!frq[*im]) (*im)++;
-
-  int nf = 0;
-
-  for (int i = *im; i < HUF_ENCSIZE; i++) {
-    hlink[i] = i;
-
-    if (frq[i]) {
-      fHeap[nf] = &frq[i];
-      nf++;
-      *iM = i;
-    }
-  }
-
-  //
-  // Add a pseudo-symbol, with a frequency count of 1, to frq;
-  // adjust the fHeap and hlink array accordingly.  Function
-  // hufEncode() uses the pseudo-symbol for run-length encoding.
-  //
-
-  (*iM)++;
-  frq[*iM] = 1;
-  fHeap[nf] = &frq[*iM];
-  nf++;
-
-  //
-  // Build an array, scode, such that scode[i] contains the number
-  // of bits assigned to symbol i.  Conceptually this is done by
-  // constructing a tree whose leaves are the symbols with non-zero
-  // frequency:
-  //
-  //     Make a heap that contains all symbols with a non-zero frequency,
-  //     with the least frequent symbol on top.
-  //
-  //     Repeat until only one symbol is left on the heap:
-  //
-  //         Take the two least frequent symbols off the top of the heap.
-  //         Create a new node that has first two nodes as children, and
-  //         whose frequency is the sum of the frequencies of the first
-  //         two nodes.  Put the new node back into the heap.
-  //
-  // The last node left on the heap is the root of the tree.  For each
-  // leaf node, the distance between the root and the leaf is the length
-  // of the code for the corresponding symbol.
-  //
-  // The loop below doesn't actually build the tree; instead we compute
-  // the distances of the leaves from the root on the fly.  When a new
-  // node is added to the heap, then that node's descendants are linked
-  // into a single linear list that starts at the new node, and the code
-  // lengths of the descendants (that is, their distance from the root
-  // of the tree) are incremented by one.
-  //
-
-  std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-
-  long long scode[HUF_ENCSIZE];
-  memset(scode, 0, sizeof(long long) * HUF_ENCSIZE);
-
-  while (nf > 1) {
-    //
-    // Find the indices, mm and m, of the two smallest non-zero frq
-    // values in fHeap, add the smallest frq to the second-smallest
-    // frq, and remove the smallest frq value from fHeap.
-    //
-
-    int mm = fHeap[0] - frq;
-    std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-    --nf;
-
-    int m = fHeap[0] - frq;
-    std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-
-    frq[m] += frq[mm];
-    std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-
-    //
-    // The entries in scode are linked into lists with the
-    // entries in hlink serving as "next" pointers and with
-    // the end of a list marked by hlink[j] == j.
-    //
-    // Traverse the lists that start at scode[m] and scode[mm].
-    // For each element visited, increment the length of the
-    // corresponding code by one bit. (If we visit scode[j]
-    // during the traversal, then the code for symbol j becomes
-    // one bit longer.)
-    //
-    // Merge the lists that start at scode[m] and scode[mm]
-    // into a single list that starts at scode[m].
-    //
-
-    //
-    // Add a bit to all codes in the first list.
-    //
-
-    for (int j = m;; j = hlink[j]) {
-      scode[j]++;
-
-      assert(scode[j] <= 58);
-
-      if (hlink[j] == j) {
-        //
-        // Merge the two lists.
-        //
-
-        hlink[j] = mm;
-        break;
-      }
-    }
-
-    //
-    // Add a bit to all codes in the second list
-    //
-
-    for (int j = mm;; j = hlink[j]) {
-      scode[j]++;
-
-      assert(scode[j] <= 58);
-
-      if (hlink[j] == j) break;
-    }
-  }
-
-  //
-  // Build a canonical Huffman code table, replacing the code
-  // lengths in scode with (code, code length) pairs.  Copy the
-  // code table from scode into frq.
-  //
-
-  hufCanonicalCodeTable(scode);
-  memcpy(frq, scode, sizeof(long long) * HUF_ENCSIZE);
-}
-
-//
-// Pack an encoding table:
-//  - only code lengths, not actual codes, are stored
-//  - runs of zeroes are compressed as follows:
-//
-//    unpacked    packed
-//    --------------------------------
-//    1 zero    0  (6 bits)
-//    2 zeroes    59
-//    3 zeroes    60
-//    4 zeroes    61
-//    5 zeroes    62
-//    n zeroes (6 or more)  63 n-6  (6 + 8 bits)
-//
-
-const int SHORT_ZEROCODE_RUN = 59;
-const int LONG_ZEROCODE_RUN = 63;
-const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
-const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN;
-
-static void hufPackEncTable(
-    const long long *hcode,  // i : encoding table [HUF_ENCSIZE]
-    int im,                  // i : min hcode index
-    int iM,                  // i : max hcode index
-    char **pcode)            //  o: ptr to packed table (updated)
-{
-  char *p = *pcode;
-  long long c = 0;
-  int lc = 0;
-
-  for (; im <= iM; im++) {
-    int l = hufLength(hcode[im]);
-
-    if (l == 0) {
-      int zerun = 1;
-
-      while ((im < iM) && (zerun < LONGEST_LONG_RUN)) {
-        if (hufLength(hcode[im + 1]) > 0) break;
-        im++;
-        zerun++;
-      }
-
-      if (zerun >= 2) {
-        if (zerun >= SHORTEST_LONG_RUN) {
-          outputBits(6, LONG_ZEROCODE_RUN, c, lc, p);
-          outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p);
-        } else {
-          outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p);
-        }
-        continue;
-      }
-    }
-
-    outputBits(6, l, c, lc, p);
-  }
-
-  if (lc > 0) *p++ = (unsigned char)(c << (8 - lc));
-
-  *pcode = p;
-}
-
-//
-// Unpack an encoding table packed by hufPackEncTable():
-//
-
-static bool hufUnpackEncTable(
-    const char **pcode,  // io: ptr to packed table (updated)
-    int ni,              // i : input size (in bytes)
-    int im,              // i : min hcode index
-    int iM,              // i : max hcode index
-    long long *hcode)    //  o: encoding table [HUF_ENCSIZE]
-{
-  memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE);
-
-  const char *p = *pcode;
-  long long c = 0;
-  int lc = 0;
-
-  for (; im <= iM; im++) {
-    if (p - *pcode > ni) {
-      return false;
-    }
-
-    long long l = hcode[im] = getBits(6, c, lc, p);  // code length
-
-    if (l == (long long)LONG_ZEROCODE_RUN) {
-      if (p - *pcode > ni) {
-        return false;
-      }
-
-      int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN;
-
-      if (im + zerun > iM + 1) {
-        return false;
-      }
-
-      while (zerun--) hcode[im++] = 0;
-
-      im--;
-    } else if (l >= (long long)SHORT_ZEROCODE_RUN) {
-      int zerun = l - SHORT_ZEROCODE_RUN + 2;
-
-      if (im + zerun > iM + 1) {
-        return false;
-      }
-
-      while (zerun--) hcode[im++] = 0;
-
-      im--;
-    }
-  }
-
-  *pcode = const_cast<char *>(p);
-
-  hufCanonicalCodeTable(hcode);
-
-  return true;
-}
-
-//
-// DECODING TABLE BUILDING
-//
-
-//
-// Clear a newly allocated decoding table so that it contains only zeroes.
-//
-
-static void hufClearDecTable(HufDec *hdecod)  // io: (allocated by caller)
-//     decoding table [HUF_DECSIZE]
-{
-  for (int i = 0; i < HUF_DECSIZE; i++) {
-    hdecod[i].len = 0;
-    hdecod[i].lit = 0;
-    hdecod[i].p = NULL;
-  }
-  // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE);
-}
-
-//
-// Build a decoding hash table based on the encoding table hcode:
-//  - short codes (<= HUF_DECBITS) are resolved with a single table access;
-//  - long code entry allocations are not optimized, because long codes are
-//    unfrequent;
-//  - decoding tables are used by hufDecode();
-//
-
-static bool hufBuildDecTable(const long long *hcode,  // i : encoding table
-                             int im,                  // i : min index in hcode
-                             int iM,                  // i : max index in hcode
-                             HufDec *hdecod)  //  o: (allocated by caller)
-//     decoding table [HUF_DECSIZE]
-{
-  //
-  // Init hashtable & loop on all codes.
-  // Assumes that hufClearDecTable(hdecod) has already been called.
-  //
-
-  for (; im <= iM; im++) {
-    long long c = hufCode(hcode[im]);
-    int l = hufLength(hcode[im]);
-
-    if (c >> l) {
-      //
-      // Error: c is supposed to be an l-bit code,
-      // but c contains a value that is greater
-      // than the largest l-bit number.
-      //
-
-      // invalidTableEntry();
-      return false;
-    }
-
-    if (l > HUF_DECBITS) {
-      //
-      // Long code: add a secondary entry
-      //
-
-      HufDec *pl = hdecod + (c >> (l - HUF_DECBITS));
-
-      if (pl->len) {
-        //
-        // Error: a short code has already
-        // been stored in table entry *pl.
-        //
-
-        // invalidTableEntry();
-        return false;
-      }
-
-      pl->lit++;
-
-      if (pl->p) {
-        int *p = pl->p;
-        pl->p = new int[pl->lit];
-
-        for (int i = 0; i < pl->lit - 1; ++i) pl->p[i] = p[i];
-
-        delete[] p;
-      } else {
-        pl->p = new int[1];
-      }
-
-      pl->p[pl->lit - 1] = im;
-    } else if (l) {
-      //
-      // Short code: init all primary entries
-      //
-
-      HufDec *pl = hdecod + (c << (HUF_DECBITS - l));
-
-      for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) {
-        if (pl->len || pl->p) {
-          //
-          // Error: a short code or a long code has
-          // already been stored in table entry *pl.
-          //
-
-          // invalidTableEntry();
-          return false;
-        }
-
-        pl->len = l;
-        pl->lit = im;
-      }
-    }
-  }
-
-  return true;
-}
-
-//
-// Free the long code entries of a decoding table built by hufBuildDecTable()
-//
-
-static void hufFreeDecTable(HufDec *hdecod)  // io: Decoding table
-{
-  for (int i = 0; i < HUF_DECSIZE; i++) {
-    if (hdecod[i].p) {
-      delete[] hdecod[i].p;
-      hdecod[i].p = 0;
-    }
-  }
-}
-
-//
-// ENCODING
-//
-
-inline void outputCode(long long code, long long &c, int &lc, char *&out) {
-  outputBits(hufLength(code), hufCode(code), c, lc, out);
-}
-
-inline void sendCode(long long sCode, int runCount, long long runCode,
-                     long long &c, int &lc, char *&out) {
-  //
-  // Output a run of runCount instances of the symbol sCount.
-  // Output the symbols explicitly, or if that is shorter, output
-  // the sCode symbol once followed by a runCode symbol and runCount
-  // expressed as an 8-bit number.
-  //
-
-  if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) {
-    outputCode(sCode, c, lc, out);
-    outputCode(runCode, c, lc, out);
-    outputBits(8, runCount, c, lc, out);
-  } else {
-    while (runCount-- >= 0) outputCode(sCode, c, lc, out);
-  }
-}
-
-//
-// Encode (compress) ni values based on the Huffman encoding table hcode:
-//
-
-static int hufEncode            // return: output size (in bits)
-    (const long long *hcode,    // i : encoding table
-     const unsigned short *in,  // i : uncompressed input buffer
-     const int ni,              // i : input buffer size (in bytes)
-     int rlc,                   // i : rl code
-     char *out)                 //  o: compressed output buffer
-{
-  char *outStart = out;
-  long long c = 0;  // bits not yet written to out
-  int lc = 0;       // number of valid bits in c (LSB)
-  int s = in[0];
-  int cs = 0;
-
-  //
-  // Loop on input values
-  //
-
-  for (int i = 1; i < ni; i++) {
-    //
-    // Count same values or send code
-    //
-
-    if (s == in[i] && cs < 255) {
-      cs++;
-    } else {
-      sendCode(hcode[s], cs, hcode[rlc], c, lc, out);
-      cs = 0;
-    }
-
-    s = in[i];
-  }
-
-  //
-  // Send remaining code
-  //
-
-  sendCode(hcode[s], cs, hcode[rlc], c, lc, out);
-
-  if (lc) *out = (c << (8 - lc)) & 0xff;
-
-  return (out - outStart) * 8 + lc;
-}
-
-//
-// DECODING
-//
-
-//
-// In order to force the compiler to inline them,
-// getChar() and getCode() are implemented as macros
-// instead of "inline" functions.
-//
-
-#define getChar(c, lc, in)                   \
-  {                                          \
-    c = (c << 8) | *(unsigned char *)(in++); \
-    lc += 8;                                 \
-  }
-
-#define getCode(po, rlc, c, lc, in, out, oe) \
-  {                                          \
-    if (po == rlc) {                         \
-      if (lc < 8) getChar(c, lc, in);        \
-                                             \
-      lc -= 8;                               \
-                                             \
-      unsigned char cs = (c >> lc);          \
-                                             \
-      if (out + cs > oe) return false;       \
-                                             \
-      unsigned short s = out[-1];            \
-                                             \
-      while (cs-- > 0) *out++ = s;           \
-    } else if (out < oe) {                   \
-      *out++ = po;                           \
-    } else {                                 \
-      return false;                          \
-    }                                        \
-  }
-
-//
-// Decode (uncompress) ni bits based on encoding & decoding tables:
-//
-
-static bool hufDecode(const long long *hcode,  // i : encoding table
-                      const HufDec *hdecod,    // i : decoding table
-                      const char *in,          // i : compressed input buffer
-                      int ni,                  // i : input size (in bits)
-                      int rlc,                 // i : run-length code
-                      int no,  // i : expected output size (in bytes)
-                      unsigned short *out)  //  o: uncompressed output buffer
-{
-  long long c = 0;
-  int lc = 0;
-  unsigned short *outb = out;
-  unsigned short *oe = out + no;
-  const char *ie = in + (ni + 7) / 8;  // input byte size
-
-  //
-  // Loop on input bytes
-  //
-
-  while (in < ie) {
-    getChar(c, lc, in);
-
-    //
-    // Access decoding table
-    //
-
-    while (lc >= HUF_DECBITS) {
-      const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK];
-
-      if (pl.len) {
-        //
-        // Get short code
-        //
-
-        lc -= pl.len;
-        getCode(pl.lit, rlc, c, lc, in, out, oe);
-      } else {
-        if (!pl.p) {
-          return false;
-        }
-        // invalidCode(); // wrong code
-
-        //
-        // Search long code
-        //
-
-        int j;
-
-        for (j = 0; j < pl.lit; j++) {
-          int l = hufLength(hcode[pl.p[j]]);
-
-          while (lc < l && in < ie)  // get more bits
-            getChar(c, lc, in);
-
-          if (lc >= l) {
-            if (hufCode(hcode[pl.p[j]]) ==
-                ((c >> (lc - l)) & (((long long)(1) << l) - 1))) {
-              //
-              // Found : get long code
-              //
-
-              lc -= l;
-              getCode(pl.p[j], rlc, c, lc, in, out, oe);
-              break;
-            }
-          }
-        }
-
-        if (j == pl.lit) {
-          return false;
-          // invalidCode(); // Not found
-        }
-      }
-    }
-  }
-
-  //
-  // Get remaining (short) codes
-  //
-
-  int i = (8 - ni) & 7;
-  c >>= i;
-  lc -= i;
-
-  while (lc > 0) {
-    const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
-
-    if (pl.len) {
-      lc -= pl.len;
-      getCode(pl.lit, rlc, c, lc, in, out, oe);
-    } else {
-      return false;
-      // invalidCode(); // wrong (long) code
-    }
-  }
-
-  if (out - outb != no) {
-    return false;
-  }
-  // notEnoughData ();
-
-  return true;
-}
-
-static void countFrequencies(long long freq[HUF_ENCSIZE],
-                             const unsigned short data[/*n*/], int n) {
-  for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0;
-
-  for (int i = 0; i < n; ++i) ++freq[data[i]];
-}
-
-static void writeUInt(char buf[4], unsigned int i) {
-  unsigned char *b = (unsigned char *)buf;
-
-  b[0] = i;
-  b[1] = i >> 8;
-  b[2] = i >> 16;
-  b[3] = i >> 24;
-}
-
-static unsigned int readUInt(const char buf[4]) {
-  const unsigned char *b = (const unsigned char *)buf;
-
-  return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) |
-         ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000);
-}
-
-//
-// EXTERNAL INTERFACE
-//
-
-static int hufCompress(const unsigned short raw[], int nRaw,
-                       char compressed[]) {
-  if (nRaw == 0) return 0;
-
-  long long freq[HUF_ENCSIZE];
-
-  countFrequencies(freq, raw, nRaw);
-
-  int im = 0;
-  int iM = 0;
-  hufBuildEncTable(freq, &im, &iM);
-
-  char *tableStart = compressed + 20;
-  char *tableEnd = tableStart;
-  hufPackEncTable(freq, im, iM, &tableEnd);
-  int tableLength = tableEnd - tableStart;
-
-  char *dataStart = tableEnd;
-  int nBits = hufEncode(freq, raw, nRaw, iM, dataStart);
-  int data_length = (nBits + 7) / 8;
-
-  writeUInt(compressed, im);
-  writeUInt(compressed + 4, iM);
-  writeUInt(compressed + 8, tableLength);
-  writeUInt(compressed + 12, nBits);
-  writeUInt(compressed + 16, 0);  // room for future extensions
-
-  return dataStart + data_length - compressed;
-}
-
-static bool hufUncompress(const char compressed[], int nCompressed,
-                          unsigned short raw[], int nRaw) {
-  if (nCompressed == 0) {
-    if (nRaw != 0) return false;
-
-    return false;
-  }
-
-  int im = readUInt(compressed);
-  int iM = readUInt(compressed + 4);
-  // int tableLength = readUInt (compressed + 8);
-  int nBits = readUInt(compressed + 12);
-
-  if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false;
-
-  const char *ptr = compressed + 20;
-
-  //
-  // Fast decoder needs at least 2x64-bits of compressed data, and
-  // needs to be run-able on this platform. Otherwise, fall back
-  // to the original decoder
-  //
-
-  // if (FastHufDecoder::enabled() && nBits > 128)
-  //{
-  //    FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM);
-  //    fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw);
-  //}
-  // else
-  {
-    std::vector<long long> freq(HUF_ENCSIZE);
-    std::vector<HufDec> hdec(HUF_DECSIZE);
-
-    hufClearDecTable(&hdec.at(0));
-
-    hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM,
-                      &freq.at(0));
-
-    {
-      if (nBits > 8 * (nCompressed - (ptr - compressed))) {
-        return false;
-      }
-
-      hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0));
-      hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, nRaw, raw);
-    }
-    // catch (...)
-    //{
-    //    hufFreeDecTable (hdec);
-    //    throw;
-    //}
-
-    hufFreeDecTable(&hdec.at(0));
-  }
-
-  return true;
-}
-
-//
-// Functions to compress the range of values in the pixel data
-//
-
-const int USHORT_RANGE = (1 << 16);
-const int BITMAP_SIZE = (USHORT_RANGE >> 3);
-
-static void bitmapFromData(const unsigned short data[/*nData*/], int nData,
-                           unsigned char bitmap[BITMAP_SIZE],
-                           unsigned short &minNonZero,
-                           unsigned short &maxNonZero) {
-  for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0;
-
-  for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7));
-
-  bitmap[0] &= ~1;  // zero is not explicitly stored in
-                    // the bitmap; we assume that the
-                    // data always contain zeroes
-  minNonZero = BITMAP_SIZE - 1;
-  maxNonZero = 0;
-
-  for (int i = 0; i < BITMAP_SIZE; ++i) {
-    if (bitmap[i]) {
-      if (minNonZero > i) minNonZero = i;
-      if (maxNonZero < i) maxNonZero = i;
-    }
-  }
-}
-
-static unsigned short forwardLutFromBitmap(
-    const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) {
-  int k = 0;
-
-  for (int i = 0; i < USHORT_RANGE; ++i) {
-    if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7))))
-      lut[i] = k++;
-    else
-      lut[i] = 0;
-  }
-
-  return k - 1;  // maximum value stored in lut[],
-}  // i.e. number of ones in bitmap minus 1
-
-static unsigned short reverseLutFromBitmap(
-    const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) {
-  int k = 0;
-
-  for (int i = 0; i < USHORT_RANGE; ++i) {
-    if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i;
-  }
-
-  int n = k - 1;
-
-  while (k < USHORT_RANGE) lut[k++] = 0;
-
-  return n;  // maximum k where lut[k] is non-zero,
-}  // i.e. number of ones in bitmap minus 1
-
-static void applyLut(const unsigned short lut[USHORT_RANGE],
-                     unsigned short data[/*nData*/], int nData) {
-  for (int i = 0; i < nData; ++i) data[i] = lut[data[i]];
-}
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif  // __clang__
-
-static bool CompressPiz(unsigned char *outPtr, unsigned int &outSize,
-                        const unsigned char *inPtr, size_t inSize,
-                        const std::vector<ChannelInfo> &channelInfo,
-                        int data_width, int num_lines) {
-  unsigned char bitmap[BITMAP_SIZE];
-  unsigned short minNonZero;
-  unsigned short maxNonZero;
-
-#if !MINIZ_LITTLE_ENDIAN
-  // @todo { PIZ compression on BigEndian architecture. }
-  assert(0);
-  return false;
-#endif
-
-  // Assume `inSize` is multiple of 2 or 4.
-  std::vector<unsigned short> tmpBuffer(inSize / sizeof(unsigned short));
-
-  std::vector<PIZChannelData> channelData(channelInfo.size());
-  unsigned short *tmpBufferEnd = &tmpBuffer.at(0);
-
-  for (size_t c = 0; c < channelData.size(); c++) {
-    PIZChannelData &cd = channelData[c];
-
-    cd.start = tmpBufferEnd;
-    cd.end = cd.start;
-
-    cd.nx = data_width;
-    cd.ny = num_lines;
-    // cd.ys = c.channel().ySampling;
-
-    size_t pixelSize = sizeof(int);  // UINT and FLOAT
-    if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      pixelSize = sizeof(short);
-    }
-
-    cd.size = static_cast<int>(pixelSize / sizeof(short));
-
-    tmpBufferEnd += cd.nx * cd.ny * cd.size;
-  }
-
-  const unsigned char *ptr = inPtr;
-  for (int y = 0; y < num_lines; ++y) {
-    for (size_t i = 0; i < channelData.size(); ++i) {
-      PIZChannelData &cd = channelData[i];
-
-      // if (modp (y, cd.ys) != 0)
-      //    continue;
-
-      size_t n = static_cast<size_t>(cd.nx * cd.size);
-      memcpy(cd.end, ptr, n * sizeof(unsigned short));
-      ptr += n * sizeof(unsigned short);
-      cd.end += n;
-    }
-  }
-
-  bitmapFromData(&tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()), bitmap,
-                 minNonZero, maxNonZero);
-
-  unsigned short lut[USHORT_RANGE];
-  unsigned short maxValue = forwardLutFromBitmap(bitmap, lut);
-  applyLut(lut, &tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()));
-
-  //
-  // Store range compression info in _outBuffer
-  //
-
-  char *buf = reinterpret_cast<char *>(outPtr);
-
-  memcpy(buf, &minNonZero, sizeof(unsigned short));
-  buf += sizeof(unsigned short);
-  memcpy(buf, &maxNonZero, sizeof(unsigned short));
-  buf += sizeof(unsigned short);
-
-  if (minNonZero <= maxNonZero) {
-    memcpy(buf, reinterpret_cast<char *>(&bitmap[0] + minNonZero),
-           maxNonZero - minNonZero + 1);
-    buf += maxNonZero - minNonZero + 1;
-  }
-
-  //
-  // Apply wavelet encoding
-  //
-
-  for (size_t i = 0; i < channelData.size(); ++i) {
-    PIZChannelData &cd = channelData[i];
-
-    for (int j = 0; j < cd.size; ++j) {
-      wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size,
-                 maxValue);
-    }
-  }
-
-  //
-  // Apply Huffman encoding; append the result to _outBuffer
-  //
-
-  // length header(4byte), then huff data. Initialize length header with zero,
-  // then later fill it by `length`.
-  char *lengthPtr = buf;
-  int zero = 0;
-  memcpy(buf, &zero, sizeof(int));
-  buf += sizeof(int);
-
-  int length =
-      hufCompress(&tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()), buf);
-  memcpy(lengthPtr, &length, sizeof(int));
-
-  outSize = static_cast<unsigned int>(
-      (reinterpret_cast<unsigned char *>(buf) - outPtr) +
-      static_cast<unsigned int>(length));
-  return true;
-}
-
-static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr,
-                          size_t tmpBufSize, int num_channels,
-                          const EXRChannelInfo *channels, int data_width,
-                          int num_lines) {
-  unsigned char bitmap[BITMAP_SIZE];
-  unsigned short minNonZero;
-  unsigned short maxNonZero;
-
-#if !MINIZ_LITTLE_ENDIAN
-  // @todo { PIZ compression on BigEndian architecture. }
-  assert(0);
-  return false;
-#endif
-
-  memset(bitmap, 0, BITMAP_SIZE);
-
-  const unsigned char *ptr = inPtr;
-  minNonZero = *(reinterpret_cast<const unsigned short *>(ptr));
-  maxNonZero = *(reinterpret_cast<const unsigned short *>(ptr + 2));
-  ptr += 4;
-
-  if (maxNonZero >= BITMAP_SIZE) {
-    return false;
-  }
-
-  if (minNonZero <= maxNonZero) {
-    memcpy(reinterpret_cast<char *>(&bitmap[0] + minNonZero), ptr,
-           maxNonZero - minNonZero + 1);
-    ptr += maxNonZero - minNonZero + 1;
-  }
-
-  unsigned short lut[USHORT_RANGE];
-  memset(lut, 0, sizeof(unsigned short) * USHORT_RANGE);
-  unsigned short maxValue = reverseLutFromBitmap(bitmap, lut);
-
-  //
-  // Huffman decoding
-  //
-
-  int length;
-
-  length = *(reinterpret_cast<const int *>(ptr));
-  ptr += sizeof(int);
-
-  std::vector<unsigned short> tmpBuffer(tmpBufSize);
-  hufUncompress(reinterpret_cast<const char *>(ptr), length, &tmpBuffer.at(0),
-                static_cast<int>(tmpBufSize));
-
-  //
-  // Wavelet decoding
-  //
-
-  std::vector<PIZChannelData> channelData(static_cast<size_t>(num_channels));
-
-  unsigned short *tmpBufferEnd = &tmpBuffer.at(0);
-
-  for (size_t i = 0; i < static_cast<size_t>(num_channels); ++i) {
-    const EXRChannelInfo &chan = channels[i];
-
-    size_t pixelSize = sizeof(int);  // UINT and FLOAT
-    if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      pixelSize = sizeof(short);
-    }
-
-    channelData[i].start = tmpBufferEnd;
-    channelData[i].end = channelData[i].start;
-    channelData[i].nx = data_width;
-    channelData[i].ny = num_lines;
-    // channelData[i].ys = 1;
-    channelData[i].size = static_cast<int>(pixelSize / sizeof(short));
-
-    tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size;
-  }
-
-  for (size_t i = 0; i < channelData.size(); ++i) {
-    PIZChannelData &cd = channelData[i];
-
-    for (int j = 0; j < cd.size; ++j) {
-      wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size,
-                 maxValue);
-    }
-  }
-
-  //
-  // Expand the pixel data to their original range
-  //
-
-  applyLut(lut, &tmpBuffer.at(0), static_cast<int>(tmpBufSize));
-
-  for (int y = 0; y < num_lines; y++) {
-    for (size_t i = 0; i < channelData.size(); ++i) {
-      PIZChannelData &cd = channelData[i];
-
-      // if (modp (y, cd.ys) != 0)
-      //    continue;
-
-      size_t n = static_cast<size_t>(cd.nx * cd.size);
-      memcpy(outPtr, cd.end, static_cast<size_t>(n * sizeof(unsigned short)));
-      outPtr += n * sizeof(unsigned short);
-      cd.end += n;
-    }
-  }
-
-  return true;
-}
-#endif  // TINYEXR_USE_PIZ
-
-#if TINYEXR_USE_ZFP
-struct ZFPCompressionParam {
-  double rate;
-  int precision;
-  double tolerance;
-  int type;  // TINYEXR_ZFP_COMPRESSIONTYPE_*
-
-  ZFPCompressionParam() {
-    type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE;
-    rate = 2.0;
-    precision = 0;
-    tolerance = 0.0f;
-  }
-};
-
-bool FindZFPCompressionParam(ZFPCompressionParam *param,
-                             const EXRAttribute *attributes,
-                             int num_attributes) {
-  bool foundType = false;
-
-  for (int i = 0; i < num_attributes; i++) {
-    if ((strcmp(attributes[i].name, "zfpCompressionType") == 0) &&
-        (attributes[i].size == 1)) {
-      param->type = static_cast<int>(attributes[i].value[0]);
-
-      foundType = true;
-    }
-  }
-
-  if (!foundType) {
-    return false;
-  }
-
-  if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
-    for (int i = 0; i < num_attributes; i++) {
-      if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) &&
-          (attributes[i].size == 8)) {
-        param->rate = *(reinterpret_cast<double *>(attributes[i].value));
-        return true;
-      }
-    }
-  } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
-    for (int i = 0; i < num_attributes; i++) {
-      if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) &&
-          (attributes[i].size == 4)) {
-        param->rate = *(reinterpret_cast<int *>(attributes[i].value));
-        return true;
-      }
-    }
-  } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
-    for (int i = 0; i < num_attributes; i++) {
-      if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) &&
-          (attributes[i].size == 8)) {
-        param->tolerance = *(reinterpret_cast<double *>(attributes[i].value));
-        return true;
-      }
-    }
-  } else {
-    assert(0);
-  }
-
-  return false;
-}
-
-// Assume pixel format is FLOAT for all channels.
-static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines,
-                          int num_channels, const unsigned char *src,
-                          unsigned long src_size,
-                          const ZFPCompressionParam &param) {
-  size_t uncompressed_size = dst_width * dst_num_lines * num_channels;
-
-  zfp_stream *zfp = NULL;
-  zfp_field *field = NULL;
-
-  assert((dst_width % 4) == 0);
-  assert((dst_num_lines % 4) == 0);
-
-  if ((dst_width & 3U) || (dst_num_lines & 3U)) {
-    return false;
-  }
-
-  field =
-      zfp_field_2d(reinterpret_cast<void *>(const_cast<unsigned char *>(src)),
-                   zfp_type_float, dst_width, dst_num_lines * num_channels);
-  zfp = zfp_stream_open(NULL);
-
-  if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
-    zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimention */ 2,
-                        /* write random access */ 0);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
-    zfp_stream_set_precision(zfp, param.precision, zfp_type_float);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
-    zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float);
-  } else {
-    assert(0);
-  }
-
-  size_t buf_size = zfp_stream_maximum_size(zfp, field);
-  std::vector<unsigned char> buf(buf_size);
-  memcpy(&buf.at(0), src, src_size);
-
-  bitstream *stream = stream_open(&buf.at(0), buf_size);
-  zfp_stream_set_bit_stream(zfp, stream);
-  zfp_stream_rewind(zfp);
-
-  size_t image_size = dst_width * dst_num_lines;
-
-  for (int c = 0; c < num_channels; c++) {
-    // decompress 4x4 pixel block.
-    for (int y = 0; y < dst_num_lines; y += 4) {
-      for (int x = 0; x < dst_width; x += 4) {
-        float fblock[16];
-        zfp_decode_block_float_2(zfp, fblock);
-        for (int j = 0; j < 4; j++) {
-          for (int i = 0; i < 4; i++) {
-            dst[c * image_size + ((y + j) * dst_width + (x + i))] =
-                fblock[j * 4 + i];
-          }
-        }
-      }
-    }
-  }
-
-  zfp_field_free(field);
-  zfp_stream_close(zfp);
-  stream_close(stream);
-
-  return true;
-}
-
-// Assume pixel format is FLOAT for all channels.
-bool CompressZfp(std::vector<unsigned char> *outBuf, unsigned int *outSize,
-                 const float *inPtr, int width, int num_lines, int num_channels,
-                 const ZFPCompressionParam &param) {
-  zfp_stream *zfp = NULL;
-  zfp_field *field = NULL;
-
-  assert((width % 4) == 0);
-  assert((num_lines % 4) == 0);
-
-  if ((width & 3U) || (num_lines & 3U)) {
-    return false;
-  }
-
-  // create input array.
-  field = zfp_field_2d(reinterpret_cast<void *>(const_cast<float *>(inPtr)),
-                       zfp_type_float, width, num_lines * num_channels);
-
-  zfp = zfp_stream_open(NULL);
-
-  if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
-    zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
-    zfp_stream_set_precision(zfp, param.precision, zfp_type_float);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
-    zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float);
-  } else {
-    assert(0);
-  }
-
-  size_t buf_size = zfp_stream_maximum_size(zfp, field);
-
-  outBuf->resize(buf_size);
-
-  bitstream *stream = stream_open(&outBuf->at(0), buf_size);
-  zfp_stream_set_bit_stream(zfp, stream);
-  zfp_field_free(field);
-
-  size_t image_size = width * num_lines;
-
-  for (int c = 0; c < num_channels; c++) {
-    // compress 4x4 pixel block.
-    for (int y = 0; y < num_lines; y += 4) {
-      for (int x = 0; x < width; x += 4) {
-        float fblock[16];
-        for (int j = 0; j < 4; j++) {
-          for (int i = 0; i < 4; i++) {
-            fblock[j * 4 + i] =
-                inPtr[c * image_size + ((y + j) * width + (x + i))];
-          }
-        }
-        zfp_encode_block_float_2(zfp, fblock);
-      }
-    }
-  }
-
-  zfp_stream_flush(zfp);
-  (*outSize) = zfp_stream_compressed_size(zfp);
-
-  zfp_stream_close(zfp);
-
-  return true;
-}
-
-#endif
-
-//
-// -----------------------------------------------------------------
-//
-
-static void DecodePixelData(/* out */ unsigned char **out_images,
-                            const int *requested_pixel_types,
-                            const unsigned char *data_ptr, size_t data_len,
-                            int compression_type, int line_order, int width,
-                            int height, int x_stride, int y, int line_no,
-                            int num_lines, size_t pixel_data_size,
-                            size_t num_attributes,
-                            const EXRAttribute *attributes, size_t num_channels,
-                            const EXRChannelInfo *channels,
-                            const std::vector<size_t> &channel_offset_list) {
-  if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {  // PIZ
-#if TINYEXR_USE_PIZ
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(
-        static_cast<size_t>(width * num_lines) * pixel_data_size));
-    size_t tmpBufLen = static_cast<size_t>(
-        static_cast<size_t>(width * num_lines) * pixel_data_size);
-
-    bool ret = tinyexr::DecompressPiz(
-        reinterpret_cast<unsigned char *>(&outBuf.at(0)), data_ptr, tmpBufLen,
-        static_cast<int>(num_channels), channels, width, num_lines);
-
-    assert(ret);
-    (void)ret;
-
-    // For PIZ_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            FP16 hf;
-
-            hf.u = line_ptr[u];
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-              unsigned short *image =
-                  reinterpret_cast<unsigned short **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += static_cast<size_t>(
-                             (height - 1 - (line_no + static_cast<int>(v)))) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = hf.u;
-            } else {  // HALF -> FLOAT
-              FP32 f32 = half_to_float(hf);
-              float *image = reinterpret_cast<float **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += static_cast<size_t>(
-                             (height - 1 - (line_no + static_cast<int>(v)))) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = f32.f;
-            }
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT);
-
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            unsigned int val = line_ptr[u];
-
-            tinyexr::swap4(&val);
-
-            unsigned int *image =
-                reinterpret_cast<unsigned int **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += static_cast<size_t>(
-                           (height - 1 - (line_no + static_cast<int>(v)))) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(&outBuf.at(
-              v * pixel_data_size * static_cast<size_t>(x_stride) +
-              channel_offset_list[c] * static_cast<size_t>(x_stride)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val = line_ptr[u];
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += static_cast<size_t>(
-                           (height - 1 - (line_no + static_cast<int>(v)))) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-      }
-    }
-#else
-    assert(0 && "PIZ is enabled in this build");
-#endif
-
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS ||
-             compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
-                                      static_cast<size_t>(num_lines) *
-                                      pixel_data_size);
-
-    unsigned long dstLen = outBuf.size();
-    assert(dstLen > 0);
-    tinyexr::DecompressZip(reinterpret_cast<unsigned char *>(&outBuf.at(0)),
-                           &dstLen, data_ptr,
-                           static_cast<unsigned long>(data_len));
-
-    // For ZIP_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
-                             static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            tinyexr::FP16 hf;
-
-            hf.u = line_ptr[u];
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-              unsigned short *image =
-                  reinterpret_cast<unsigned short **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = hf.u;
-            } else {  // HALF -> FLOAT
-              tinyexr::FP32 f32 = half_to_float(hf);
-              float *image = reinterpret_cast<float **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = f32.f;
-            }
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT);
-
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            unsigned int val = line_ptr[u];
-
-            tinyexr::swap4(&val);
-
-            unsigned int *image =
-                reinterpret_cast<unsigned int **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val = line_ptr[u];
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-      }
-    }
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) {
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
-                                      static_cast<size_t>(num_lines) *
-                                      pixel_data_size);
-
-    unsigned long dstLen = outBuf.size();
-    assert(dstLen > 0);
-    tinyexr::DecompressRle(reinterpret_cast<unsigned char *>(&outBuf.at(0)),
-                           dstLen, data_ptr,
-                           static_cast<unsigned long>(data_len));
-
-    // For RLE_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
-                             static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            tinyexr::FP16 hf;
-
-            hf.u = line_ptr[u];
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-              unsigned short *image =
-                  reinterpret_cast<unsigned short **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = hf.u;
-            } else {  // HALF -> FLOAT
-              tinyexr::FP32 f32 = half_to_float(hf);
-              float *image = reinterpret_cast<float **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = f32.f;
-            }
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT);
-
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            unsigned int val = line_ptr[u];
-
-            tinyexr::swap4(&val);
-
-            unsigned int *image =
-                reinterpret_cast<unsigned int **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val = line_ptr[u];
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-      }
-    }
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-#if TINYEXR_USE_ZFP
-    tinyexr::ZFPCompressionParam zfp_compression_param;
-    if (!FindZFPCompressionParam(&zfp_compression_param, attributes,
-                                 num_attributes)) {
-      assert(0);
-      return;
-    }
-
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
-                                      static_cast<size_t>(num_lines) *
-                                      pixel_data_size);
-
-    unsigned long dstLen = outBuf.size();
-    assert(dstLen > 0);
-    tinyexr::DecompressZfp(reinterpret_cast<float *>(&outBuf.at(0)), width,
-                           num_lines, num_channels, data_ptr,
-                           static_cast<unsigned long>(data_len),
-                           zfp_compression_param);
-
-    // For ZFP_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      assert(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT);
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val = line_ptr[u];
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-      }
-    }
-#else
-    (void)attributes;
-    (void)num_attributes;
-    (void)num_channels;
-    assert(0);
-#endif
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
-    for (size_t c = 0; c < num_channels; c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        const unsigned short *line_ptr =
-            reinterpret_cast<const unsigned short *>(
-                data_ptr +
-                c * static_cast<size_t>(width) * sizeof(unsigned short));
-
-        if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-          unsigned short *outLine =
-              reinterpret_cast<unsigned short *>(out_images[c]);
-          if (line_order == 0) {
-            outLine += y * x_stride;
-          } else {
-            outLine += (height - 1 - y) * x_stride;
-          }
-
-          for (int u = 0; u < width; u++) {
-            tinyexr::FP16 hf;
-
-            hf.u = line_ptr[u];
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            outLine[u] = hf.u;
-          }
-        } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-          float *outLine = reinterpret_cast<float *>(out_images[c]);
-          if (line_order == 0) {
-            outLine += y * x_stride;
-          } else {
-            outLine += (height - 1 - y) * x_stride;
-          }
-
-          for (int u = 0; u < width; u++) {
-            tinyexr::FP16 hf;
-
-            hf.u = line_ptr[u];
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            tinyexr::FP32 f32 = half_to_float(hf);
-
-            outLine[u] = f32.f;
-          }
-        } else {
-          assert(0);
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        const float *line_ptr = reinterpret_cast<const float *>(
-            data_ptr + c * static_cast<size_t>(width) * sizeof(float));
-
-        float *outLine = reinterpret_cast<float *>(out_images[c]);
-        if (line_order == 0) {
-          outLine += y * x_stride;
-        } else {
-          outLine += (height - 1 - y) * x_stride;
-        }
-
-        for (int u = 0; u < width; u++) {
-          float val = line_ptr[u];
-
-          tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-          outLine[u] = val;
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        const unsigned int *line_ptr = reinterpret_cast<const unsigned int *>(
-            data_ptr + c * static_cast<size_t>(width) * sizeof(unsigned int));
-
-        unsigned int *outLine = reinterpret_cast<unsigned int *>(out_images[c]);
-        if (line_order == 0) {
-          outLine += y * x_stride;
-        } else {
-          outLine += (height - 1 - y) * x_stride;
-        }
-
-        for (int u = 0; u < width; u++) {
-          unsigned int val = line_ptr[u];
-
-          tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-          outLine[u] = val;
-        }
-      }
-    }
-  }
-}
-
-static void DecodeTiledPixelData(
-    unsigned char **out_images, int *width, int *height,
-    const int *requested_pixel_types, const unsigned char *data_ptr,
-    size_t data_len, int compression_type, int line_order, int data_width,
-    int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x,
-    int tile_size_y, size_t pixel_data_size, size_t num_attributes,
-    const EXRAttribute *attributes, size_t num_channels,
-    const EXRChannelInfo *channels,
-    const std::vector<size_t> &channel_offset_list) {
-  assert(tile_offset_x * tile_size_x < data_width);
-  assert(tile_offset_y * tile_size_y < data_height);
-
-  // Compute actual image size in a tile.
-  if ((tile_offset_x + 1) * tile_size_x >= data_width) {
-    (*width) = data_width - (tile_offset_x * tile_size_x);
-  } else {
-    (*width) = tile_size_x;
-  }
-
-  if ((tile_offset_y + 1) * tile_size_y >= data_height) {
-    (*height) = data_height - (tile_offset_y * tile_size_y);
-  } else {
-    (*height) = tile_size_y;
-  }
-
-  // Image size = tile size.
-  DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len,
-                  compression_type, line_order, (*width), tile_size_y,
-                  /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0,
-                  (*height), pixel_data_size, num_attributes, attributes,
-                  num_channels, channels, channel_offset_list);
-}
-
-static void ComputeChannelLayout(std::vector<size_t> *channel_offset_list,
-                                 int *pixel_data_size, size_t *channel_offset,
-                                 int num_channels,
-                                 const EXRChannelInfo *channels) {
-  channel_offset_list->resize(static_cast<size_t>(num_channels));
-
-  (*pixel_data_size) = 0;
-  (*channel_offset) = 0;
-
-  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-    (*channel_offset_list)[c] = (*channel_offset);
-    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      (*pixel_data_size) += sizeof(unsigned short);
-      (*channel_offset) += sizeof(unsigned short);
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-      (*pixel_data_size) += sizeof(float);
-      (*channel_offset) += sizeof(float);
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-      (*pixel_data_size) += sizeof(unsigned int);
-      (*channel_offset) += sizeof(unsigned int);
-    } else {
-      assert(0);
-    }
-  }
-}
-
-static unsigned char **AllocateImage(int num_channels,
-                                     const EXRChannelInfo *channels,
-                                     const int *requested_pixel_types,
-                                     int data_width, int data_height) {
-  unsigned char **images =
-      reinterpret_cast<unsigned char **>(static_cast<float **>(
-          malloc(sizeof(float *) * static_cast<size_t>(num_channels))));
-
-  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-    size_t data_len =
-        static_cast<size_t>(data_width) * static_cast<size_t>(data_height);
-    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      // pixel_data_size += sizeof(unsigned short);
-      // channel_offset += sizeof(unsigned short);
-      // Alloc internal image for half type.
-      if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-        images[c] =
-            reinterpret_cast<unsigned char *>(static_cast<unsigned short *>(
-                malloc(sizeof(unsigned short) * data_len)));
-      } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-        images[c] = reinterpret_cast<unsigned char *>(
-            static_cast<float *>(malloc(sizeof(float) * data_len)));
-      } else {
-        assert(0);
-      }
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-      // pixel_data_size += sizeof(float);
-      // channel_offset += sizeof(float);
-      images[c] = reinterpret_cast<unsigned char *>(
-          static_cast<float *>(malloc(sizeof(float) * data_len)));
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-      // pixel_data_size += sizeof(unsigned int);
-      // channel_offset += sizeof(unsigned int);
-      images[c] = reinterpret_cast<unsigned char *>(
-          static_cast<unsigned int *>(malloc(sizeof(unsigned int) * data_len)));
-    } else {
-      assert(0);
-    }
-  }
-
-  return images;
-}
-
-static int ParseEXRHeader(HeaderInfo *info, bool *empty_header,
-                          const EXRVersion *version, std::string *err,
-                          const unsigned char *buf, size_t size) {
-  const char *marker = reinterpret_cast<const char *>(&buf[0]);
-
-  if (empty_header) {
-    (*empty_header) = false;
-  }
-
-  if (version->multipart) {
-    if (size > 0 && marker[0] == '\0') {
-      // End of header list.
-      if (empty_header) {
-        (*empty_header) = true;
-      }
-      return TINYEXR_SUCCESS;
-    }
-  }
-
-  // According to the spec, the header of every OpenEXR file must contain at
-  // least the following attributes:
-  //
-  // channels chlist
-  // compression compression
-  // dataWindow box2i
-  // displayWindow box2i
-  // lineOrder lineOrder
-  // pixelAspectRatio float
-  // screenWindowCenter v2f
-  // screenWindowWidth float
-  bool has_channels = false;
-  bool has_compression = false;
-  bool has_data_window = false;
-  bool has_display_window = false;
-  bool has_line_order = false;
-  bool has_pixel_aspect_ratio = false;
-  bool has_screen_window_center = false;
-  bool has_screen_window_width = false;
-
-  info->data_window[0] = 0;
-  info->data_window[1] = 0;
-  info->data_window[2] = 0;
-  info->data_window[3] = 0;
-  info->line_order = 0;  // @fixme
-  info->display_window[0] = 0;
-  info->display_window[1] = 0;
-  info->display_window[2] = 0;
-  info->display_window[3] = 0;
-  info->screen_window_center[0] = 0.0f;
-  info->screen_window_center[1] = 0.0f;
-  info->screen_window_width = -1.0f;
-  info->pixel_aspect_ratio = -1.0f;
-
-  info->tile_size_x = -1;
-  info->tile_size_y = -1;
-  info->tile_level_mode = -1;
-  info->tile_rounding_mode = -1;
-
-  info->attributes.clear();
-
-  // Read attributes
-  size_t orig_size = size;
-  for (;;) {
-    if (0 == size) {
-      return TINYEXR_ERROR_INVALID_DATA;
-    } else if (marker[0] == '\0') {
-      size--;
-      break;
-    }
-
-    std::string attr_name;
-    std::string attr_type;
-    std::vector<unsigned char> data;
-    size_t marker_size;
-    if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size,
-                                marker, size)) {
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-    marker += marker_size;
-    size -= marker_size;
-
-    if (version->tiled && attr_name.compare("tiles") == 0) {
-      unsigned int x_size, y_size;
-      unsigned char tile_mode;
-      assert(data.size() == 9);
-      memcpy(&x_size, &data.at(0), sizeof(int));
-      memcpy(&y_size, &data.at(4), sizeof(int));
-      tile_mode = data[8];
-      tinyexr::swap4(&x_size);
-      tinyexr::swap4(&y_size);
-
-      info->tile_size_x = static_cast<int>(x_size);
-      info->tile_size_y = static_cast<int>(y_size);
-
-      // mode = levelMode + roundingMode * 16
-      info->tile_level_mode = tile_mode & 0x3;
-      info->tile_rounding_mode = (tile_mode >> 4) & 0x1;
-
-    } else if (attr_name.compare("compression") == 0) {
-      bool ok = false;
-      if ((data[0] >= TINYEXR_COMPRESSIONTYPE_NONE) &&
-          (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ)) {
-        ok = true;
-      }
-
-      if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) {
-#if TINYEXR_USE_PIZ
-        ok = true;
-#else
-        if (err) {
-          (*err) = "PIZ compression is not supported.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-#endif
-      }
-
-      if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) {
-#if TINYEXR_USE_ZFP
-        ok = true;
-#else
-        if (err) {
-          (*err) = "ZFP compression is not supported.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-#endif
-      }
-
-      if (!ok) {
-        if (err) {
-          (*err) = "Unknown compression type.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-      }
-
-      info->compression_type = static_cast<int>(data[0]);
-      has_compression = true;
-
-    } else if (attr_name.compare("channels") == 0) {
-      // name: zero-terminated string, from 1 to 255 bytes long
-      // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2
-      // pLinear: unsigned char, possible values are 0 and 1
-      // reserved: three chars, should be zero
-      // xSampling: int
-      // ySampling: int
-
-      ReadChannelInfo(info->channels, data);
-
-      if (info->channels.size() < 1) {
-        if (err) {
-          (*err) = "# of channels is zero.";
-        }
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      has_channels = true;
-
-    } else if (attr_name.compare("dataWindow") == 0) {
-      memcpy(&info->data_window[0], &data.at(0), sizeof(int));
-      memcpy(&info->data_window[1], &data.at(4), sizeof(int));
-      memcpy(&info->data_window[2], &data.at(8), sizeof(int));
-      memcpy(&info->data_window[3], &data.at(12), sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[0]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[1]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[2]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[3]));
-
-      has_data_window = true;
-    } else if (attr_name.compare("displayWindow") == 0) {
-      memcpy(&info->display_window[0], &data.at(0), sizeof(int));
-      memcpy(&info->display_window[1], &data.at(4), sizeof(int));
-      memcpy(&info->display_window[2], &data.at(8), sizeof(int));
-      memcpy(&info->display_window[3], &data.at(12), sizeof(int));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->display_window[0]));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->display_window[1]));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->display_window[2]));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->display_window[3]));
-
-      has_display_window = true;
-    } else if (attr_name.compare("lineOrder") == 0) {
-      info->line_order = static_cast<int>(data[0]);
-      has_line_order = true;
-    } else if (attr_name.compare("pixelAspectRatio") == 0) {
-      memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->pixel_aspect_ratio));
-      has_pixel_aspect_ratio = true;
-    } else if (attr_name.compare("screenWindowCenter") == 0) {
-      memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float));
-      memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->screen_window_center[0]));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->screen_window_center[1]));
-      has_screen_window_center = true;
-    } else if (attr_name.compare("screenWindowWidth") == 0) {
-      memcpy(&info->screen_window_width, &data.at(0), sizeof(float));
-      tinyexr::swap4(
-          reinterpret_cast<unsigned int *>(&info->screen_window_width));
-
-      has_screen_window_width = true;
-    } else if (attr_name.compare("chunkCount") == 0) {
-      memcpy(&info->chunk_count, &data.at(0), sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->chunk_count));
-    } else {
-      // Custom attribute(up to TINYEXR_MAX_ATTRIBUTES)
-      if (info->attributes.size() < TINYEXR_MAX_ATTRIBUTES) {
-        EXRAttribute attrib;
-        strncpy(attrib.name, attr_name.c_str(), 255);
-        attrib.name[255] = '\0';
-        strncpy(attrib.type, attr_type.c_str(), 255);
-        attrib.type[255] = '\0';
-        attrib.size = static_cast<int>(data.size());
-        attrib.value = static_cast<unsigned char *>(malloc(data.size()));
-        memcpy(reinterpret_cast<char *>(attrib.value), &data.at(0),
-               data.size());
-        info->attributes.push_back(attrib);
-      }
-    }
-  }
-
-  // Check if required attributes exist
-  {
-    std::stringstream ss_err;
-
-    if (!has_compression) {
-      ss_err << "\"compression\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_channels) {
-      ss_err << "\"channels\" attribute not found in the header." << std::endl;
-    }
-
-    if (!has_line_order) {
-      ss_err << "\"lineOrder\" attribute not found in the header." << std::endl;
-    }
-
-    if (!has_display_window) {
-      ss_err << "\"displayWindow\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_data_window) {
-      ss_err << "\"dataWindow\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_pixel_aspect_ratio) {
-      ss_err << "\"pixelAspectRatio\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_screen_window_width) {
-      ss_err << "\"screenWindowWidth\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_screen_window_center) {
-      ss_err << "\"screenWindowCenter\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!(ss_err.str().empty())) {
-      if (err) {
-        (*err) += ss_err.str();
-      }
-      return TINYEXR_ERROR_INVALID_HEADER;
-    }
-  }
-
-  info->header_len = static_cast<unsigned int>(orig_size - size);
-
-  return TINYEXR_SUCCESS;
-}
-
-// C++ HeaderInfo to C EXRHeader conversion.
-static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) {
-  exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio;
-  exr_header->screen_window_center[0] = info.screen_window_center[0];
-  exr_header->screen_window_center[1] = info.screen_window_center[1];
-  exr_header->screen_window_width = info.screen_window_width;
-  exr_header->chunk_count = info.chunk_count;
-  exr_header->display_window[0] = info.display_window[0];
-  exr_header->display_window[1] = info.display_window[1];
-  exr_header->display_window[2] = info.display_window[2];
-  exr_header->display_window[3] = info.display_window[3];
-  exr_header->data_window[0] = info.data_window[0];
-  exr_header->data_window[1] = info.data_window[1];
-  exr_header->data_window[2] = info.data_window[2];
-  exr_header->data_window[3] = info.data_window[3];
-  exr_header->line_order = info.line_order;
-  exr_header->compression_type = info.compression_type;
-
-  exr_header->tile_size_x = info.tile_size_x;
-  exr_header->tile_size_y = info.tile_size_y;
-  exr_header->tile_level_mode = info.tile_level_mode;
-  exr_header->tile_rounding_mode = info.tile_rounding_mode;
-
-  exr_header->num_channels = static_cast<int>(info.channels.size());
-
-  exr_header->channels = static_cast<EXRChannelInfo *>(malloc(
-      sizeof(EXRChannelInfo) * static_cast<size_t>(exr_header->num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255);
-    // manually add '\0' for safety.
-    exr_header->channels[c].name[255] = '\0';
-
-    exr_header->channels[c].pixel_type = info.channels[c].pixel_type;
-    exr_header->channels[c].p_linear = info.channels[c].p_linear;
-    exr_header->channels[c].x_sampling = info.channels[c].x_sampling;
-    exr_header->channels[c].y_sampling = info.channels[c].y_sampling;
-  }
-
-  exr_header->pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(exr_header->num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    exr_header->pixel_types[c] = info.channels[c].pixel_type;
-  }
-
-  // Initially fill with values of `pixel_types`
-  exr_header->requested_pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(exr_header->num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    exr_header->requested_pixel_types[c] = info.channels[c].pixel_type;
-  }
-
-  assert(info.attributes.size() < TINYEXR_MAX_ATTRIBUTES);
-  exr_header->num_custom_attributes = static_cast<int>(info.attributes.size());
-
-  for (size_t i = 0; i < info.attributes.size(); i++) {
-    memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, 256);
-    memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, 256);
-    exr_header->custom_attributes[i].size = info.attributes[i].size;
-    // Just copy poiner
-    exr_header->custom_attributes[i].value = info.attributes[i].value;
-  }
-
-  exr_header->header_len = info.header_len;
-}
-
-static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
-                       const std::vector<tinyexr::tinyexr_uint64> &offsets,
-                       const unsigned char *head) {
-  int num_channels = exr_header->num_channels;
-
-  int num_scanline_blocks = 1;
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    num_scanline_blocks = 16;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    num_scanline_blocks = 32;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    num_scanline_blocks = 16;
-  }
-
-  int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1;
-  int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1;
-
-  size_t num_blocks = offsets.size();
-
-  std::vector<size_t> channel_offset_list;
-  int pixel_data_size = 0;
-  size_t channel_offset = 0;
-  tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size,
-                                &channel_offset, num_channels,
-                                exr_header->channels);
-
-  if (exr_header->tiled) {
-    size_t num_tiles = offsets.size();  // = # of blocks
-
-    exr_image->tiles = static_cast<EXRTile *>(
-        malloc(sizeof(EXRTile) * static_cast<size_t>(num_tiles)));
-
-    for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
-      // Allocate memory for each tile.
-      exr_image->tiles[tile_idx].images = tinyexr::AllocateImage(
-          num_channels, exr_header->channels, exr_header->requested_pixel_types,
-          data_width, data_height);
-
-      // 16 byte: tile coordinates
-      // 4 byte : data size
-      // ~      : data(uncompressed or compressed)
-      const unsigned char *data_ptr =
-          reinterpret_cast<const unsigned char *>(head + offsets[tile_idx]);
-
-      int tile_coordinates[4];
-      memcpy(tile_coordinates, data_ptr, sizeof(int) * 4);
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[0]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[1]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[2]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[3]));
-
-      // @todo{ LoD }
-      assert(tile_coordinates[2] == 0);
-      assert(tile_coordinates[3] == 0);
-
-      int data_len;
-      memcpy(&data_len, data_ptr + 16,
-             sizeof(int));  // 16 = sizeof(tile_coordinates)
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-      assert(data_len >= 4);
-
-      // Move to data addr: 20 = 16 + 4;
-      data_ptr += 20;
-
-      tinyexr::DecodeTiledPixelData(
-          exr_image->tiles[tile_idx].images,
-          &(exr_image->tiles[tile_idx].width),
-          &(exr_image->tiles[tile_idx].height),
-          exr_header->requested_pixel_types, data_ptr,
-          static_cast<size_t>(data_len), exr_header->compression_type,
-          exr_header->line_order, data_width, data_height, tile_coordinates[0],
-          tile_coordinates[1], exr_header->tile_size_x, exr_header->tile_size_y,
-          static_cast<size_t>(pixel_data_size),
-          static_cast<size_t>(exr_header->num_custom_attributes),
-          exr_header->custom_attributes,
-          static_cast<size_t>(exr_header->num_channels), exr_header->channels,
-          channel_offset_list);
-
-      exr_image->tiles[tile_idx].offset_x = tile_coordinates[0];
-      exr_image->tiles[tile_idx].offset_y = tile_coordinates[1];
-      exr_image->tiles[tile_idx].level_x = tile_coordinates[2];
-      exr_image->tiles[tile_idx].level_y = tile_coordinates[3];
-
-      exr_image->num_tiles = static_cast<int>(num_tiles);
-    }
-  } else {  // scanline format
-
-    exr_image->images = tinyexr::AllocateImage(
-        num_channels, exr_header->channels, exr_header->requested_pixel_types,
-        data_width, data_height);
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-    for (int y = 0; y < static_cast<int>(num_blocks); y++) {
-      size_t y_idx = static_cast<size_t>(y);
-      const unsigned char *data_ptr =
-          reinterpret_cast<const unsigned char *>(head + offsets[y_idx]);
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(uncompressed or compressed)
-      int line_no;
-      memcpy(&line_no, data_ptr, sizeof(int));
-      int data_len;
-      memcpy(&data_len, data_ptr + 4, sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&line_no));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-      int end_line_no = (std::min)(line_no + num_scanline_blocks,
-                                   (exr_header->data_window[3] + 1));
-
-      int num_lines = end_line_no - line_no;
-      assert(num_lines > 0);
-
-      // Move to data addr: 8 = 4 + 4;
-      data_ptr += 8;
-
-      // Adjust line_no with data_window.bmin.y
-      line_no -= exr_header->data_window[1];
-
-      tinyexr::DecodePixelData(
-          exr_image->images, exr_header->requested_pixel_types, data_ptr,
-          static_cast<size_t>(data_len), exr_header->compression_type,
-          exr_header->line_order, data_width, data_height, data_width, y,
-          line_no, num_lines, static_cast<size_t>(pixel_data_size),
-          static_cast<size_t>(exr_header->num_custom_attributes),
-          exr_header->custom_attributes,
-          static_cast<size_t>(exr_header->num_channels), exr_header->channels,
-          channel_offset_list);
-    }  // omp parallel
-  }
-
-  // Overwrite `pixel_type` with `requested_pixel_type`.
-  {
-    for (int c = 0; c < exr_header->num_channels; c++) {
-      exr_header->pixel_types[c] = exr_header->requested_pixel_types[c];
-    }
-  }
-
-  {
-    exr_image->num_channels = num_channels;
-
-    exr_image->width = data_width;
-    exr_image->height = data_height;
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-static void ReconstructLineOffsets(std::vector<tinyexr::tinyexr_uint64> *offsets, size_t n, const unsigned char *head, const unsigned char *marker, const size_t size)
-{
-  assert(head < marker);
-  assert(offsets->size() == n);
-
-  for (size_t i = 0; i < n; i++) {
-    size_t offset = static_cast<size_t>(marker - head);
-    assert(offset < size); // Offset should not exceed whole EXR file/data size.
-
-    int y;
-    int data_len;
-
-    memcpy(&y, marker, sizeof(int));
-    memcpy(&data_len, marker + 4, sizeof(unsigned int));
-
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&y));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-    (*offsets)[i] = offset;
-    
-    marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len)
-  }
-}
-
-static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header,
-                          const unsigned char *head,
-                          const unsigned char *marker, const size_t size, const char **err) {
-  if (exr_image == NULL || exr_header == NULL || head == NULL ||
-      marker == NULL || (size <= tinyexr::kEXRVersionSize)) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  int num_scanline_blocks = 1;
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    num_scanline_blocks = 16;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    num_scanline_blocks = 32;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    num_scanline_blocks = 16;
-  }
-
-  int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1;
-  int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1;
-
-  // Read offset tables.
-  size_t num_blocks;
-
-  if (exr_header->chunk_count > 0) {
-    // Use `chunkCount` attribute.
-    num_blocks = static_cast<size_t>(exr_header->chunk_count);
-  } else if (exr_header->tiled) {
-    // @todo { LoD }
-    size_t num_x_tiles = static_cast<size_t>(data_width) /
-                         static_cast<size_t>(exr_header->tile_size_x);
-    if (num_x_tiles * static_cast<size_t>(exr_header->tile_size_x) <
-        static_cast<size_t>(data_width)) {
-      num_x_tiles++;
-    }
-    size_t num_y_tiles = static_cast<size_t>(data_height) /
-                         static_cast<size_t>(exr_header->tile_size_y);
-    if (num_y_tiles * static_cast<size_t>(exr_header->tile_size_y) <
-        static_cast<size_t>(data_height)) {
-      num_y_tiles++;
-    }
-
-    num_blocks = num_x_tiles * num_y_tiles;
-  } else {
-    num_blocks = static_cast<size_t>(data_height) /
-                 static_cast<size_t>(num_scanline_blocks);
-    if (num_blocks * static_cast<size_t>(num_scanline_blocks) <
-        static_cast<size_t>(data_height)) {
-      num_blocks++;
-    }
-  }
-
-  std::vector<tinyexr::tinyexr_uint64> offsets(num_blocks);
-
-  for (size_t y = 0; y < num_blocks; y++) {
-    tinyexr::tinyexr_uint64 offset;
-    memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64));
-    tinyexr::swap8(&offset);
-    marker += sizeof(tinyexr::tinyexr_uint64);  // = 8
-    offsets[y] = offset;
-  }
-
-  // If line offsets are invalid, we try to reconstruct it.
-  // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details.
-  for (size_t y = 0; y < num_blocks; y++) {
-    if (offsets[y] <= 0) {
-      // TODO(syoyo) Report as warning.
-      //if (err) {
-      //  stringstream ss;
-      //  ss << "Incomplete lineOffsets." << std::endl;
-      //  (*err) += ss.str();
-      //}
-      ReconstructLineOffsets(&offsets, num_blocks, head, marker, size);
-      break;
-    }
-  }
-
-  return DecodeChunk(exr_image, exr_header, offsets, head);
-}
-
-}  // namespace tinyexr
-
-int LoadEXR(float **out_rgba, int *width, int *height, const char *filename,
-            const char **err) {
-  if (out_rgba == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.\n";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  EXRVersion exr_version;
-  EXRImage exr_image;
-  EXRHeader exr_header;
-  InitEXRHeader(&exr_header);
-  InitEXRImage(&exr_image);
-
-  {
-    int ret = ParseEXRVersionFromFile(&exr_version, filename);
-    if (ret != TINYEXR_SUCCESS) {
-      return ret;
-    }
-
-    if (exr_version.multipart || exr_version.non_image) {
-      if (err) {
-        (*err) = "Loading multipart or DeepImage is not supported yet.\n";
-      }
-      return TINYEXR_ERROR_INVALID_DATA;  // @fixme.
-    }
-  }
-
-  {
-    int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err);
-    if (ret != TINYEXR_SUCCESS) {
-      return ret;
-    }
-  }
-
-  // Read HALF channel as FLOAT.
-  for (int i = 0; i < exr_header.num_channels; i++) {
-    if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) {
-      exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
-    }
-  }
-
-  {
-    int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err);
-    if (ret != TINYEXR_SUCCESS) {
-      return ret;
-    }
-  }
-
-  // RGBA
-  int idxR = -1;
-  int idxG = -1;
-  int idxB = -1;
-  int idxA = -1;
-  for (int c = 0; c < exr_header.num_channels; c++) {
-    if (strcmp(exr_header.channels[c].name, "R") == 0) {
-      idxR = c;
-    } else if (strcmp(exr_header.channels[c].name, "G") == 0) {
-      idxG = c;
-    } else if (strcmp(exr_header.channels[c].name, "B") == 0) {
-      idxB = c;
-    } else if (strcmp(exr_header.channels[c].name, "A") == 0) {
-      idxA = c;
-    }
-  }
-
-  if (idxR == -1) {
-    if (err) {
-      (*err) = "R channel not found\n";
-    }
-
-    // @todo { free exr_image }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  if (idxG == -1) {
-    if (err) {
-      (*err) = "G channel not found\n";
-    }
-    // @todo { free exr_image }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  if (idxB == -1) {
-    if (err) {
-      (*err) = "B channel not found\n";
-    }
-    // @todo { free exr_image }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  (*out_rgba) = reinterpret_cast<float *>(
-      malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
-             static_cast<size_t>(exr_image.height)));
-  for (int i = 0; i < exr_image.width * exr_image.height; i++) {
-    (*out_rgba)[4 * i + 0] =
-        reinterpret_cast<float **>(exr_image.images)[idxR][i];
-    (*out_rgba)[4 * i + 1] =
-        reinterpret_cast<float **>(exr_image.images)[idxG][i];
-    (*out_rgba)[4 * i + 2] =
-        reinterpret_cast<float **>(exr_image.images)[idxB][i];
-    if (idxA != -1) {
-      (*out_rgba)[4 * i + 3] =
-          reinterpret_cast<float **>(exr_image.images)[idxA][i];
-    } else {
-      (*out_rgba)[4 * i + 3] = 1.0;
-    }
-  }
-
-  (*width) = exr_image.width;
-  (*height) = exr_image.height;
-
-  FreeEXRHeader(&exr_header);
-  FreeEXRImage(&exr_image);
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version,
-                             const unsigned char *memory, size_t size,
-                             const char **err) {
-  if (memory == NULL || exr_header == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.\n";
-    }
-
-    // Invalid argument
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (size < tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  const unsigned char *marker = memory + tinyexr::kEXRVersionSize;
-  size_t marker_size = size - tinyexr::kEXRVersionSize;
-
-  tinyexr::HeaderInfo info;
-  info.clear();
-
-  std::string err_str;
-  int ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size);
-
-  if (ret != TINYEXR_SUCCESS) {
-    if (err && !err_str.empty()) {
-      (*err) = strdup(err_str.c_str());  // May leak
-    }
-  }
-
-  ConvertHeader(exr_header, info);
-
-  // transfoer `tiled` from version.
-  exr_header->tiled = version->tiled;
-
-  return ret;
-}
-
-int LoadEXRFromMemory(float *out_rgba, const unsigned char *memory, size_t size,
-                      const char **err) {
-  if (out_rgba == NULL || memory == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.\n";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  EXRVersion exr_version;
-  EXRImage exr_image;
-  EXRHeader exr_header;
-
-  InitEXRHeader(&exr_header);
-
-  int ret = ParseEXRVersionFromMemory(&exr_version, memory, size);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  InitEXRImage(&exr_image);
-  ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  // RGBA
-  int idxR = -1;
-  int idxG = -1;
-  int idxB = -1;
-  int idxA = -1;
-  for (int c = 0; c < exr_header.num_channels; c++) {
-    if (strcmp(exr_header.channels[c].name, "R") == 0) {
-      idxR = c;
-    } else if (strcmp(exr_header.channels[c].name, "G") == 0) {
-      idxG = c;
-    } else if (strcmp(exr_header.channels[c].name, "B") == 0) {
-      idxB = c;
-    } else if (strcmp(exr_header.channels[c].name, "A") == 0) {
-      idxA = c;
-    }
-  }
-
-  if (idxR == -1) {
-    if (err) {
-      (*err) = "R channel not found\n";
-    }
-
-    // @todo { free exr_image }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  if (idxG == -1) {
-    if (err) {
-      (*err) = "G channel not found\n";
-    }
-    // @todo { free exr_image }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  if (idxB == -1) {
-    if (err) {
-      (*err) = "B channel not found\n";
-    }
-    // @todo { free exr_image }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  // Assume `out_rgba` have enough memory allocated.
-  for (int i = 0; i < exr_image.width * exr_image.height; i++) {
-    out_rgba[4 * i + 0] = reinterpret_cast<float **>(exr_image.images)[idxR][i];
-    out_rgba[4 * i + 1] = reinterpret_cast<float **>(exr_image.images)[idxG][i];
-    out_rgba[4 * i + 2] = reinterpret_cast<float **>(exr_image.images)[idxB][i];
-    if (idxA > 0) {
-      out_rgba[4 * i + 3] =
-          reinterpret_cast<float **>(exr_image.images)[idxA][i];
-    } else {
-      out_rgba[4 * i + 3] = 1.0;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header,
-                         const char *filename, const char **err) {
-  if (exr_image == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    if (err) {
-      (*err) = "Cannot read file.";
-    }
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-    (void)ret;
-  }
-
-  return LoadEXRImageFromMemory(exr_image, exr_header, &buf.at(0), filesize, err);
-}
-
-int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header,
-                           const unsigned char *memory, const size_t size, const char **err) {
-  if (exr_image == NULL || memory == NULL || (size < tinyexr::kEXRVersionSize)) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (exr_header->header_len == 0) {
-    if (err) {
-      (*err) = "EXRHeader is not initialized.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  const unsigned char *head = memory;
-  const unsigned char *marker = reinterpret_cast<const unsigned char *>(
-      memory + exr_header->header_len +
-      8);  // +8 for magic number + version header.
-  return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, err);
-}
-
-size_t SaveEXRImageToMemory(const EXRImage *exr_image,
-                            const EXRHeader *exr_header,
-                            unsigned char **memory_out, const char **err) {
-  if (exr_image == NULL || memory_out == NULL ||
-      exr_header->compression_type < 0) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return 0;  // @fixme
-  }
-
-#if !TINYEXR_USE_PIZ
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    if (err) {
-      (*err) = "PIZ compression is not supported in this build.";
-    }
-    return 0;
-  }
-#endif
-
-#if !TINYEXR_USE_ZFP
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    if (err) {
-      (*err) = "ZFP compression is not supported in this build.";
-    }
-    return 0;
-  }
-#endif
-
-#if TINYEXR_USE_ZFP
-  for (size_t i = 0; i < static_cast<size_t>(exr_header->num_channels); i++) {
-    if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) {
-      if (err) {
-        (*err) = "Pixel type must be FLOAT for ZFP compression.";
-      }
-      return 0;
-    }
-  }
-#endif
-
-  std::vector<unsigned char> memory;
-
-  // Header
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-    memory.insert(memory.end(), header, header + 4);
-  }
-
-  // Version, scanline.
-  {
-    char marker[] = {2, 0, 0, 0};
-    /* @todo
-    if (exr_header->tiled) {
-      marker[1] |= 0x2;
-    }
-    if (exr_header->long_name) {
-      marker[1] |= 0x4;
-    }
-    if (exr_header->non_image) {
-      marker[1] |= 0x8;
-    }
-    if (exr_header->multipart) {
-      marker[1] |= 0x10;
-    }
-    */
-    memory.insert(memory.end(), marker, marker + 4);
-  }
-
-  int num_scanlines = 1;
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    num_scanlines = 16;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    num_scanlines = 32;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    num_scanlines = 16;
-  }
-
-  // Write attributes.
-  std::vector<tinyexr::ChannelInfo> channels;
-  {
-    std::vector<unsigned char> data;
-
-    for (int c = 0; c < exr_header->num_channels; c++) {
-      tinyexr::ChannelInfo info;
-      info.p_linear = 0;
-      info.pixel_type = exr_header->requested_pixel_types[c];
-      info.x_sampling = 1;
-      info.y_sampling = 1;
-      info.name = std::string(exr_header->channels[c].name);
-      channels.push_back(info);
-    }
-
-    tinyexr::WriteChannelInfo(data, channels);
-
-    tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0),
-                                    static_cast<int>(data.size()));
-  }
-
-  {
-    int comp = exr_header->compression_type;
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&comp));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "compression", "compression",
-        reinterpret_cast<const unsigned char *>(&comp), 1);
-  }
-
-  {
-    int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1};
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[0]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[1]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[2]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[3]));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "dataWindow", "box2i",
-        reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4);
-    tinyexr::WriteAttributeToMemory(
-        &memory, "displayWindow", "box2i",
-        reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4);
-  }
-
-  {
-    unsigned char line_order = 0;  // @fixme { read line_order from EXRHeader }
-    tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder",
-                                    &line_order, 1);
-  }
-
-  {
-    float aspectRatio = 1.0f;
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&aspectRatio));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "pixelAspectRatio", "float",
-        reinterpret_cast<const unsigned char *>(&aspectRatio), sizeof(float));
-  }
-
-  {
-    float center[2] = {0.0f, 0.0f};
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&center[0]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&center[1]));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "screenWindowCenter", "v2f",
-        reinterpret_cast<const unsigned char *>(center), 2 * sizeof(float));
-  }
-
-  {
-    float w = static_cast<float>(exr_image->width);
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&w));
-    tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float",
-                                    reinterpret_cast<const unsigned char *>(&w),
-                                    sizeof(float));
-  }
-
-  // Custom attributes
-  if (exr_header->num_custom_attributes > 0) {
-    for (int i = 0; i < exr_header->num_custom_attributes; i++) {
-      tinyexr::WriteAttributeToMemory(
-          &memory, exr_header->custom_attributes[i].name,
-          exr_header->custom_attributes[i].type,
-          reinterpret_cast<const unsigned char *>(
-              exr_header->custom_attributes[i].value),
-          exr_header->custom_attributes[i].size);
-    }
-  }
-
-  {  // end of header
-    unsigned char e = 0;
-    memory.push_back(e);
-  }
-
-  int num_blocks = exr_image->height / num_scanlines;
-  if (num_blocks * num_scanlines < exr_image->height) {
-    num_blocks++;
-  }
-
-  std::vector<tinyexr::tinyexr_uint64> offsets(static_cast<size_t>(num_blocks));
-
-  size_t headerSize = memory.size();
-  tinyexr::tinyexr_uint64 offset =
-      headerSize +
-      static_cast<size_t>(num_blocks) *
-          sizeof(
-              tinyexr::tinyexr_int64);  // sizeof(header) + sizeof(offsetTable)
-
-  std::vector<unsigned char> data;
-
-  std::vector<std::vector<unsigned char> > data_list(
-      static_cast<size_t>(num_blocks));
-  std::vector<size_t> channel_offset_list(
-      static_cast<size_t>(exr_header->num_channels));
-
-  int pixel_data_size = 0;
-  size_t channel_offset = 0;
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    channel_offset_list[c] = channel_offset;
-    if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-      pixel_data_size += sizeof(unsigned short);
-      channel_offset += sizeof(unsigned short);
-    } else if (exr_header->requested_pixel_types[c] ==
-               TINYEXR_PIXELTYPE_FLOAT) {
-      pixel_data_size += sizeof(float);
-      channel_offset += sizeof(float);
-    } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) {
-      pixel_data_size += sizeof(unsigned int);
-      channel_offset += sizeof(unsigned int);
-    } else {
-      assert(0);
-    }
-  }
-
-#if TINYEXR_USE_ZFP
-  tinyexr::ZFPCompressionParam zfp_compression_param;
-
-  // Use ZFP compression parameter from custom attributes(if such a parameter
-  // exists)
-  {
-    bool ret = tinyexr::FindZFPCompressionParam(
-        &zfp_compression_param, exr_header->custom_attributes,
-        exr_header->num_custom_attributes);
-
-    if (!ret) {
-      // Use predefined compression parameter.
-      zfp_compression_param.type = 0;
-      zfp_compression_param.rate = 2;
-    }
-  }
-#endif
-
-// Use signed int since some OpenMP compiler doesn't allow unsigned type for
-// `parallel for`
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-  for (int i = 0; i < num_blocks; i++) {
-    size_t ii = static_cast<size_t>(i);
-    int start_y = num_scanlines * i;
-    int endY = (std::min)(num_scanlines * (i + 1), exr_image->height);
-    int h = endY - start_y;
-
-    std::vector<unsigned char> buf(
-        static_cast<size_t>(exr_image->width * h * pixel_data_size));
-
-    for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-      if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-        if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-          for (int y = 0; y < h; y++) {
-            for (int x = 0; x < exr_image->width; x++) {
-              tinyexr::FP16 h16;
-              h16.u = reinterpret_cast<unsigned short **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::FP32 f32 = half_to_float(h16);
-
-              tinyexr::swap4(reinterpret_cast<unsigned int *>(&f32.f));
-
-              // Assume increasing Y
-              float *line_ptr = reinterpret_cast<float *>(&buf.at(
-                  static_cast<size_t>(pixel_data_size * y * exr_image->width) +
-                  channel_offset_list[c] *
-                      static_cast<size_t>(exr_image->width)));
-              line_ptr[x] = f32.f;
-            }
-          }
-        } else if (exr_header->requested_pixel_types[c] ==
-                   TINYEXR_PIXELTYPE_HALF) {
-          for (int y = 0; y < h; y++) {
-            for (int x = 0; x < exr_image->width; x++) {
-              unsigned short val = reinterpret_cast<unsigned short **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::swap2(&val);
-
-              // Assume increasing Y
-              unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-                  &buf.at(static_cast<size_t>(pixel_data_size * y *
-                                              exr_image->width) +
-                          channel_offset_list[c] *
-                              static_cast<size_t>(exr_image->width)));
-              line_ptr[x] = val;
-            }
-          }
-        } else {
-          assert(0);
-        }
-
-      } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-        if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-          for (int y = 0; y < h; y++) {
-            for (int x = 0; x < exr_image->width; x++) {
-              tinyexr::FP32 f32;
-              f32.f = reinterpret_cast<float **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::FP16 h16;
-              h16 = float_to_half_full(f32);
-
-              tinyexr::swap2(reinterpret_cast<unsigned short *>(&h16.u));
-
-              // Assume increasing Y
-              unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-                  &buf.at(static_cast<size_t>(pixel_data_size * y *
-                                              exr_image->width) +
-                          channel_offset_list[c] *
-                              static_cast<size_t>(exr_image->width)));
-              line_ptr[x] = h16.u;
-            }
-          }
-        } else if (exr_header->requested_pixel_types[c] ==
-                   TINYEXR_PIXELTYPE_FLOAT) {
-          for (int y = 0; y < h; y++) {
-            for (int x = 0; x < exr_image->width; x++) {
-              float val = reinterpret_cast<float **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-              // Assume increasing Y
-              float *line_ptr = reinterpret_cast<float *>(&buf.at(
-                  static_cast<size_t>(pixel_data_size * y * exr_image->width) +
-                  channel_offset_list[c] *
-                      static_cast<size_t>(exr_image->width)));
-              line_ptr[x] = val;
-            }
-          }
-        } else {
-          assert(0);
-        }
-      } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) {
-        for (int y = 0; y < h; y++) {
-          for (int x = 0; x < exr_image->width; x++) {
-            unsigned int val = reinterpret_cast<unsigned int **>(
-                exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-            tinyexr::swap4(&val);
-
-            // Assume increasing Y
-            unsigned int *line_ptr = reinterpret_cast<unsigned int *>(&buf.at(
-                static_cast<size_t>(pixel_data_size * y * exr_image->width) +
-                channel_offset_list[c] *
-                    static_cast<size_t>(exr_image->width)));
-            line_ptr[x] = val;
-          }
-        }
-      }
-    }
-
-    if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(uncompressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = static_cast<unsigned int>(buf.size());
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), buf.begin(),
-                           buf.begin() + data_len);
-
-    } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
-               (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
-#if TINYEXR_USE_MINIZ
-      std::vector<unsigned char> block(
-          tinyexr::miniz::mz_compressBound(buf.size()));
-#else
-      std::vector<unsigned char> block(compressBound(buf.size()));
-#endif
-      tinyexr::tinyexr_uint64 outSize = block.size();
-
-      tinyexr::CompressZip(&block.at(0), outSize,
-                           reinterpret_cast<const unsigned char *>(&buf.at(0)),
-                           buf.size());
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = static_cast<unsigned int>(outSize);  // truncate
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-    } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) {
-      // (buf.size() * 3) / 2 would be enough.
-      std::vector<unsigned char> block((buf.size() * 3) / 2);
-
-      tinyexr::tinyexr_uint64 outSize = block.size();
-
-      tinyexr::CompressRle(&block.at(0), outSize,
-                           reinterpret_cast<const unsigned char *>(&buf.at(0)),
-                           buf.size());
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = static_cast<unsigned int>(outSize);  // truncate
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-    } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-#if TINYEXR_USE_PIZ
-      unsigned int bufLen =
-          1024 + static_cast<unsigned int>(
-                     1.2 * static_cast<unsigned int>(
-                               buf.size()));  // @fixme { compute good bound. }
-      std::vector<unsigned char> block(bufLen);
-      unsigned int outSize = static_cast<unsigned int>(block.size());
-
-      CompressPiz(&block.at(0), outSize,
-                  reinterpret_cast<const unsigned char *>(&buf.at(0)),
-                  buf.size(), channels, exr_image->width, h);
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = outSize;
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-#else
-      assert(0);
-#endif
-    } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-#if TINYEXR_USE_ZFP
-      std::vector<unsigned char> block;
-      unsigned int outSize;
-
-      tinyexr::CompressZfp(
-          &block, &outSize, reinterpret_cast<const float *>(&buf.at(0)),
-          exr_image->width, h, exr_header->num_channels, zfp_compression_param);
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = outSize;
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-#else
-      assert(0);
-#endif
-    } else {
-      assert(0);
-    }
-  }  // omp parallel
-
-  for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) {
-    data.insert(data.end(), data_list[i].begin(), data_list[i].end());
-
-    offsets[i] = offset;
-    tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offsets[i]));
-    offset += data_list[i].size();
-  }
-
-  {
-    memory.insert(
-        memory.end(), reinterpret_cast<unsigned char *>(&offsets.at(0)),
-        reinterpret_cast<unsigned char *>(&offsets.at(0)) +
-            sizeof(tinyexr::tinyexr_uint64) * static_cast<size_t>(num_blocks));
-  }
-
-  { memory.insert(memory.end(), data.begin(), data.end()); }
-
-  assert(memory.size() > 0);
-
-  (*memory_out) = static_cast<unsigned char *>(malloc(memory.size()));
-  memcpy((*memory_out), &memory.at(0), memory.size());
-
-  return memory.size();  // OK
-}
-
-int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header,
-                       const char *filename, const char **err) {
-  if (exr_image == NULL || filename == NULL ||
-      exr_header->compression_type < 0) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#if !TINYEXR_USE_PIZ
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    if (err) {
-      (*err) = "PIZ compression is not supported in this build.";
-    }
-    return 0;
-  }
-#endif
-
-#if !TINYEXR_USE_ZFP
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    if (err) {
-      (*err) = "ZFP compression is not supported in this build.";
-    }
-    return 0;
-  }
-#endif
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "wb");
-#else
-  FILE *fp = fopen(filename, "wb");
-#endif
-  if (!fp) {
-    if (err) {
-      (*err) = "Cannot write a file.";
-    }
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  unsigned char *mem = NULL;
-  size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err);
-
-  if ((mem_size > 0) && mem) {
-    fwrite(mem, 1, mem_size, fp);
-  }
-  free(mem);
-
-  fclose(fp);
-
-  return TINYEXR_SUCCESS;
-}
-
-int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
-  if (deep_image == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  FILE *fp = fopen(filename, "rb");
-  if (!fp) {
-    if (err) {
-      (*err) = "Cannot read file.";
-    }
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  if (filesize == 0) {
-    fclose(fp);
-    if (err) {
-      (*err) = "File size is zero.";
-    }
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  std::vector<char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    (void)ret;
-  }
-  fclose(fp);
-
-  const char *head = &buf[0];
-  const char *marker = &buf[0];
-
-  // Header check.
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-
-    if (memcmp(marker, header, 4) != 0) {
-      if (err) {
-        (*err) = "Invalid magic number.";
-      }
-      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
-    }
-    marker += 4;
-  }
-
-  // Version, scanline.
-  {
-    // ver 2.0, scanline, deep bit on(0x800)
-    // must be [2, 0, 0, 0]
-    if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) {
-      if (err) {
-        (*err) = "Unsupported version or scanline.";
-      }
-      return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-    }
-
-    marker += 4;
-  }
-
-  int dx = -1;
-  int dy = -1;
-  int dw = -1;
-  int dh = -1;
-  int num_scanline_blocks = 1;  // 16 for ZIP compression.
-  int compression_type = -1;
-  int num_channels = -1;
-  std::vector<tinyexr::ChannelInfo> channels;
-
-  // Read attributes
-  size_t size = filesize - tinyexr::kEXRVersionSize;
-  for (;;) {
-    if (0 == size) {
-      return TINYEXR_ERROR_INVALID_DATA;
-    } else if (marker[0] == '\0') {
-      size--;
-      break;
-    }
-
-    std::string attr_name;
-    std::string attr_type;
-    std::vector<unsigned char> data;
-    size_t marker_size;
-    if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size,
-                                marker, size)) {
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-    marker += marker_size;
-    size -= marker_size;
-
-    if (attr_name.compare("compression") == 0) {
-      compression_type = data[0];
-      if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) {
-        if (err) {
-          (*err) = "Unsupported compression type.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-      }
-
-      if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-        num_scanline_blocks = 16;
-      }
-
-    } else if (attr_name.compare("channels") == 0) {
-      // name: zero-terminated string, from 1 to 255 bytes long
-      // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2
-      // pLinear: unsigned char, possible values are 0 and 1
-      // reserved: three chars, should be zero
-      // xSampling: int
-      // ySampling: int
-
-      tinyexr::ReadChannelInfo(channels, data);
-
-      num_channels = static_cast<int>(channels.size());
-
-      if (num_channels < 1) {
-        if (err) {
-          (*err) = "Invalid channels format.";
-        }
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-    } else if (attr_name.compare("dataWindow") == 0) {
-      memcpy(&dx, &data.at(0), sizeof(int));
-      memcpy(&dy, &data.at(4), sizeof(int));
-      memcpy(&dw, &data.at(8), sizeof(int));
-      memcpy(&dh, &data.at(12), sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dx));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dy));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dw));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dh));
-
-    } else if (attr_name.compare("displayWindow") == 0) {
-      int x;
-      int y;
-      int w;
-      int h;
-      memcpy(&x, &data.at(0), sizeof(int));
-      memcpy(&y, &data.at(4), sizeof(int));
-      memcpy(&w, &data.at(8), sizeof(int));
-      memcpy(&h, &data.at(12), sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&x));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&y));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&w));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&h));
-    }
-  }
-
-  assert(dx >= 0);
-  assert(dy >= 0);
-  assert(dw >= 0);
-  assert(dh >= 0);
-  assert(num_channels >= 1);
-
-  int data_width = dw - dx + 1;
-  int data_height = dh - dy + 1;
-
-  std::vector<float> image(
-      static_cast<size_t>(data_width * data_height * 4));  // 4 = RGBA
-
-  // Read offset tables.
-  int num_blocks = data_height / num_scanline_blocks;
-  if (num_blocks * num_scanline_blocks < data_height) {
-    num_blocks++;
-  }
-
-  std::vector<tinyexr::tinyexr_int64> offsets(static_cast<size_t>(num_blocks));
-
-  for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) {
-    tinyexr::tinyexr_int64 offset;
-    memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64));
-    tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offset));
-    marker += sizeof(tinyexr::tinyexr_int64);  // = 8
-    offsets[y] = offset;
-  }
-
-#if TINYEXR_USE_PIZ
-  if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) {
-#else
-  if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
-#endif
-    // OK
-  } else {
-    if (err) {
-      (*err) = "Unsupported format.";
-    }
-    return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-  }
-
-  deep_image->image = static_cast<float ***>(
-      malloc(sizeof(float **) * static_cast<size_t>(num_channels)));
-  for (int c = 0; c < num_channels; c++) {
-    deep_image->image[c] = static_cast<float **>(
-        malloc(sizeof(float *) * static_cast<size_t>(data_height)));
-    for (int y = 0; y < data_height; y++) {
-    }
-  }
-
-  deep_image->offset_table = static_cast<int **>(
-      malloc(sizeof(int *) * static_cast<size_t>(data_height)));
-  for (int y = 0; y < data_height; y++) {
-    deep_image->offset_table[y] = static_cast<int *>(
-        malloc(sizeof(int) * static_cast<size_t>(data_width)));
-  }
-
-  for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) {
-    const unsigned char *data_ptr =
-        reinterpret_cast<const unsigned char *>(head + offsets[y]);
-
-    // int: y coordinate
-    // int64: packed size of pixel offset table
-    // int64: packed size of sample data
-    // int64: unpacked size of sample data
-    // compressed pixel offset table
-    // compressed sample data
-    int line_no;
-    tinyexr::tinyexr_int64 packedOffsetTableSize;
-    tinyexr::tinyexr_int64 packedSampleDataSize;
-    tinyexr::tinyexr_int64 unpackedSampleDataSize;
-    memcpy(&line_no, data_ptr, sizeof(int));
-    memcpy(&packedOffsetTableSize, data_ptr + 4,
-           sizeof(tinyexr::tinyexr_int64));
-    memcpy(&packedSampleDataSize, data_ptr + 12,
-           sizeof(tinyexr::tinyexr_int64));
-    memcpy(&unpackedSampleDataSize, data_ptr + 20,
-           sizeof(tinyexr::tinyexr_int64));
-
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&line_no));
-    tinyexr::swap8(
-        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedOffsetTableSize));
-    tinyexr::swap8(
-        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedSampleDataSize));
-    tinyexr::swap8(
-        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&unpackedSampleDataSize));
-
-    std::vector<int> pixelOffsetTable(static_cast<size_t>(data_width));
-
-    // decode pixel offset table.
-    {
-      unsigned long dstLen = pixelOffsetTable.size() * sizeof(int);
-      tinyexr::DecompressZip(
-          reinterpret_cast<unsigned char *>(&pixelOffsetTable.at(0)), &dstLen,
-          data_ptr + 28, static_cast<size_t>(packedOffsetTableSize));
-
-      assert(dstLen == pixelOffsetTable.size() * sizeof(int));
-      for (size_t i = 0; i < static_cast<size_t>(data_width); i++) {
-        deep_image->offset_table[y][i] = pixelOffsetTable[i];
-      }
-    }
-
-    std::vector<unsigned char> sample_data(
-        static_cast<size_t>(unpackedSampleDataSize));
-
-    // decode sample data.
-    {
-      unsigned long dstLen = static_cast<unsigned long>(unpackedSampleDataSize);
-      tinyexr::DecompressZip(
-          reinterpret_cast<unsigned char *>(&sample_data.at(0)), &dstLen,
-          data_ptr + 28 + packedOffsetTableSize,
-          static_cast<size_t>(packedSampleDataSize));
-      assert(dstLen == static_cast<unsigned long>(unpackedSampleDataSize));
-    }
-
-    // decode sample
-    int sampleSize = -1;
-    std::vector<int> channel_offset_list(static_cast<size_t>(num_channels));
-    {
-      int channel_offset = 0;
-      for (size_t i = 0; i < static_cast<size_t>(num_channels); i++) {
-        channel_offset_list[i] = channel_offset;
-        if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) {  // UINT
-          channel_offset += 4;
-        } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) {  // half
-          channel_offset += 2;
-        } else if (channels[i].pixel_type ==
-                   TINYEXR_PIXELTYPE_FLOAT) {  // float
-          channel_offset += 4;
-        } else {
-          assert(0);
-        }
-      }
-      sampleSize = channel_offset;
-    }
-    assert(sampleSize >= 2);
-
-    assert(static_cast<size_t>(
-               pixelOffsetTable[static_cast<size_t>(data_width - 1)] *
-               sampleSize) == sample_data.size());
-    int samples_per_line = static_cast<int>(sample_data.size()) / sampleSize;
-
-    //
-    // Alloc memory
-    //
-
-    //
-    // pixel data is stored as image[channels][pixel_samples]
-    //
-    {
-      tinyexr::tinyexr_uint64 data_offset = 0;
-      for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-        deep_image->image[c][y] = static_cast<float *>(
-            malloc(sizeof(float) * static_cast<size_t>(samples_per_line)));
-
-        if (channels[c].pixel_type == 0) {  // UINT
-          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
-            unsigned int ui = *reinterpret_cast<unsigned int *>(
-                &sample_data.at(data_offset + x * sizeof(int)));
-            deep_image->image[c][y][x] = static_cast<float>(ui);  // @fixme
-          }
-          data_offset +=
-              sizeof(unsigned int) * static_cast<size_t>(samples_per_line);
-        } else if (channels[c].pixel_type == 1) {  // half
-          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
-            tinyexr::FP16 f16;
-            f16.u = *reinterpret_cast<unsigned short *>(
-                &sample_data.at(data_offset + x * sizeof(short)));
-            tinyexr::FP32 f32 = half_to_float(f16);
-            deep_image->image[c][y][x] = f32.f;
-          }
-          data_offset += sizeof(short) * static_cast<size_t>(samples_per_line);
-        } else {  // float
-          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
-            float f = *reinterpret_cast<float *>(
-                &sample_data.at(data_offset + x * sizeof(float)));
-            deep_image->image[c][y][x] = f;
-          }
-          data_offset += sizeof(float) * static_cast<size_t>(samples_per_line);
-        }
-      }
-    }
-  }  // y
-
-  deep_image->width = data_width;
-  deep_image->height = data_height;
-
-  deep_image->channel_names = static_cast<const char **>(
-      malloc(sizeof(const char *) * static_cast<size_t>(num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-#ifdef _WIN32
-    deep_image->channel_names[c] = _strdup(channels[c].name.c_str());
-#else
-    deep_image->channel_names[c] = strdup(channels[c].name.c_str());
-#endif
-  }
-  deep_image->num_channels = num_channels;
-
-  return TINYEXR_SUCCESS;
-}
-
-void InitEXRImage(EXRImage *exr_image) {
-  if (exr_image == NULL) {
-    return;
-  }
-
-  exr_image->width = 0;
-  exr_image->height = 0;
-  exr_image->num_channels = 0;
-
-  exr_image->images = NULL;
-  exr_image->tiles = NULL;
-}
-
-void InitEXRHeader(EXRHeader *exr_header) {
-  if (exr_header == NULL) {
-    return;
-  }
-
-  memset(exr_header, 0, sizeof(EXRHeader));
-}
-
-int FreeEXRHeader(EXRHeader *exr_header) {
-  if (exr_header == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (exr_header->channels) {
-    free(exr_header->channels);
-  }
-
-  if (exr_header->pixel_types) {
-    free(exr_header->pixel_types);
-  }
-
-  if (exr_header->requested_pixel_types) {
-    free(exr_header->requested_pixel_types);
-  }
-
-  for (int i = 0; i < exr_header->num_custom_attributes; i++) {
-    if (exr_header->custom_attributes[i].value) {
-      free(exr_header->custom_attributes[i].value);
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int FreeEXRImage(EXRImage *exr_image) {
-  if (exr_image == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  for (int i = 0; i < exr_image->num_channels; i++) {
-    if (exr_image->images && exr_image->images[i]) {
-      free(exr_image->images[i]);
-    }
-  }
-
-  if (exr_image->images) {
-    free(exr_image->images);
-  }
-
-  if (exr_image->tiles) {
-    for (int tid = 0; tid < exr_image->num_tiles; tid++) {
-      for (int i = 0; i < exr_image->num_channels; i++) {
-        if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) {
-          free(exr_image->tiles[tid].images[i]);
-        }
-      }
-      if (exr_image->tiles[tid].images) {
-        free(exr_image->tiles[tid].images);
-      }
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version,
-                           const char *filename, const char **err) {
-  if (exr_header == NULL || exr_version == NULL || filename == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    if (err) {
-      (*err) = "Cannot read file.";
-    }
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-
-    if (ret != filesize) {
-      if (err) {
-        (*err) = "fread error.";
-      }
-      return TINYEXR_ERROR_INVALID_FILE;
-    }
-  }
-
-  return ParseEXRHeaderFromMemory(exr_header, exr_version, &buf.at(0), filesize,
-                                  err);
-}
-
-int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers,
-                                      int *num_headers,
-                                      const EXRVersion *exr_version,
-                                      const unsigned char *memory, size_t size,
-                                      const char **err) {
-  if (memory == NULL || exr_headers == NULL || num_headers == NULL ||
-      exr_version == NULL) {
-    // Invalid argument
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (size < tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  const unsigned char *marker = memory + tinyexr::kEXRVersionSize;
-  size_t marker_size = size - tinyexr::kEXRVersionSize;
-
-  std::vector<tinyexr::HeaderInfo> infos;
-
-  for (;;) {
-    tinyexr::HeaderInfo info;
-    info.clear();
-
-    std::string err_str;
-    bool empty_header = false;
-    int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str,
-                             marker, marker_size);
-
-    if (ret != TINYEXR_SUCCESS) {
-      if (err) {
-        (*err) = strdup(err_str.c_str());  // may leak
-      }
-      return ret;
-    }
-
-    if (empty_header) {
-      marker += 1;  // skip '\0'
-      break;
-    }
-
-    // `chunkCount` must exist in the header.
-    if (info.chunk_count == 0) {
-      if (err) {
-        (*err) = "`chunkCount' attribute is not found in the header.";
-      }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    infos.push_back(info);
-
-    // move to next header.
-    marker += info.header_len;
-    size -= info.header_len;
-  }
-
-  // allocate memory for EXRHeader and create array of EXRHeader pointers.
-  (*exr_headers) =
-      static_cast<EXRHeader **>(malloc(sizeof(EXRHeader *) * infos.size()));
-  for (size_t i = 0; i < infos.size(); i++) {
-    EXRHeader *exr_header = static_cast<EXRHeader *>(malloc(sizeof(EXRHeader)));
-
-    ConvertHeader(exr_header, infos[i]);
-
-    // transfoer `tiled` from version.
-    exr_header->tiled = exr_version->tiled;
-
-    (*exr_headers)[i] = exr_header;
-  }
-
-  (*num_headers) = static_cast<int>(infos.size());
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers,
-                                    const EXRVersion *exr_version,
-                                    const char *filename, const char **err) {
-  if (exr_headers == NULL || num_headers == NULL || exr_version == NULL ||
-      filename == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    if (err) {
-      (*err) = "Cannot read file.";
-    }
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-
-    if (ret != filesize) {
-      if (err) {
-        (*err) = "fread error.";
-      }
-      return TINYEXR_ERROR_INVALID_FILE;
-    }
-  }
-
-  return ParseEXRMultipartHeaderFromMemory(
-      exr_headers, num_headers, exr_version, &buf.at(0), filesize, err);
-}
-
-int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
-                              size_t size) {
-  if (version == NULL || memory == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (size < tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  const unsigned char *marker = memory;
-
-  // Header check.
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-
-    if (memcmp(marker, header, 4) != 0) {
-      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
-    }
-    marker += 4;
-  }
-
-  version->tiled = false;
-  version->long_name = false;
-  version->non_image = false;
-  version->multipart = false;
-
-  // Parse version header.
-  {
-    // must be 2
-    if (marker[0] != 2) {
-      return TINYEXR_ERROR_INVALID_EXR_VERSION;
-    }
-
-    if (version == NULL) {
-      return TINYEXR_SUCCESS;  // May OK
-    }
-
-    version->version = 2;
-
-    if (marker[1] & 0x2) {  // 9th bit
-      version->tiled = true;
-    }
-    if (marker[1] & 0x4) {  // 10th bit
-      version->long_name = true;
-    }
-    if (marker[1] & 0x8) {        // 11th bit
-      version->non_image = true;  // (deep image)
-    }
-    if (marker[1] & 0x10) {  // 12th bit
-      version->multipart = true;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) {
-  if (filename == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t file_size;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  file_size = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  if (file_size < tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  unsigned char buf[tinyexr::kEXRVersionSize];
-  size_t ret = fread(&buf[0], 1, tinyexr::kEXRVersionSize, fp);
-  fclose(fp);
-
-  if (ret != tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize);
-}
-
-int LoadEXRMultipartImageFromMemory(EXRImage *exr_images,
-                                    const EXRHeader **exr_headers,
-                                    unsigned int num_parts,
-                                    const unsigned char *memory,
-                                    const char **err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
-      memory == NULL) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  // compute total header size.
-  size_t total_header_size = 0;
-  for (unsigned int i = 0; i < num_parts; i++) {
-    if (exr_headers[i]->header_len == 0) {
-      if (err) {
-        (*err) = "EXRHeader is not initialized.";
-      }
-      return TINYEXR_ERROR_INVALID_ARGUMENT;
-    }
-
-    total_header_size += exr_headers[i]->header_len;
-  }
-
-  const char *marker = reinterpret_cast<const char *>(
-      memory + total_header_size + 4 +
-      4);  // +8 for magic number and version header.
-
-  marker += 1;  // Skip empty header.
-
-  // NOTE 1:
-  //   In multipart image, There is 'part number' before chunk data.
-  //   4 byte : part number
-  //   4+     : chunk
-  //
-  // NOTE 2:
-  //   EXR spec says 'part number' is 'unsigned long' but actually this is
-  //   'unsigned int(4 bytes)' in OpenEXR implementation...
-  //   http://www.openexr.com/openexrfilelayout.pdf
-
-  // Load chunk offset table.
-  std::vector<std::vector<tinyexr::tinyexr_uint64> > chunk_offset_table_list;
-  for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) {
-    std::vector<tinyexr::tinyexr_uint64> offset_table(
-        static_cast<size_t>(exr_headers[i]->chunk_count));
-
-    for (size_t c = 0; c < offset_table.size(); c++) {
-      tinyexr::tinyexr_uint64 offset;
-      memcpy(&offset, marker, 8);
-      tinyexr::swap8(&offset);
-
-      offset_table[c] = offset + 4;  // +4 to skip 'part number'
-      marker += 8;
-    }
-
-    chunk_offset_table_list.push_back(offset_table);
-  }
-
-  // Decode image.
-  for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) {
-    std::vector<tinyexr::tinyexr_uint64> &offset_table =
-        chunk_offset_table_list[i];
-
-    // First check 'part number' is identitical to 'i'
-    for (size_t c = 0; c < offset_table.size(); c++) {
-      const unsigned char *part_number_addr =
-          memory + offset_table[c] - 4;  // -4 to move to 'part number' field.
-      unsigned int part_no;
-      memcpy(&part_no, part_number_addr, sizeof(unsigned int));  // 4
-      tinyexr::swap4(&part_no);
-
-      if (part_no != i) {
-        assert(0);
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-    }
-
-    int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table,
-                                   memory);
-    if (ret != TINYEXR_SUCCESS) {
-      return ret;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int LoadEXRMultipartImageFromFile(EXRImage *exr_images,
-                                  const EXRHeader **exr_headers,
-                                  unsigned int num_parts, const char *filename,
-                                  const char **err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts == 0) {
-    if (err) {
-      (*err) = "Invalid argument.";
-    }
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    if (err) {
-      (*err) = "Cannot read file.";
-    }
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  //  @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-    (void)ret;
-  }
-
-  return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts,
-                                         &buf.at(0), err);
-}
-
-int SaveEXR(const float *data, int width, int height, int components,
-            const char *outfilename) {
-  if (components == 3 || components == 4) {
-    // OK
-  } else {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  // Assume at least 16x16 pixels.
-  if (width < 16) return TINYEXR_ERROR_INVALID_ARGUMENT;
-  if (height < 16) return TINYEXR_ERROR_INVALID_ARGUMENT;
-
-  EXRHeader header;
-  InitEXRHeader(&header);
-
-  EXRImage image;
-  InitEXRImage(&image);
-
-  image.num_channels = components;
-
-  std::vector<float> images[4];
-  images[0].resize(static_cast<size_t>(width * height));
-  images[1].resize(static_cast<size_t>(width * height));
-  images[2].resize(static_cast<size_t>(width * height));
-  images[3].resize(static_cast<size_t>(width * height));
-
-  // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
-  for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
-    images[0][i] = data[static_cast<size_t>(components) * i + 0];
-    images[1][i] = data[static_cast<size_t>(components) * i + 1];
-    images[2][i] = data[static_cast<size_t>(components) * i + 2];
-    if (components == 4) {
-      images[3][i] = data[static_cast<size_t>(components) * i + 3];
-    }
-  }
-
-  float *image_ptr[4] = {0, 0, 0, 0};
-  if (components == 4) {
-    image_ptr[0] = &(images[3].at(0));  // A
-    image_ptr[1] = &(images[2].at(0));  // B
-    image_ptr[2] = &(images[1].at(0));  // G
-    image_ptr[3] = &(images[0].at(0));  // R
-  } else {
-    image_ptr[0] = &(images[2].at(0));  // B
-    image_ptr[1] = &(images[1].at(0));  // G
-    image_ptr[2] = &(images[0].at(0));  // R
-  }
-
-  image.images = reinterpret_cast<unsigned char **>(image_ptr);
-  image.width = width;
-  image.height = height;
-
-  header.num_channels = components;
-  header.channels = static_cast<EXRChannelInfo *>(malloc(
-      sizeof(EXRChannelInfo) * static_cast<size_t>(header.num_channels)));
-  // Must be (A)BGR order, since most of EXR viewers expect this channel order.
-  if (components == 4) {
-    strncpy(header.channels[0].name, "A", 255);
-    header.channels[0].name[strlen("A")] = '\0';
-    strncpy(header.channels[1].name, "B", 255);
-    header.channels[1].name[strlen("B")] = '\0';
-    strncpy(header.channels[2].name, "G", 255);
-    header.channels[2].name[strlen("G")] = '\0';
-    strncpy(header.channels[3].name, "R", 255);
-    header.channels[3].name[strlen("R")] = '\0';
-  } else {
-    strncpy(header.channels[0].name, "B", 255);
-    header.channels[0].name[strlen("B")] = '\0';
-    strncpy(header.channels[1].name, "G", 255);
-    header.channels[1].name[strlen("G")] = '\0';
-    strncpy(header.channels[2].name, "R", 255);
-    header.channels[2].name[strlen("R")] = '\0';
-  }
-
-  header.pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
-  header.requested_pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
-  for (int i = 0; i < header.num_channels; i++) {
-    header.pixel_types[i] =
-        TINYEXR_PIXELTYPE_FLOAT;  // pixel type of input image
-    header.requested_pixel_types[i] =
-        TINYEXR_PIXELTYPE_HALF;  // pixel type of output image to be stored in
-                                 // .EXR
-  }
-
-  const char *err;
-  int ret = SaveEXRImageToFile(&image, &header, outfilename, &err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  free(header.channels);
-  free(header.pixel_types);
-  free(header.requested_pixel_types);
-
-  return ret;
-}
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif
-
-#endif  // TINYEXR_H_
diff --git a/examples/32-particles/particles.cpp b/examples/32-particles/particles.cpp
index ca38efa60..aac0aeeff 100644
--- a/examples/32-particles/particles.cpp
+++ b/examples/32-particles/particles.cpp
@@ -249,7 +249,7 @@ class Particles : public entry::AppI
 
 		psInit();
 
-		bgfx::ImageContainer* image = imageLoad(
+		bimg::ImageContainer* image = imageLoad(
 			  "textures/particle.ktx"
 			, bgfx::TextureFormat::BGRA8
 			);
@@ -260,7 +260,7 @@ class Particles : public entry::AppI
 				, image->m_data
 				);
 
-		bgfx::imageFree(image);
+		bimg::imageFree(image);
 
 		for (uint32_t ii = 0; ii < BX_COUNTOF(m_emitter); ++ii)
 		{
diff --git a/examples/common/bgfx_utils.cpp b/examples/common/bgfx_utils.cpp
index edea94caf..6a92d1ca3 100644
--- a/examples/common/bgfx_utils.cpp
+++ b/examples/common/bgfx_utils.cpp
@@ -21,6 +21,8 @@ namespace stl = tinystl;
 
 #include "bgfx_utils.h"
 
+#include <bimg/decode.h>
+
 void* load(bx::FileReaderI* _reader, bx::AllocatorI* _allocator, const char* _filePath, uint32_t* _size)
 {
 	if (bx::open(_reader, _filePath) )
@@ -149,8 +151,8 @@ bgfx::ProgramHandle loadProgram(const char* _vsName, const char* _fsName)
 static void imageReleaseCb(void* _ptr, void* _userData)
 {
 	BX_UNUSED(_ptr);
-	bgfx::ImageContainer* imageContainer = (bgfx::ImageContainer*)_userData;
-	bgfx::imageFree(imageContainer);
+	bimg::ImageContainer* imageContainer = (bimg::ImageContainer*)_userData;
+	bimg::imageFree(imageContainer);
 }
 
 bgfx::TextureHandle loadTexture(bx::FileReaderI* _reader, const char* _filePath, uint32_t _flags, uint8_t _skip, bgfx::TextureInfo* _info)
@@ -162,7 +164,7 @@ bgfx::TextureHandle loadTexture(bx::FileReaderI* _reader, const char* _filePath,
 	void* data = load(_reader, entry::getAllocator(), _filePath, &size);
 	if (NULL != data)
 	{
-		bgfx::ImageContainer* imageContainer = bgfx::imageParse(entry::getAllocator(), data, size);
+		bimg::ImageContainer* imageContainer = bimg::imageParse(entry::getAllocator(), data, size);
 
 		if (NULL != imageContainer)
 		{
@@ -180,7 +182,7 @@ bgfx::TextureHandle loadTexture(bx::FileReaderI* _reader, const char* _filePath,
 					  uint16_t(imageContainer->m_width)
 					, 1 < imageContainer->m_numMips
 					, imageContainer->m_numLayers
-					, imageContainer->m_format
+					, bgfx::TextureFormat::Enum(imageContainer->m_format)
 					, _flags
 					, mem
 					);
@@ -192,7 +194,7 @@ bgfx::TextureHandle loadTexture(bx::FileReaderI* _reader, const char* _filePath,
 					, uint16_t(imageContainer->m_height)
 					, 1 < imageContainer->m_numMips
 					, imageContainer->m_numLayers
-					, imageContainer->m_format
+					, bgfx::TextureFormat::Enum(imageContainer->m_format)
 					, _flags
 					, mem
 					);
@@ -208,7 +210,7 @@ bgfx::TextureHandle loadTexture(bx::FileReaderI* _reader, const char* _filePath,
 					, false
 					, false
 					, 1
-					, imageContainer->m_format
+					, bgfx::TextureFormat::Enum(imageContainer->m_format)
 					);
 			}
 		}
@@ -222,12 +224,12 @@ bgfx::TextureHandle loadTexture(const char* _name, uint32_t _flags, uint8_t _ski
 	return loadTexture(entry::getFileReader(), _name, _flags, _skip, _info);
 }
 
-bgfx::ImageContainer* imageLoad(const char* _filePath, bgfx::TextureFormat::Enum _dstFormat)
+bimg::ImageContainer* imageLoad(const char* _filePath, bgfx::TextureFormat::Enum _dstFormat)
 {
 	uint32_t size = 0;
 	void* data = loadMem(entry::getFileReader(), entry::getAllocator(), _filePath, &size);
 
-	return bgfx::imageParse(entry::getAllocator(), data, size, _dstFormat);
+	return bimg::imageParse(entry::getAllocator(), data, size, bimg::TextureFormat::Enum(_dstFormat) );
 }
 
 void calcTangents(void* _vertices, uint16_t _numVertices, bgfx::VertexDecl _decl, const uint16_t* _indices, uint32_t _numIndices)
diff --git a/examples/common/bgfx_utils.h b/examples/common/bgfx_utils.h
index 6b3eefdf1..e81510ff0 100644
--- a/examples/common/bgfx_utils.h
+++ b/examples/common/bgfx_utils.h
@@ -8,7 +8,7 @@
 
 #include <bx/pixelformat.h>
 #include <bgfx/bgfx.h>
-#include "image_decode.h"
+#include <bimg/bimg.h>
 
 ///
 void* load(const char* _filePath, uint32_t* _size = NULL);
@@ -26,7 +26,7 @@ bgfx::ProgramHandle loadProgram(const char* _vsName, const char* _fsName);
 bgfx::TextureHandle loadTexture(const char* _name, uint32_t _flags = BGFX_TEXTURE_NONE, uint8_t _skip = 0, bgfx::TextureInfo* _info = NULL);
 
 ///
-bgfx::ImageContainer* imageLoad(const char* _filePath, bgfx::TextureFormat::Enum _dstFormat);
+bimg::ImageContainer* imageLoad(const char* _filePath, bgfx::TextureFormat::Enum _dstFormat);
 
 ///
 void calcTangents(void* _vertices, uint16_t _numVertices, bgfx::VertexDecl _decl, const uint16_t* _indices, uint32_t _numIndices);
diff --git a/examples/common/image_decode.cpp b/examples/common/image_decode.cpp
deleted file mode 100644
index 67140d84b..000000000
--- a/examples/common/image_decode.cpp
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
- * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
- */
-
-#include "entry/dbg.h"
-
-#include <bgfx/bgfx.h>
-#include <bx/allocator.h>
-#include <bx/endian.h>
-#include <bx/readerwriter.h>
-#include "bgfx_utils.h"
-
-BX_PRAGMA_DIAGNOSTIC_PUSH()
-BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wtype-limits")
-BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wunused-parameter")
-BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wunused-value")
-BX_PRAGMA_DIAGNOSTIC_IGNORED_MSVC(4100) // error C4100: '' : unreferenced formal parameter
-#if BX_PLATFORM_EMSCRIPTEN
-#	include <compat/ctype.h>
-#endif // BX_PLATFORM_EMSCRIPTEN
-#define MINIZ_NO_STDIO
-#define TINYEXR_IMPLEMENTATION
-#include <tinyexr/tinyexr.h>
-BX_PRAGMA_DIAGNOSTIC_POP()
-
-BX_PRAGMA_DIAGNOSTIC_PUSH();
-BX_PRAGMA_DIAGNOSTIC_IGNORED_MSVC(4127) // warning C4127: conditional expression is constant
-#define LODEPNG_NO_COMPILE_ENCODER
-#define LODEPNG_NO_COMPILE_DISK
-#define LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
-#define LODEPNG_NO_COMPILE_ERROR_TEXT
-#define LODEPNG_NO_COMPILE_ALLOCATORS
-#define LODEPNG_NO_COMPILE_CPP
-#include <lodepng/lodepng.cpp>
-BX_PRAGMA_DIAGNOSTIC_POP();
-
-void* lodepng_malloc(size_t _size)
-{
-	return ::malloc(_size);
-}
-
-void* lodepng_realloc(void* _ptr, size_t _size)
-{
-	return ::realloc(_ptr, _size);
-}
-
-void lodepng_free(void* _ptr)
-{
-	::free(_ptr);
-}
-
-BX_PRAGMA_DIAGNOSTIC_PUSH();
-BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wmissing-field-initializers");
-BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wshadow");
-BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wint-to-pointer-cast")
-#if BX_COMPILER_GCC >= 60000
-BX_PRAGMA_DIAGNOSTIC_IGNORED_GCC("-Wmisleading-indentation");
-BX_PRAGMA_DIAGNOSTIC_IGNORED_GCC("-Wshift-negative-value");
-#endif // BX_COMPILER_GCC >= 60000_
-#define STBI_MALLOC(_size)        lodepng_malloc(_size)
-#define STBI_REALLOC(_ptr, _size) lodepng_realloc(_ptr, _size)
-#define STBI_FREE(_ptr)           lodepng_free(_ptr)
-#define STB_IMAGE_IMPLEMENTATION
-#include <stb/stb_image.c>
-BX_PRAGMA_DIAGNOSTIC_POP();
-
-namespace bgfx
-{
-#if !defined(BGFX_IMAGE_H_HEADER_GUARD)
-	struct ImageMip
-	{
-		TextureFormat::Enum m_format;
-		uint32_t m_width;
-		uint32_t m_height;
-		uint32_t m_blockSize;
-		uint32_t m_size;
-		uint8_t  m_bpp;
-		bool     m_hasAlpha;
-		const uint8_t* m_data;
-	};
-#endif // !defined(BGFX_IMAGE_H_HEADER_GUARD)
-
-	uint32_t imageGetSize(
-		  TextureInfo* _info
-		, uint16_t _width
-		, uint16_t _height
-		, uint16_t _depth
-		, bool _cubeMap
-		, bool _hasMips
-		, uint16_t _numLayers
-		, TextureFormat::Enum _format
-		);
-
-	///
-	ImageContainer* imageParseBgfx(bx::AllocatorI* _allocator, const void* _src, uint32_t _size);
-
-	///
-	bool imageConvert(
-		  void* _dst
-		, TextureFormat::Enum _dstFormat
-		, const void* _src
-		, TextureFormat::Enum _srcFormat
-		, uint32_t _width
-		, uint32_t _height
-		);
-
-	///
-	ImageContainer* imageConvert(
-		  bx::AllocatorI* _allocator
-		, TextureFormat::Enum _dstFormat
-		, const ImageContainer& _input
-		);
-
-} // namespace bgfx
-
-namespace bgfx
-{
-	static ImageContainer* imageParseLodePng(bx::AllocatorI* _allocator, const void* _data, uint32_t _size)
-	{
-		static uint8_t pngMagic[] = { 0x89, 0x50, 0x4E, 0x47, 0x0d, 0x0a };
-
-		if (0 != bx::memCmp(_data, pngMagic, sizeof(pngMagic) ) )
-		{
-			return NULL;
-		}
-
-		ImageContainer* output = NULL;
-		bgfx::TextureFormat::Enum format = bgfx::TextureFormat::RGBA8;
-		uint32_t width  = 0;
-		uint32_t height = 0;
-
-		unsigned error;
-		LodePNGState state;
-		lodepng_state_init(&state);
-		state.decoder.color_convert = 0;
-
-		uint8_t* data = NULL;
-		error = lodepng_decode(&data, &width, &height, &state, (uint8_t*)_data, _size);
-
-		if (0 == error)
-		{
-			switch (state.info_raw.bitdepth)
-			{
-				case 8:
-					switch (state.info_raw.colortype)
-					{
-						case LCT_GREY:
-							format = bgfx::TextureFormat::R8;
-							break;
-
-						case LCT_GREY_ALPHA:
-							format = bgfx::TextureFormat::RG8;
-							break;
-
-						case LCT_RGB:
-							format = bgfx::TextureFormat::RGB8;
-							break;
-
-						case LCT_RGBA:
-							format = bgfx::TextureFormat::RGBA8;
-							break;
-
-						case LCT_PALETTE:
-							break;
-					}
-					break;
-
-				case 16:
-					switch (state.info_raw.colortype)
-					{
-						case LCT_GREY:
-							for (uint32_t ii = 0, num = width*height; ii < num; ++ii)
-							{
-								uint16_t* rgba = (uint16_t*)data + ii;
-								rgba[0] = bx::toHostEndian(rgba[0], false);
-							}
-							format = bgfx::TextureFormat::R16;
-							break;
-
-						case LCT_GREY_ALPHA:
-							for (uint32_t ii = 0, num = width*height; ii < num; ++ii)
-							{
-								uint16_t* rgba = (uint16_t*)data + ii*2;
-								rgba[0] = bx::toHostEndian(rgba[0], false);
-								rgba[1] = bx::toHostEndian(rgba[1], false);
-							}
-							format = bgfx::TextureFormat::RG16;
-							break;
-
-						case LCT_RGBA:
-							for (uint32_t ii = 0, num = width*height; ii < num; ++ii)
-							{
-								uint16_t* rgba = (uint16_t*)data + ii*4;
-								rgba[0] = bx::toHostEndian(rgba[0], false);
-								rgba[1] = bx::toHostEndian(rgba[1], false);
-								rgba[2] = bx::toHostEndian(rgba[2], false);
-								rgba[3] = bx::toHostEndian(rgba[3], false);
-							}
-							format = bgfx::TextureFormat::RGBA16;
-							break;
-
-						case LCT_RGB:
-						case LCT_PALETTE:
-							break;
-					}
-					break;
-
-				default:
-					break;
-			}
-
-			output = imageAlloc(_allocator
-				, format
-				, uint16_t(width)
-				, uint16_t(height)
-				, 0
-				, 1
-				, false
-				, false
-				, data
-				);
-		}
-
-		lodepng_state_cleanup(&state);
-		lodepng_free(data);
-
-		return output;
-	}
-
-	static ImageContainer* imageParseTinyExr(bx::AllocatorI* _allocator, const void* _data, uint32_t _size)
-	{
-		EXRVersion exrVersion;
-		int result = ParseEXRVersionFromMemory(&exrVersion, (uint8_t*)_data, _size);
-		if (TINYEXR_SUCCESS != result)
-		{
-			return NULL;
-		}
-
-		bgfx::TextureFormat::Enum format = bgfx::TextureFormat::RGBA8;
-		uint32_t width  = 0;
-		uint32_t height = 0;
-
-		uint8_t* data = NULL;
-		const char* err = NULL;
-		EXRHeader exrHeader;
-		result = ParseEXRHeaderFromMemory(&exrHeader, &exrVersion, (uint8_t*)_data, _size, &err);
-		if (TINYEXR_SUCCESS == result)
-		{
-			EXRImage exrImage;
-			InitEXRImage(&exrImage);
-
-			result = LoadEXRImageFromMemory(&exrImage, &exrHeader, (uint8_t*)_data, _size, &err);
-			if (TINYEXR_SUCCESS == result)
-			{
-				uint8_t idxR = UINT8_MAX;
-				uint8_t idxG = UINT8_MAX;
-				uint8_t idxB = UINT8_MAX;
-				uint8_t idxA = UINT8_MAX;
-				for (uint8_t ii = 0, num = uint8_t(exrHeader.num_channels); ii < num; ++ii)
-				{
-					const EXRChannelInfo& channel = exrHeader.channels[ii];
-					if (UINT8_MAX == idxR
-					&&  0 == bx::strncmp(channel.name, "R") )
-					{
-						idxR = ii;
-					}
-					else if (UINT8_MAX == idxG
-					&&  0 == bx::strncmp(channel.name, "G") )
-					{
-						idxG = ii;
-					}
-					else if (UINT8_MAX == idxB
-					&&  0 == bx::strncmp(channel.name, "B") )
-					{
-						idxB = ii;
-					}
-					else if (UINT8_MAX == idxA
-					&&  0 == bx::strncmp(channel.name, "A") )
-					{
-						idxA = ii;
-					}
-				}
-
-				if (UINT8_MAX != idxR)
-				{
-					const bool asFloat = exrHeader.pixel_types[idxR] == TINYEXR_PIXELTYPE_FLOAT;
-					uint32_t srcBpp = 32;
-					uint32_t dstBpp = asFloat ? 32 : 16;
-					format = asFloat ? TextureFormat::R32F : TextureFormat::R16F;
-					uint32_t stepR = 1;
-					uint32_t stepG = 0;
-					uint32_t stepB = 0;
-					uint32_t stepA = 0;
-
-					if (UINT8_MAX != idxG)
-					{
-						srcBpp += 32;
-						dstBpp = asFloat ? 64 : 32;
-						format = asFloat ? TextureFormat::RG32F : TextureFormat::RG16F;
-						stepG  = 1;
-					}
-
-					if (UINT8_MAX != idxB)
-					{
-						srcBpp += 32;
-						dstBpp = asFloat ? 128 : 64;
-						format = asFloat ? TextureFormat::RGBA32F : TextureFormat::RGBA16F;
-						stepB  = 1;
-					}
-
-					if (UINT8_MAX != idxA)
-					{
-						srcBpp += 32;
-						dstBpp = asFloat ? 128 : 64;
-						format = asFloat ? TextureFormat::RGBA32F : TextureFormat::RGBA16F;
-						stepA  = 1;
-					}
-
-					data = (uint8_t*)BX_ALLOC(_allocator, exrImage.width * exrImage.height * dstBpp/8);
-
-					const float  zero = 0.0f;
-					const float* srcR = UINT8_MAX == idxR ? &zero : (const float*)(exrImage.images)[idxR];
-					const float* srcG = UINT8_MAX == idxG ? &zero : (const float*)(exrImage.images)[idxG];
-					const float* srcB = UINT8_MAX == idxB ? &zero : (const float*)(exrImage.images)[idxB];
-					const float* srcA = UINT8_MAX == idxA ? &zero : (const float*)(exrImage.images)[idxA];
-
-					const uint32_t bytesPerPixel = dstBpp/8;
-					for (uint32_t ii = 0, num = exrImage.width * exrImage.height; ii < num; ++ii)
-					{
-						float rgba[4] =
-						{
-							*srcR,
-							*srcG,
-							*srcB,
-							*srcA,
-						};
-						bx::memCopy(&data[ii * bytesPerPixel], rgba, bytesPerPixel);
-
-						srcR += stepR;
-						srcG += stepG;
-						srcB += stepB;
-						srcA += stepA;
-					}
-				}
-
-				FreeEXRImage(&exrImage);
-			}
-
-			FreeEXRHeader(&exrHeader);
-		}
-
-		ImageContainer* output = imageAlloc(_allocator
-			, format
-			, uint16_t(width)
-			, uint16_t(height)
-			, 0
-			, 1
-			, false
-			, false
-			, data
-			);
-		BX_FREE(_allocator, data);
-
-		return output;
-	}
-
-	static ImageContainer* imageParseStbImage(bx::AllocatorI* _allocator, const void* _data, uint32_t _size)
-	{
-		const int isHdr = stbi_is_hdr_from_memory((const uint8_t*)_data, (int)_size);
-
-		void* data;
-		uint32_t width  = 0;
-		uint32_t height = 0;
-		int comp = 0;
-		if (isHdr) { data = stbi_loadf_from_memory((const uint8_t*)_data, (int)_size, (int*)&width, (int*)&height, &comp, 4); }
-		else       { data = stbi_load_from_memory ((const uint8_t*)_data, (int)_size, (int*)&width, (int*)&height, &comp, 0); }
-
-		if (NULL == data)
-		{
-			return NULL;
-		}
-
-		bgfx::TextureFormat::Enum format;
-		if (isHdr)
-		{
-			format = bgfx::TextureFormat::RGBA32F;
-		}
-		else
-		{
-			if       (1 == comp)   { format = bgfx::TextureFormat::R8;    }
-			else  if (2 == comp)   { format = bgfx::TextureFormat::RG8;   }
-			else  if (3 == comp)   { format = bgfx::TextureFormat::RGB8;  }
-			else/*if (4 == comp)*/ { format = bgfx::TextureFormat::RGBA8; }
-		}
-
-		ImageContainer* output = imageAlloc(_allocator
-			, format
-			, uint16_t(width)
-			, uint16_t(height)
-			, 0
-			, 1
-			, false
-			, false
-			, data
-			);
-		stbi_image_free(data);
-
-		return output;
-	}
-
-	ImageContainer* imageParse(bx::AllocatorI* _allocator, const void* _data, uint32_t _size, TextureFormat::Enum _dstFormat)
-	{
-		ImageContainer* input = imageParseBgfx    (_allocator, _data, _size)        ;
-		input = NULL == input ? imageParseLodePng (_allocator, _data, _size) : input;
-		input = NULL == input ? imageParseTinyExr (_allocator, _data, _size) : input;
-		input = NULL == input ? imageParseStbImage(_allocator, _data, _size) : input;
-
-		if (NULL == input)
-		{
-			return NULL;
-		}
-
-		_dstFormat = TextureFormat::Count == _dstFormat
-			? input->m_format
-			: _dstFormat
-			;
-
-		if (_dstFormat == input->m_format)
-		{
-			return input;
-		}
-
-		ImageContainer* output = imageConvert(_allocator, _dstFormat, *input);
-		imageFree(input);
-
-		return output;
-	}
-
-} // namespace bgfx
diff --git a/examples/common/image_decode.h b/examples/common/image_decode.h
deleted file mode 100644
index a4c863cbd..000000000
--- a/examples/common/image_decode.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
- * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
- */
-
-#ifndef IMAGE_H_HEADER_GUARD
-#define IMAGE_H_HEADER_GUARD
-
-namespace bgfx
-{
-#if !defined(BGFX_IMAGE_H_HEADER_GUARD)
-	///
-	struct ImageContainer
-	{
-		bx::AllocatorI* m_allocator;
-		void*           m_data;
-
-		TextureFormat::Enum m_format;
-
-		uint32_t m_size;
-		uint32_t m_offset;
-		uint32_t m_width;
-		uint32_t m_height;
-		uint32_t m_depth;
-		uint16_t m_numLayers;
-		uint8_t  m_numMips;
-		bool     m_hasAlpha;
-		bool     m_cubeMap;
-		bool     m_ktx;
-		bool     m_ktxLE;
-		bool     m_srgb;
-	};
-
-	///
-	ImageContainer* imageAlloc(
-		  bx::AllocatorI* _allocator
-		, TextureFormat::Enum _format
-		, uint16_t _width
-		, uint16_t _height
-		, uint16_t _depth
-		, uint16_t _numLayers
-		, bool _cubeMap
-		, bool _hasMips
-		, const void* _data = NULL
-		);
-
-	///
-	void imageFree(ImageContainer* _imageContainer);
-
-	/// Converts format to string.
-	const char* getName(TextureFormat::Enum _format);
-#endif // !defined(BGFX_IMAGE_H_HEADER_GUARD)
-
-	///
-	ImageContainer* imageParse(
-		  bx::AllocatorI* _allocator
-		, const void* _data
-		, uint32_t _size
-		, TextureFormat::Enum _dstFormat = TextureFormat::Count
-		);
-
-} // namespace bgfx
-
-#endif // IMAGE_H_HEADER_GUARD
diff --git a/examples/common/nanovg/nanovg.cpp b/examples/common/nanovg/nanovg.cpp
index 07668d8df..c7687282d 100644
--- a/examples/common/nanovg/nanovg.cpp
+++ b/examples/common/nanovg/nanovg.cpp
@@ -24,7 +24,7 @@
 #include "nanovg.h"
 
 #ifndef NANOVG_HAS_STB_IMAGE
-#	define NANOVG_HAS_STB_IMAGE 1
+#	define NANOVG_HAS_STB_IMAGE 0
 #endif // NANOVG_HAS_STB_IMAGE
 
 #include <bx/macros.h>
@@ -816,9 +816,9 @@ void nvgFillPaint(NVGcontext* ctx, NVGpaint paint)
 	nvgTransformMultiply(state->fill.xform, state->xform);
 }
 
-#if NANOVG_HAS_STB_IMAGE
 int nvgCreateImage(NVGcontext* ctx, const char* filename, int imageFlags)
 {
+#if NANOVG_HAS_STB_IMAGE
 	int w, h, n, image;
 	unsigned char* img;
 	stbi_set_unpremultiply_on_load(1);
@@ -831,10 +831,15 @@ int nvgCreateImage(NVGcontext* ctx, const char* filename, int imageFlags)
 	image = nvgCreateImageRGBA(ctx, w, h, imageFlags, img);
 	stbi_image_free(img);
 	return image;
+#else
+	BX_UNUSED(ctx, filename, imageFlags);
+	return 0;
+#endif // NANOVG_HAS_STB_IMAGE
 }
 
 int nvgCreateImageMem(NVGcontext* ctx, int imageFlags, unsigned char* data, int ndata)
 {
+#if NANOVG_HAS_STB_IMAGE
 	int w, h, n, image;
 	unsigned char* img = stbi_load_from_memory(data, ndata, &w, &h, &n, 4);
 	if (img == NULL) {
@@ -844,8 +849,11 @@ int nvgCreateImageMem(NVGcontext* ctx, int imageFlags, unsigned char* data, int
 	image = nvgCreateImageRGBA(ctx, w, h, imageFlags, img);
 	stbi_image_free(img);
 	return image;
-}
+#else
+	BX_UNUSED(ctx, imageFlags, data, ndata);
+	return 0;
 #endif // NANOVG_HAS_STB_IMAGE
+}
 
 int nvgCreateImageRGBA(NVGcontext* ctx, int w, int h, int imageFlags, const unsigned char* data)
 {
diff --git a/include/bgfx/bgfx.h b/include/bgfx/bgfx.h
index 5a3ab56a6..b39c74b77 100644
--- a/include/bgfx/bgfx.h
+++ b/include/bgfx/bgfx.h
@@ -933,44 +933,6 @@ namespace bgfx
 		, bool _index32
 		);
 
-	/// Swizzle RGBA8 image to BGRA8.
-	///
-	/// @param[in] _dst Destination image. Must be the same size as input image.
-	///   _dst might be pointer to the same memory as _src.
-	/// @param[in] _width Width of input image (pixels).
-	/// @param[in] _height Height of input image (pixels).
-	/// @param[in] _pitch Pitch of input image (bytes).
-	/// @param[in] _src Source image.
-	///
-	/// @attention C99 equivalent is `bgfx_image_swizzle_bgra8`.
-	///
-	void imageSwizzleBgra8(
-		  void* _dst
-		, uint32_t _width
-		, uint32_t _height
-		, uint32_t _pitch
-		, const void* _src
-		);
-
-	/// Downsample RGBA8 image with 2x2 pixel average filter.
-	///
-	/// @param[in] _dst Destination image. Must be at least quarter size of
-	///   input image. _dst might be pointer to the same memory as _src.
-	/// @param[in] _width Width of input image (pixels).
-	/// @param[in] _height Height of input image (pixels).
-	/// @param[in] _pitch Pitch of input image (bytes).
-	/// @param[in] _src Source image.
-	///
-	/// @attention C99 equivalent is `bgfx_image_rgba8_downsample_2x2`.
-	///
-	void imageRgba8Downsample2x2(
-		  void* _dst
-		, uint32_t _width
-		, uint32_t _height
-		, uint32_t _pitch
-		, const void* _src
-		);
-
 	/// Returns supported backend API renderers.
 	///
 	/// @param[in] _max Maximum number of elements in _enum array.
diff --git a/include/bgfx/c99/bgfx.h b/include/bgfx/c99/bgfx.h
index dc14a22b5..58e109efd 100644
--- a/include/bgfx/c99/bgfx.h
+++ b/include/bgfx/c99/bgfx.h
@@ -548,12 +548,6 @@ BGFX_C_API uint32_t bgfx_topology_convert(bgfx_topology_convert_t _conversion, v
 /**/
 BGFX_C_API void bgfx_topology_sort_tri_list(bgfx_topology_sort_t _sort, void* _dst, uint32_t _dstSize, const float _dir[3], const float _pos[3], const void* _vertices, uint32_t _stride, const void* _indices, uint32_t _numIndices, bool _index32);
 
-/**/
-BGFX_C_API void bgfx_image_swizzle_bgra8(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-/**/
-BGFX_C_API void bgfx_image_rgba8_downsample_2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
 /**/
 BGFX_C_API uint8_t bgfx_get_supported_renderers(uint8_t _max, bgfx_renderer_type_t* _enum);
 
diff --git a/include/bgfx/c99/platform.h b/include/bgfx/c99/platform.h
index 69d3e1b9a..e734fdf12 100644
--- a/include/bgfx/c99/platform.h
+++ b/include/bgfx/c99/platform.h
@@ -81,8 +81,6 @@ typedef struct bgfx_interface_vtbl
     uint16_t (*weld_vertices)(uint16_t* _output, const bgfx_vertex_decl_t* _decl, const void* _data, uint16_t _num, float _epsilon);
     uint32_t (*topology_convert)(bgfx_topology_convert_t _conversion, void* _dst, uint32_t _dstSize, const void* _indices, uint32_t _numIndices, bool _index32);
     void (*topology_sort_tri_list)(bgfx_topology_sort_t _sort, void* _dst, uint32_t _dstSize, const float _dir[3], const float _pos[3], const void* _vertices, uint32_t _stride, const void* _indices, uint32_t _numIndices, bool _index32);
-    void (*image_swizzle_bgra8)(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-    void (*image_rgba8_downsample_2x2)(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
     uint8_t (*get_supported_renderers)(uint8_t _max, bgfx_renderer_type_t* _enum);
     const char* (*get_renderer_name)(bgfx_renderer_type_t _type);
     bool (*init)(bgfx_renderer_type_t _type, uint16_t _vendorId, uint16_t _deviceId, bgfx_callback_interface_t* _callback, bgfx_allocator_interface_t* _allocator);
diff --git a/scripts/bgfx.lua b/scripts/bgfx.lua
index f95e99f6d..3c529d80d 100644
--- a/scripts/bgfx.lua
+++ b/scripts/bgfx.lua
@@ -47,6 +47,7 @@ function bgfxProject(_name, _kind, _defines)
 			}
 
 			links {
+				"bimg",
 				"bx",
 			}
 
@@ -73,6 +74,7 @@ function bgfxProject(_name, _kind, _defines)
 			path.join(BGFX_DIR, "3rdparty"),
 			path.join(BGFX_DIR, "3rdparty/dxsdk/include"),
 			path.join(BX_DIR,   "include"),
+			path.join(BIMG_DIR, "include"),
 		}
 
 		defines {
diff --git a/scripts/example-common.lua b/scripts/example-common.lua
index f87849f34..37e0f410b 100644
--- a/scripts/example-common.lua
+++ b/scripts/example-common.lua
@@ -8,7 +8,8 @@ project ("example-common")
 	kind "StaticLib"
 
 	includedirs {
-		path.join(BX_DIR, "include"),
+		path.join(BX_DIR,   "include"),
+		path.join(BIMG_DIR, "include"),
 		path.join(BGFX_DIR, "include"),
 		path.join(BGFX_DIR, "3rdparty"),
 	}
diff --git a/scripts/genie.lua b/scripts/genie.lua
index a01d7fe06..1c3e74813 100644
--- a/scripts/genie.lua
+++ b/scripts/genie.lua
@@ -72,6 +72,8 @@ solution "bgfx"
 
 MODULE_DIR = path.getabsolute("../")
 BGFX_DIR   = path.getabsolute("..")
+-- bimg checkout: honor BIMG_DIR from the environment (like BX_DIR below), else assume a sibling of bgfx.
+BIMG_DIR   = os.getenv("BIMG_DIR") or path.getabsolute(path.join(BGFX_DIR, "../bimg"))
 BX_DIR     = os.getenv("BX_DIR")
 
 local BGFX_BUILD_DIR = path.join(BGFX_DIR, ".build")
@@ -122,6 +123,7 @@ function exampleProject(_name)
 
 	includedirs {
 		path.join(BX_DIR,   "include"),
+		path.join(BIMG_DIR, "include"),
 		path.join(BGFX_DIR, "include"),
 		path.join(BGFX_DIR, "3rdparty"),
 		path.join(BGFX_DIR, "examples/common"),
@@ -144,6 +146,8 @@ function exampleProject(_name)
 	links {
 		"example-common",
 		"bgfx",
+		"bimg_decode",
+		"bimg",
 		"bx",
 	}
 
@@ -364,7 +368,8 @@ dofile "bgfx.lua"
 group "libs"
 bgfxProject("", "StaticLib", {})
 
-dofile(path.join(BX_DIR, "scripts/bx.lua"))
+dofile(path.join(BX_DIR,   "scripts/bx.lua"))
+dofile(path.join(BIMG_DIR, "scripts/bimg.lua"))
 
 if _OPTIONS["with-examples"] or _OPTIONS["with-tools"] then
 	group "examples"
diff --git a/scripts/shaderc.lua b/scripts/shaderc.lua
index 300c4c6a4..18d9988c3 100644
--- a/scripts/shaderc.lua
+++ b/scripts/shaderc.lua
@@ -219,7 +219,8 @@ project "shaderc"
 	kind "ConsoleApp"
 
 	includedirs {
-		path.join(BX_DIR, "include"),
+		path.join(BX_DIR,   "include"),
+		path.join(BIMG_DIR, "include"),
 		path.join(BGFX_DIR, "include"),
 
 		path.join(BGFX_DIR, "3rdparty/dxsdk/include"),
diff --git a/scripts/texturec.lua b/scripts/texturec.lua
index 0811ebec3..9cff76e92 100644
--- a/scripts/texturec.lua
+++ b/scripts/texturec.lua
@@ -8,38 +8,20 @@ project "texturec"
 	kind "ConsoleApp"
 
 	includedirs {
-		path.join(BX_DIR, "include"),
+		path.join(BX_DIR,   "include"),
+		path.join(BIMG_DIR, "include"),
 		path.join(BGFX_DIR, "include"),
-		path.join(BGFX_DIR, "src"),
-		path.join(BGFX_DIR, "3rdparty"),
-		path.join(BGFX_DIR, "3rdparty/nvtt"),
-		path.join(BGFX_DIR, "3rdparty/iqa/include"),
-		path.join(BGFX_DIR, "examples/common"),
+		path.join(BIMG_DIR, "3rdparty/iqa/include"),
 	}
 
 	files {
-		path.join(BGFX_DIR, "examples/common/image_decode.*"),
-		path.join(BGFX_DIR, "src/image.*"),
-		path.join(BGFX_DIR, "3rdparty/libsquish/**.cpp"),
-		path.join(BGFX_DIR, "3rdparty/libsquish/**.h"),
-		path.join(BGFX_DIR, "3rdparty/edtaa3/**.cpp"),
-		path.join(BGFX_DIR, "3rdparty/edtaa3/**.h"),
-		path.join(BGFX_DIR, "3rdparty/etc1/**.cpp"),
-		path.join(BGFX_DIR, "3rdparty/etc1/**.h"),
-		path.join(BGFX_DIR, "3rdparty/etc2/**.cpp"),
-		path.join(BGFX_DIR, "3rdparty/etc2/**.hpp"),
-		path.join(BGFX_DIR, "3rdparty/nvtt/**.cpp"),
-		path.join(BGFX_DIR, "3rdparty/nvtt/**.h"),
-		path.join(BGFX_DIR, "3rdparty/pvrtc/**.cpp"),
-		path.join(BGFX_DIR, "3rdparty/pvrtc/**.h"),
-		path.join(BGFX_DIR, "3rdparty/tinyexr/**.h"),
-		path.join(BGFX_DIR, "3rdparty/iqa/include/**.h"),
-		path.join(BGFX_DIR, "3rdparty/iqa/source/**.c"),
-		path.join(BGFX_DIR, "tools/texturec/**.cpp"),
-		path.join(BGFX_DIR, "tools/texturec/**.h"),
+		path.join(BGFX_DIR, "tools/texturec/texturec.cpp"),
 	}
 
 	links {
+		"bimg_decode",
+		"bimg_encode",
+		"bimg",
 		"bx",
 	}
 
diff --git a/scripts/texturev.lua b/scripts/texturev.lua
index fe08f59d6..4443391a2 100644
--- a/scripts/texturev.lua
+++ b/scripts/texturev.lua
@@ -6,6 +6,7 @@ project ("texturev")
 
 	includedirs {
 		path.join(BX_DIR,   "include"),
+		path.join(BIMG_DIR, "include"),
 		path.join(BGFX_DIR, "include"),
 		path.join(BGFX_DIR, "3rdparty"),
 		path.join(BGFX_DIR, "examples/common"),
@@ -19,6 +20,9 @@ project ("texturev")
 
 	links {
 		"example-common",
+		-- bgfx now calls into bimg, so list bimg after it (same order as exampleProject).
 		"bgfx",
+		"bimg_decode",
+		"bimg",
 		"bx",
 	}
diff --git a/src/amalgamated.cpp b/src/amalgamated.cpp
index 860f448e8..0ea731d98 100644
--- a/src/amalgamated.cpp
+++ b/src/amalgamated.cpp
@@ -8,7 +8,6 @@
 #include "glcontext_glx.cpp"
 #include "glcontext_ppapi.cpp"
 #include "glcontext_wgl.cpp"
-#include "image.cpp"
 #include "hmd.cpp"
 #include "hmd_ovr.cpp"
 #include "hmd_openvr.cpp"
diff --git a/src/bgfx.cpp b/src/bgfx.cpp
index 5886e8424..2badfc2f3 100644
--- a/src/bgfx.cpp
+++ b/src/bgfx.cpp
@@ -126,7 +126,7 @@ namespace bgfx
 			bx::CrtFileWriter writer;
 			if (bx::open(&writer, filePath) )
 			{
-				imageWriteTga(&writer, _width, _height, _pitch, _data, false, _yflip);
+				bimg::imageWriteTga(&writer, _width, _height, _pitch, _data, false, _yflip);
 				bx::close(&writer);
 			}
 #endif // BX_CONFIG_CRT_FILE_READER_WRITER
@@ -1246,7 +1246,7 @@ namespace bgfx
 		BX_TRACE("");
 	}
 
-	TextureFormat::Enum getViableTextureFormat(const ImageContainer& _imageContainer)
+	TextureFormat::Enum getViableTextureFormat(const bimg::ImageContainer& _imageContainer)
 	{
 		const uint32_t formatCaps = g_caps.formats[_imageContainer.m_format];
 		bool convert = 0 == formatCaps;
@@ -1275,7 +1275,12 @@ namespace bgfx
 			return TextureFormat::BGRA8;
 		}
 
-		return _imageContainer.m_format;
+		return TextureFormat::Enum(_imageContainer.m_format);
+	}
+
+	const char* getName(TextureFormat::Enum _fmt)
+	{
+		return bimg::getName(bimg::TextureFormat::Enum(_fmt));
 	}
 
 	static TextureFormat::Enum s_emulatedFormats[] =
@@ -1389,7 +1394,7 @@ namespace bgfx
 
 		for (uint32_t ii = 0; ii < TextureFormat::UnknownDepth; ++ii)
 		{
-			bool convertable = imageConvert(TextureFormat::BGRA8, TextureFormat::Enum(ii) );
+			bool convertable = bimg::imageConvert(bimg::TextureFormat::BGRA8, bimg::TextureFormat::Enum(ii) );
 			g_caps.formats[ii] |= 0 == (g_caps.formats[ii] & BGFX_CAPS_FORMAT_TEXTURE_2D  ) && convertable ? BGFX_CAPS_FORMAT_TEXTURE_2D_EMULATED   : 0;
 			g_caps.formats[ii] |= 0 == (g_caps.formats[ii] & BGFX_CAPS_FORMAT_TEXTURE_3D  ) && convertable ? BGFX_CAPS_FORMAT_TEXTURE_3D_EMULATED   : 0;
 			g_caps.formats[ii] |= 0 == (g_caps.formats[ii] & BGFX_CAPS_FORMAT_TEXTURE_CUBE) && convertable ? BGFX_CAPS_FORMAT_TEXTURE_CUBE_EMULATED : 0;
@@ -3136,7 +3141,20 @@ error:
 
 	void calcTextureSize(TextureInfo& _info, uint16_t _width, uint16_t _height, uint16_t _depth, bool _cubeMap, bool _hasMips, uint16_t _numLayers, TextureFormat::Enum _format)
 	{
-		imageGetSize(&_info, _width, _height, _depth, _cubeMap, _hasMips, _numLayers, _format);
+		// bgfx::TextureInfo and bimg::TextureInfo are unrelated types, so let bimg
+		// fill its own struct and copy field by field instead of casting the pointer.
+		bimg::TextureInfo ti;
+		bimg::imageGetSize(&ti, _width, _height, _depth, _cubeMap, _hasMips, _numLayers, bimg::TextureFormat::Enum(_format) );
+
+		_info.format       = TextureFormat::Enum(ti.format);
+		_info.storageSize  = ti.storageSize;
+		_info.width        = ti.width;
+		_info.height       = ti.height;
+		_info.depth        = ti.depth;
+		_info.numLayers    = ti.numLayers;
+		_info.numMips      = ti.numMips;
+		_info.bitsPerPixel = ti.bitsPerPixel;
+		_info.cubeMap      = ti.cubeMap;
 	}
 
 	TextureHandle createTexture(const Memory* _mem, uint32_t _flags, uint8_t _skip, TextureInfo* _info)
@@ -3867,6 +3872,89 @@ error:
 	}
 } // namespace bgfx
 
+#define BGFX_TEXTURE_FORMAT_BIMG(_fmt) \
+			BX_STATIC_ASSERT(uint32_t(bgfx::TextureFormat::_fmt) == uint32_t(bimg::TextureFormat::_fmt) )
+
+BGFX_TEXTURE_FORMAT_BIMG(BC1);
+BGFX_TEXTURE_FORMAT_BIMG(BC2);
+BGFX_TEXTURE_FORMAT_BIMG(BC3);
+BGFX_TEXTURE_FORMAT_BIMG(BC4);
+BGFX_TEXTURE_FORMAT_BIMG(BC5);
+BGFX_TEXTURE_FORMAT_BIMG(BC6H);
+BGFX_TEXTURE_FORMAT_BIMG(BC7);
+BGFX_TEXTURE_FORMAT_BIMG(ETC1);
+BGFX_TEXTURE_FORMAT_BIMG(ETC2);
+BGFX_TEXTURE_FORMAT_BIMG(ETC2A);
+BGFX_TEXTURE_FORMAT_BIMG(ETC2A1);
+BGFX_TEXTURE_FORMAT_BIMG(PTC12);
+BGFX_TEXTURE_FORMAT_BIMG(PTC14);
+BGFX_TEXTURE_FORMAT_BIMG(PTC12A);
+BGFX_TEXTURE_FORMAT_BIMG(PTC14A);
+BGFX_TEXTURE_FORMAT_BIMG(PTC22);
+BGFX_TEXTURE_FORMAT_BIMG(PTC24);
+BGFX_TEXTURE_FORMAT_BIMG(Unknown);
+BGFX_TEXTURE_FORMAT_BIMG(R1);
+BGFX_TEXTURE_FORMAT_BIMG(A8);
+BGFX_TEXTURE_FORMAT_BIMG(R8);
+BGFX_TEXTURE_FORMAT_BIMG(R8I);
+BGFX_TEXTURE_FORMAT_BIMG(R8U);
+BGFX_TEXTURE_FORMAT_BIMG(R8S);
+BGFX_TEXTURE_FORMAT_BIMG(R16);
+BGFX_TEXTURE_FORMAT_BIMG(R16I);
+BGFX_TEXTURE_FORMAT_BIMG(R16U);
+BGFX_TEXTURE_FORMAT_BIMG(R16F);
+BGFX_TEXTURE_FORMAT_BIMG(R16S);
+BGFX_TEXTURE_FORMAT_BIMG(R32I);
+BGFX_TEXTURE_FORMAT_BIMG(R32U);
+BGFX_TEXTURE_FORMAT_BIMG(R32F);
+BGFX_TEXTURE_FORMAT_BIMG(RG8);
+BGFX_TEXTURE_FORMAT_BIMG(RG8I);
+BGFX_TEXTURE_FORMAT_BIMG(RG8U);
+BGFX_TEXTURE_FORMAT_BIMG(RG8S);
+BGFX_TEXTURE_FORMAT_BIMG(RG16);
+BGFX_TEXTURE_FORMAT_BIMG(RG16I);
+BGFX_TEXTURE_FORMAT_BIMG(RG16U);
+BGFX_TEXTURE_FORMAT_BIMG(RG16F);
+BGFX_TEXTURE_FORMAT_BIMG(RG16S);
+BGFX_TEXTURE_FORMAT_BIMG(RG32I);
+BGFX_TEXTURE_FORMAT_BIMG(RG32U);
+BGFX_TEXTURE_FORMAT_BIMG(RG32F);
+BGFX_TEXTURE_FORMAT_BIMG(RGB8);
+BGFX_TEXTURE_FORMAT_BIMG(RGB8I);
+BGFX_TEXTURE_FORMAT_BIMG(RGB8U);
+BGFX_TEXTURE_FORMAT_BIMG(RGB8S);
+BGFX_TEXTURE_FORMAT_BIMG(RGB9E5F);
+BGFX_TEXTURE_FORMAT_BIMG(BGRA8);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA8);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA8I);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA8U);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA8S);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA16);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA16I);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA16U);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA16F);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA16S);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA32I);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA32U);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA32F);
+BGFX_TEXTURE_FORMAT_BIMG(R5G6B5);
+BGFX_TEXTURE_FORMAT_BIMG(RGBA4);
+BGFX_TEXTURE_FORMAT_BIMG(RGB5A1);
+BGFX_TEXTURE_FORMAT_BIMG(RGB10A2);
+BGFX_TEXTURE_FORMAT_BIMG(R11G11B10F);
+BGFX_TEXTURE_FORMAT_BIMG(UnknownDepth);
+BGFX_TEXTURE_FORMAT_BIMG(D16);
+BGFX_TEXTURE_FORMAT_BIMG(D24);
+BGFX_TEXTURE_FORMAT_BIMG(D24S8);
+BGFX_TEXTURE_FORMAT_BIMG(D32);
+BGFX_TEXTURE_FORMAT_BIMG(D16F);
+BGFX_TEXTURE_FORMAT_BIMG(D24F);
+BGFX_TEXTURE_FORMAT_BIMG(D32F);
+BGFX_TEXTURE_FORMAT_BIMG(D0S8);
+BGFX_TEXTURE_FORMAT_BIMG(Count);
+
+#undef BGFX_TEXTURE_FORMAT_BIMG
+
 #include <bgfx/c99/bgfx.h>
 #include <bgfx/c99/platform.h>
 
@@ -4070,16 +4158,6 @@ void bgfx_topology_sort_tri_list(bgfx_topology_sort_t _sort, void* _dst, uint32_
 	bgfx::topologySortTriList(bgfx::TopologySort::Enum(_sort), _dst, _dstSize, _dir, _pos, _vertices, _stride, _indices, _numIndices, _index32);
 }
 
-BGFX_C_API void bgfx_image_swizzle_bgra8(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-{
-	bgfx::imageSwizzleBgra8(_dst, _width, _height, _pitch, _src);
-}
-
-BGFX_C_API void bgfx_image_rgba8_downsample_2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-{
-	bgfx::imageRgba8Downsample2x2(_dst, _width, _height, _pitch, _src);
-}
-
 BGFX_C_API uint8_t bgfx_get_supported_renderers(uint8_t _max, bgfx_renderer_type_t* _enum)
 {
 	return bgfx::getSupportedRenderers(_max, (bgfx::RendererType::Enum*)_enum);
@@ -4844,8 +4922,6 @@ BGFX_C_API bgfx_interface_vtbl_t* bgfx_get_interface(uint32_t _version)
 	BGFX_IMPORT_FUNC(weld_vertices) \
 	BGFX_IMPORT_FUNC(topology_convert) \
 	BGFX_IMPORT_FUNC(topology_sort_tri_list) \
-	BGFX_IMPORT_FUNC(image_swizzle_bgra8) \
-	BGFX_IMPORT_FUNC(image_rgba8_downsample_2x2) \
 	BGFX_IMPORT_FUNC(get_supported_renderers) \
 	BGFX_IMPORT_FUNC(get_renderer_name) \
 	BGFX_IMPORT_FUNC(init) \
diff --git a/src/bgfx_p.h b/src/bgfx_p.h
index 988f6a75f..8aa10b21b 100644
--- a/src/bgfx_p.h
+++ b/src/bgfx_p.h
@@ -132,7 +132,7 @@ namespace bgfx
 #include <bx/maputil.h>
 
 #include <bgfx/platform.h>
-#include "image.h"
+#include <bimg/bimg.h>
 #include "shader.h"
 
 #define BGFX_CHUNK_MAGIC_CSH BX_MAKEFOURCC('C', 'S', 'H', 0x2)
@@ -360,7 +360,8 @@ namespace bgfx
 	void release(const Memory* _mem);
 	const char* getAttribName(Attrib::Enum _attr);
 	void getTextureSizeFromRatio(BackbufferRatio::Enum _ratio, uint16_t& _width, uint16_t& _height);
-	TextureFormat::Enum getViableTextureFormat(const ImageContainer& _imageContainer);
+	TextureFormat::Enum getViableTextureFormat(const bimg::ImageContainer& _imageContainer);
+	const char* getName(TextureFormat::Enum _fmt);
 
 	inline uint32_t castfu(float _value)
 	{
@@ -3243,8 +3244,8 @@ namespace bgfx
 				_info = &ti;
 			}
 
-			ImageContainer imageContainer;
-			if (imageParse(imageContainer, _mem->data, _mem->size) )
+			bimg::ImageContainer imageContainer;
+			if (bimg::imageParse(imageContainer, _mem->data, _mem->size) )
 			{
 				calcTextureSize(*_info
 					, (uint16_t)imageContainer.m_width
@@ -3328,7 +3329,7 @@ namespace bgfx
 				, _handle.idx
 				, _width
 				, _height
-				, bgfx::getName(TextureFormat::Enum(textureRef.m_format) )
+				, bimg::getName(bimg::TextureFormat::Enum(textureRef.m_format) )
 				);
 
 			CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::ResizeTexture);
@@ -3406,7 +3407,7 @@ namespace bgfx
 			for (uint32_t ii = 0; ii < _num; ++ii)
 			{
 				TextureHandle texHandle = _attachment[ii].handle;
-				if (isDepth(TextureFormat::Enum(m_textureRef[texHandle.idx].m_format)))
+				if (bimg::isDepth(bimg::TextureFormat::Enum(m_textureRef[texHandle.idx].m_format)))
 				{
 					++depth;
 				}
@@ -4087,8 +4088,8 @@ namespace bgfx
 			const TextureRef& dst = m_textureRef[_dst.idx];
 			BX_CHECK(src.m_format == dst.m_format
 				, "Texture format must match (src %s, dst %s)."
-				, bgfx::getName(TextureFormat::Enum(src.m_format) )
-				, bgfx::getName(TextureFormat::Enum(dst.m_format) )
+				, bimg::getName(bimg::TextureFormat::Enum(src.m_format) )
+				, bimg::getName(bimg::TextureFormat::Enum(dst.m_format) )
 				);
 			BX_UNUSED(src, dst);
 			m_submit->blit(_id, _dst, _dstMip, _dstX, _dstY, _dstZ, _src, _srcMip, _srcX, _srcY, _srcZ, _width, _height, _depth);
diff --git a/src/image.cpp b/src/image.cpp
deleted file mode 100644
index e6c9e074a..000000000
--- a/src/image.cpp
+++ /dev/null
@@ -1,3261 +0,0 @@
-/*
- * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
- * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
- */
-
-#include "bgfx_p.h"
-#include "image.h"
-
-namespace bgfx
-{
-	static const ImageBlockInfo s_imageBlockInfo[] =
-	{
-		//  +-------------------------------------------- bits per pixel
-		//  |  +----------------------------------------- block width
-		//  |  |  +-------------------------------------- block height
-		//  |  |  |   +---------------------------------- block size
-		//  |  |  |   |  +------------------------------- min blocks x
-		//  |  |  |   |  |  +---------------------------- min blocks y
-		//  |  |  |   |  |  |   +------------------------ depth bits
-		//  |  |  |   |  |  |   |  +--------------------- stencil bits
-		//  |  |  |   |  |  |   |  |   +---+---+---+----- r, g, b, a bits
-		//  |  |  |   |  |  |   |  |   r   g   b   a  +-- encoding type
-		//  |  |  |   |  |  |   |  |   |   |   |   |  |
-		{   4, 4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC1
-		{   8, 4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC2
-		{   8, 4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC3
-		{   4, 4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC4
-		{   8, 4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC5
-		{   8, 4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC6H
-		{   8, 4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // BC7
-		{   4, 4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ETC1
-		{   4, 4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ETC2
-		{   8, 4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ETC2A
-		{   4, 4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ETC2A1
-		{   2, 8, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC12
-		{   4, 4, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC14
-		{   2, 8, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC12A
-		{   4, 4, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC14A
-		{   2, 8, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC22
-		{   4, 4, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC24
-		{   0, 0, 0,  0, 0, 0,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Count) }, // Unknown
-		{   1, 8, 1,  1, 1, 1,  0, 0,  1,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // R1
-		{   8, 1, 1,  1, 1, 1,  0, 0,  0,  0,  0,  8, uint8_t(bx::EncodingType::Unorm) }, // A8
-		{   8, 1, 1,  1, 1, 1,  0, 0,  8,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // R8
-		{   8, 1, 1,  1, 1, 1,  0, 0,  8,  0,  0,  0, uint8_t(bx::EncodingType::Int  ) }, // R8I
-		{   8, 1, 1,  1, 1, 1,  0, 0,  8,  0,  0,  0, uint8_t(bx::EncodingType::Uint ) }, // R8U
-		{   8, 1, 1,  1, 1, 1,  0, 0,  8,  0,  0,  0, uint8_t(bx::EncodingType::Snorm) }, // R8S
-		{  16, 1, 1,  2, 1, 1,  0, 0, 16,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // R16
-		{  16, 1, 1,  2, 1, 1,  0, 0, 16,  0,  0,  0, uint8_t(bx::EncodingType::Int  ) }, // R16I
-		{  16, 1, 1,  2, 1, 1,  0, 0, 16,  0,  0,  0, uint8_t(bx::EncodingType::Uint ) }, // R16U
-		{  16, 1, 1,  2, 1, 1,  0, 0, 16,  0,  0,  0, uint8_t(bx::EncodingType::Float) }, // R16F
-		{  16, 1, 1,  2, 1, 1,  0, 0, 16,  0,  0,  0, uint8_t(bx::EncodingType::Snorm) }, // R16S
-		{  32, 1, 1,  4, 1, 1,  0, 0, 32,  0,  0,  0, uint8_t(bx::EncodingType::Int  ) }, // R32I
-		{  32, 1, 1,  4, 1, 1,  0, 0, 32,  0,  0,  0, uint8_t(bx::EncodingType::Uint ) }, // R32U
-		{  32, 1, 1,  4, 1, 1,  0, 0, 32,  0,  0,  0, uint8_t(bx::EncodingType::Float) }, // R32F
-		{  16, 1, 1,  2, 1, 1,  0, 0,  8,  8,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // RG8
-		{  16, 1, 1,  2, 1, 1,  0, 0,  8,  8,  0,  0, uint8_t(bx::EncodingType::Int  ) }, // RG8I
-		{  16, 1, 1,  2, 1, 1,  0, 0,  8,  8,  0,  0, uint8_t(bx::EncodingType::Uint ) }, // RG8U
-		{  16, 1, 1,  2, 1, 1,  0, 0,  8,  8,  0,  0, uint8_t(bx::EncodingType::Snorm) }, // RG8S
-		{  32, 1, 1,  4, 1, 1,  0, 0, 16, 16,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // RG16
-		{  32, 1, 1,  4, 1, 1,  0, 0, 16, 16,  0,  0, uint8_t(bx::EncodingType::Int  ) }, // RG16I
-		{  32, 1, 1,  4, 1, 1,  0, 0, 16, 16,  0,  0, uint8_t(bx::EncodingType::Uint ) }, // RG16U
-		{  32, 1, 1,  4, 1, 1,  0, 0, 16, 16,  0,  0, uint8_t(bx::EncodingType::Float) }, // RG16F
-		{  32, 1, 1,  4, 1, 1,  0, 0, 16, 16,  0,  0, uint8_t(bx::EncodingType::Snorm) }, // RG16S
-		{  64, 1, 1,  8, 1, 1,  0, 0, 32, 32,  0,  0, uint8_t(bx::EncodingType::Int  ) }, // RG32I
-		{  64, 1, 1,  8, 1, 1,  0, 0, 32, 32,  0,  0, uint8_t(bx::EncodingType::Uint ) }, // RG32U
-		{  64, 1, 1,  8, 1, 1,  0, 0, 32, 32,  0,  0, uint8_t(bx::EncodingType::Float) }, // RG32F
-		{  24, 1, 1,  3, 1, 1,  0, 0,  8,  8,  8,  0, uint8_t(bx::EncodingType::Unorm) }, // RGB8
-		{  24, 1, 1,  3, 1, 1,  0, 0,  8,  8,  8,  0, uint8_t(bx::EncodingType::Int  ) }, // RGB8I
-		{  24, 1, 1,  3, 1, 1,  0, 0,  8,  8,  8,  0, uint8_t(bx::EncodingType::Uint ) }, // RGB8U
-		{  24, 1, 1,  3, 1, 1,  0, 0,  8,  8,  8,  0, uint8_t(bx::EncodingType::Snorm) }, // RGB8S
-		{  32, 1, 1,  4, 1, 1,  0, 0,  9,  9,  9,  5, uint8_t(bx::EncodingType::Float) }, // RGB9E5F
-		{  32, 1, 1,  4, 1, 1,  0, 0,  8,  8,  8,  8, uint8_t(bx::EncodingType::Unorm) }, // BGRA8
-		{  32, 1, 1,  4, 1, 1,  0, 0,  8,  8,  8,  8, uint8_t(bx::EncodingType::Unorm) }, // RGBA8
-		{  32, 1, 1,  4, 1, 1,  0, 0,  8,  8,  8,  8, uint8_t(bx::EncodingType::Int  ) }, // RGBA8I
-		{  32, 1, 1,  4, 1, 1,  0, 0,  8,  8,  8,  8, uint8_t(bx::EncodingType::Uint ) }, // RGBA8U
-		{  32, 1, 1,  4, 1, 1,  0, 0,  8,  8,  8,  8, uint8_t(bx::EncodingType::Snorm) }, // RGBA8S
-		{  64, 1, 1,  8, 1, 1,  0, 0, 16, 16, 16, 16, uint8_t(bx::EncodingType::Unorm) }, // RGBA16
-		{  64, 1, 1,  8, 1, 1,  0, 0, 16, 16, 16, 16, uint8_t(bx::EncodingType::Int  ) }, // RGBA16I
-		{  64, 1, 1,  8, 1, 1,  0, 0, 16, 16, 16, 16, uint8_t(bx::EncodingType::Uint ) }, // RGBA16U
-		{  64, 1, 1,  8, 1, 1,  0, 0, 16, 16, 16, 16, uint8_t(bx::EncodingType::Float) }, // RGBA16F
-		{  64, 1, 1,  8, 1, 1,  0, 0, 16, 16, 16, 16, uint8_t(bx::EncodingType::Snorm) }, // RGBA16S
-		{ 128, 1, 1, 16, 1, 1,  0, 0, 32, 32, 32, 32, uint8_t(bx::EncodingType::Int  ) }, // RGBA32I
-		{ 128, 1, 1, 16, 1, 1,  0, 0, 32, 32, 32, 32, uint8_t(bx::EncodingType::Uint ) }, // RGBA32U
-		{ 128, 1, 1, 16, 1, 1,  0, 0, 32, 32, 32, 32, uint8_t(bx::EncodingType::Float) }, // RGBA32F
-		{  16, 1, 1,  2, 1, 1,  0, 0,  5,  6,  5,  0, uint8_t(bx::EncodingType::Unorm) }, // R5G6B5
-		{  16, 1, 1,  2, 1, 1,  0, 0,  4,  4,  4,  4, uint8_t(bx::EncodingType::Unorm) }, // RGBA4
-		{  16, 1, 1,  2, 1, 1,  0, 0,  5,  5,  5,  1, uint8_t(bx::EncodingType::Unorm) }, // RGB5A1
-		{  32, 1, 1,  4, 1, 1,  0, 0, 10, 10, 10,  2, uint8_t(bx::EncodingType::Unorm) }, // RGB10A2
-		{  32, 1, 1,  4, 1, 1,  0, 0, 11, 11, 10,  0, uint8_t(bx::EncodingType::Unorm) }, // R11G11B10F
-		{   0, 0, 0,  0, 0, 0,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Count) }, // UnknownDepth
-		{  16, 1, 1,  2, 1, 1, 16, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // D16
-		{  24, 1, 1,  3, 1, 1, 24, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // D24
-		{  32, 1, 1,  4, 1, 1, 24, 8,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // D24S8
-		{  32, 1, 1,  4, 1, 1, 32, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // D32
-		{  16, 1, 1,  2, 1, 1, 16, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Float) }, // D16F
-		{  24, 1, 1,  3, 1, 1, 24, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Float) }, // D24F
-		{  32, 1, 1,  4, 1, 1, 32, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Float) }, // D32F
-		{   8, 1, 1,  1, 1, 1,  0, 8,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // D0S8
-	};
-	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_imageBlockInfo) );
-
-	static const char* s_textureFormatName[] =
-	{
-		"BC1",        // BC1
-		"BC2",        // BC2
-		"BC3",        // BC3
-		"BC4",        // BC4
-		"BC5",        // BC5
-		"BC6H",       // BC6H
-		"BC7",        // BC7
-		"ETC1",       // ETC1
-		"ETC2",       // ETC2
-		"ETC2A",      // ETC2A
-		"ETC2A1",     // ETC2A1
-		"PTC12",      // PTC12
-		"PTC14",      // PTC14
-		"PTC12A",     // PTC12A
-		"PTC14A",     // PTC14A
-		"PTC22",      // PTC22
-		"PTC24",      // PTC24
-		"<unknown>",  // Unknown
-		"R1",         // R1
-		"A8",         // A8
-		"R8",         // R8
-		"R8I",        // R8I
-		"R8U",        // R8U
-		"R8S",        // R8S
-		"R16",        // R16
-		"R16I",       // R16I
-		"R16U",       // R16U
-		"R16F",       // R16F
-		"R16S",       // R16S
-		"R32I",       // R32I
-		"R32U",       // R32U
-		"R32F",       // R32F
-		"RG8",        // RG8
-		"RG8I",       // RG8I
-		"RG8U",       // RG8U
-		"RG8S",       // RG8S
-		"RG16",       // RG16
-		"RG16I",      // RG16I
-		"RG16U",      // RG16U
-		"RG16F",      // RG16F
-		"RG16S",      // RG16S
-		"RG32I",      // RG32I
-		"RG32U",      // RG32U
-		"RG32F",      // RG32F
-		"RGB8",       // RGB8
-		"RGB8I",      // RGB8I
-		"RGB8U",      // RGB8U
-		"RGB8S",      // RGB8S
-		"RGB9E5",     // RGB9E5F
-		"BGRA8",      // BGRA8
-		"RGBA8",      // RGBA8
-		"RGBA8I",     // RGBA8I
-		"RGBA8U",     // RGBA8U
-		"RGBA8S",     // RGBA8S
-		"RGBA16",     // RGBA16
-		"RGBA16I",    // RGBA16I
-		"RGBA16U",    // RGBA16U
-		"RGBA16F",    // RGBA16F
-		"RGBA16S",    // RGBA16S
-		"RGBA32I",    // RGBA32I
-		"RGBA32U",    // RGBA32U
-		"RGBA32F",    // RGBA32F
-		"R5G6B5",     // R5G6B5
-		"RGBA4",      // RGBA4
-		"RGB5A1",     // RGB5A1
-		"RGB10A2",    // RGB10A2
-		"R11G11B10F", // R11G11B10F
-		"<unknown>",  // UnknownDepth
-		"D16",        // D16
-		"D24",        // D24
-		"D24S8",      // D24S8
-		"D32",        // D32
-		"D16F",       // D16F
-		"D24F",       // D24F
-		"D32F",       // D32F
-		"D0S8",       // D0S8
-	};
-	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormatName) );
-
-	bool isCompressed(TextureFormat::Enum _format)
-	{
-		return _format < TextureFormat::Unknown;
-	}
-
-	bool isColor(TextureFormat::Enum _format)
-	{
-		return _format > TextureFormat::Unknown
-			&& _format < TextureFormat::UnknownDepth
-			;
-	}
-
-	bool isDepth(TextureFormat::Enum _format)
-	{
-		return _format > TextureFormat::UnknownDepth
-			&& _format < TextureFormat::Count
-			;
-	}
-
-	bool isValid(TextureFormat::Enum _format)
-	{
-		return _format != TextureFormat::Unknown
-			&& _format != TextureFormat::UnknownDepth
-			&& _format != TextureFormat::Count
-			;
-	}
-
-	uint8_t getBitsPerPixel(TextureFormat::Enum _format)
-	{
-		return s_imageBlockInfo[_format].bitsPerPixel;
-	}
-
-	const ImageBlockInfo& getBlockInfo(TextureFormat::Enum _format)
-	{
-		return s_imageBlockInfo[_format];
-	}
-
-	uint8_t getBlockSize(TextureFormat::Enum _format)
-	{
-		return s_imageBlockInfo[_format].blockSize;
-	}
-
-	const char* getName(TextureFormat::Enum _format)
-	{
-		return s_textureFormatName[_format];
-	}
-
-	TextureFormat::Enum getFormat(const char* _name)
-	{
-		for (uint32_t ii = 0; ii < TextureFormat::Count; ++ii)
-		{
-			const TextureFormat::Enum fmt = TextureFormat::Enum(ii);
-			if (isValid(fmt) )
-			{
-				if (0 == bx::strincmp(s_textureFormatName[ii], _name) )
-				{
-					return fmt;
-				}
-			}
-		}
-
-		return TextureFormat::Unknown;
-	}
-
-	uint8_t imageGetNumMips(TextureFormat::Enum _format, uint16_t _width, uint16_t _height, uint16_t _depth)
-	{
-		const ImageBlockInfo& blockInfo = getBlockInfo(_format);
-		const uint16_t blockWidth  = blockInfo.blockWidth;
-		const uint16_t blockHeight = blockInfo.blockHeight;
-		const uint16_t minBlockX   = blockInfo.minBlockX;
-		const uint16_t minBlockY   = blockInfo.minBlockY;
-
-		_width  = bx::uint16_max(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth)*blockWidth);
-		_height = bx::uint16_max(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
-		_depth  = bx::uint16_max(1, _depth);
-
-		uint32_t max = bx::uint32_max(_width, bx::uint32_max(_height, _depth) );
-		uint32_t numMips = bx::uint32_max(1, uint8_t(bx::flog2(float(max) ) ) );
-
-		return uint8_t(numMips);
-	}
-
-	uint32_t imageGetSize(TextureInfo* _info, uint16_t _width, uint16_t _height, uint16_t _depth, bool _cubeMap, bool _hasMips, uint16_t _numLayers, TextureFormat::Enum _format)
-	{
-		const ImageBlockInfo& blockInfo = getBlockInfo(_format);
-		const uint8_t  bpp         = blockInfo.bitsPerPixel;
-		const uint16_t blockWidth  = blockInfo.blockWidth;
-		const uint16_t blockHeight = blockInfo.blockHeight;
-		const uint16_t minBlockX   = blockInfo.minBlockX;
-		const uint16_t minBlockY   = blockInfo.minBlockY;
-
-		_width  = bx::uint16_max(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth)*blockWidth);
-		_height = bx::uint16_max(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
-		_depth  = bx::uint16_max(1, _depth);
-		const uint8_t  numMips = calcNumMips(_hasMips, _width, _height, _depth);
-		const uint32_t sides   = _cubeMap ? 6 : 1;
-
-		uint32_t width  = _width;
-		uint32_t height = _height;
-		uint32_t depth  = _depth;
-		uint32_t size   = 0;
-
-		for (uint32_t lod = 0; lod < numMips; ++lod)
-		{
-			width  = bx::uint32_max(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
-			height = bx::uint32_max(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
-			depth  = bx::uint32_max(1, depth);
-
-			size += width*height*depth*bpp/8 * sides;
-
-			width  >>= 1;
-			height >>= 1;
-			depth  >>= 1;
-		}
-
-		size *= _numLayers;
-
-		if (NULL != _info)
-		{
-			_info->format  = _format;
-			_info->width   = _width;
-			_info->height  = _height;
-			_info->depth   = _depth;
-			_info->numMips = numMips;
-			_info->numLayers = _numLayers;
-			_info->cubeMap   = _cubeMap;
-			_info->storageSize  = size;
-			_info->bitsPerPixel = bpp;
-		}
-
-		return size;
-	}
-
-	void imageSolid(void* _dst, uint32_t _width, uint32_t _height, uint32_t _solid)
-	{
-		uint32_t* dst = (uint32_t*)_dst;
-		for (uint32_t ii = 0, num = _width*_height; ii < num; ++ii)
-		{
-			*dst++ = _solid;
-		}
-	}
-
-	void imageCheckerboard(void* _dst, uint32_t _width, uint32_t _height, uint32_t _step, uint32_t _0, uint32_t _1)
-	{
-		uint32_t* dst = (uint32_t*)_dst;
-		for (uint32_t yy = 0; yy < _height; ++yy)
-		{
-			for (uint32_t xx = 0; xx < _width; ++xx)
-			{
-				uint32_t abgr = ( (xx/_step)&1) ^ ( (yy/_step)&1) ? _1 : _0;
-				*dst++ = abgr;
-			}
-		}
-	}
-
-	void imageRgba8Downsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstwidth  = _width/2;
-		const uint32_t dstheight = _height/2;
-
-		if (0 == dstwidth
-		||  0 == dstheight)
-		{
-			return;
-		}
-
-		uint8_t* dst = (uint8_t*)_dst;
-		const uint8_t* src = (const uint8_t*)_src;
-
-		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
-		{
-			const uint8_t* rgba = src;
-			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
-			{
-				float rr = bx::fpow(rgba[       0], 2.2f);
-				float gg = bx::fpow(rgba[       1], 2.2f);
-				float bb = bx::fpow(rgba[       2], 2.2f);
-				float aa =          rgba[       3];
-				rr      += bx::fpow(rgba[       4], 2.2f);
-				gg      += bx::fpow(rgba[       5], 2.2f);
-				bb      += bx::fpow(rgba[       6], 2.2f);
-				aa      +=          rgba[       7];
-				rr      += bx::fpow(rgba[_pitch+0], 2.2f);
-				gg      += bx::fpow(rgba[_pitch+1], 2.2f);
-				bb      += bx::fpow(rgba[_pitch+2], 2.2f);
-				aa      +=          rgba[_pitch+3];
-				rr      += bx::fpow(rgba[_pitch+4], 2.2f);
-				gg      += bx::fpow(rgba[_pitch+5], 2.2f);
-				bb      += bx::fpow(rgba[_pitch+6], 2.2f);
-				aa      +=          rgba[_pitch+7];
-
-				rr *= 0.25f;
-				gg *= 0.25f;
-				bb *= 0.25f;
-				aa *= 0.25f;
-				rr = bx::fpow(rr, 1.0f/2.2f);
-				gg = bx::fpow(gg, 1.0f/2.2f);
-				bb = bx::fpow(bb, 1.0f/2.2f);
-				dst[0] = (uint8_t)rr;
-				dst[1] = (uint8_t)gg;
-				dst[2] = (uint8_t)bb;
-				dst[3] = (uint8_t)aa;
-			}
-		}
-	}
-
-	void imageRgba8Downsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstwidth  = _width/2;
-		const uint32_t dstheight = _height/2;
-
-		if (0 == dstwidth
-		||  0 == dstheight)
-		{
-			return;
-		}
-
-		uint8_t* dst = (uint8_t*)_dst;
-		const uint8_t* src = (const uint8_t*)_src;
-
-		using namespace bx;
-		const simd128_t unpack = simd_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
-		const simd128_t pack   = simd_ld(1.0f, 256.0f*0.5f, 65536.0f, 16777216.0f*0.5f);
-		const simd128_t umask  = simd_ild(0xff, 0xff00, 0xff0000, 0xff000000);
-		const simd128_t pmask  = simd_ild(0xff, 0x7f80, 0xff0000, 0x7f800000);
-		const simd128_t wflip  = simd_ild(0, 0, 0, 0x80000000);
-		const simd128_t wadd   = simd_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
-		const simd128_t gamma  = simd_ld(1.0f/2.2f, 1.0f/2.2f, 1.0f/2.2f, 1.0f);
-		const simd128_t linear = simd_ld(2.2f, 2.2f, 2.2f, 1.0f);
-		const simd128_t quater = simd_splat(0.25f);
-
-		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
-		{
-			const uint8_t* rgba = src;
-			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
-			{
-				const simd128_t abgr0  = simd_splat(rgba);
-				const simd128_t abgr1  = simd_splat(rgba+4);
-				const simd128_t abgr2  = simd_splat(rgba+_pitch);
-				const simd128_t abgr3  = simd_splat(rgba+_pitch+4);
-
-				const simd128_t abgr0m = simd_and(abgr0, umask);
-				const simd128_t abgr1m = simd_and(abgr1, umask);
-				const simd128_t abgr2m = simd_and(abgr2, umask);
-				const simd128_t abgr3m = simd_and(abgr3, umask);
-				const simd128_t abgr0x = simd_xor(abgr0m, wflip);
-				const simd128_t abgr1x = simd_xor(abgr1m, wflip);
-				const simd128_t abgr2x = simd_xor(abgr2m, wflip);
-				const simd128_t abgr3x = simd_xor(abgr3m, wflip);
-				const simd128_t abgr0f = simd_itof(abgr0x);
-				const simd128_t abgr1f = simd_itof(abgr1x);
-				const simd128_t abgr2f = simd_itof(abgr2x);
-				const simd128_t abgr3f = simd_itof(abgr3x);
-				const simd128_t abgr0c = simd_add(abgr0f, wadd);
-				const simd128_t abgr1c = simd_add(abgr1f, wadd);
-				const simd128_t abgr2c = simd_add(abgr2f, wadd);
-				const simd128_t abgr3c = simd_add(abgr3f, wadd);
-				const simd128_t abgr0n = simd_mul(abgr0c, unpack);
-				const simd128_t abgr1n = simd_mul(abgr1c, unpack);
-				const simd128_t abgr2n = simd_mul(abgr2c, unpack);
-				const simd128_t abgr3n = simd_mul(abgr3c, unpack);
-
-				const simd128_t abgr0l = simd_pow(abgr0n, linear);
-				const simd128_t abgr1l = simd_pow(abgr1n, linear);
-				const simd128_t abgr2l = simd_pow(abgr2n, linear);
-				const simd128_t abgr3l = simd_pow(abgr3n, linear);
-
-				const simd128_t sum0   = simd_add(abgr0l, abgr1l);
-				const simd128_t sum1   = simd_add(abgr2l, abgr3l);
-				const simd128_t sum2   = simd_add(sum0, sum1);
-				const simd128_t avg0   = simd_mul(sum2, quater);
-				const simd128_t avg1   = simd_pow(avg0, gamma);
-
-				const simd128_t avg2   = simd_mul(avg1, pack);
-				const simd128_t ftoi0  = simd_ftoi(avg2);
-				const simd128_t ftoi1  = simd_and(ftoi0, pmask);
-				const simd128_t zwxy   = simd_swiz_zwxy(ftoi1);
-				const simd128_t tmp0   = simd_or(ftoi1, zwxy);
-				const simd128_t yyyy   = simd_swiz_yyyy(tmp0);
-				const simd128_t tmp1   = simd_iadd(yyyy, yyyy);
-				const simd128_t result = simd_or(tmp0, tmp1);
-
-				simd_stx(dst, result);
-			}
-		}
-	}
-
-	void imageRgba32fToLinear(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		      uint8_t* dst = (      uint8_t*)_dst;
-		const uint8_t* src = (const uint8_t*)_src;
-
-		for (uint32_t yy = 0; yy < _height; ++yy, src += _pitch)
-		{
-			for (uint32_t xx = 0; xx < _width; ++xx, dst += 16)
-			{
-				      float* fd = (      float*)dst;
-				const float* fs = (const float*)src;
-
-				fd[0] = bx::fpow(fs[0], 1.0f/2.2f);
-				fd[1] = bx::fpow(fs[1], 1.0f/2.2f);
-				fd[2] = bx::fpow(fs[2], 1.0f/2.2f);
-				fd[3] =          fs[3];
-			}
-		}
-	}
-
-	void imageRgba32fToGamma(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		      uint8_t* dst = (      uint8_t*)_dst;
-		const uint8_t* src = (const uint8_t*)_src;
-
-		for (uint32_t yy = 0; yy < _height; ++yy, src += _pitch)
-		{
-			for (uint32_t xx = 0; xx < _width; ++xx, dst += 16)
-			{
-				      float* fd = (      float*)dst;
-				const float* fs = (const float*)src;
-
-				fd[0] = bx::fpow(fs[0], 2.2f);
-				fd[1] = bx::fpow(fs[1], 2.2f);
-				fd[2] = bx::fpow(fs[2], 2.2f);
-				fd[3] =          fs[3];
-			}
-		}
-	}
-
-	void imageRgba32fLinearDownsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstwidth  = _width/2;
-		const uint32_t dstheight = _height/2;
-
-		if (0 == dstwidth
-		||  0 == dstheight)
-		{
-			return;
-		}
-
-		const uint8_t* src = (const uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
-		{
-			const float* rgba0 = (const float*)&src[0];
-			const float* rgba1 = (const float*)&src[_pitch];
-			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba0 += 8, rgba1 += 8, dst += 16)
-			{
-				float xyz[4];
-				xyz[0]  = rgba0[0];
-				xyz[1]  = rgba0[1];
-				xyz[2]  = rgba0[2];
-				xyz[3]  = rgba0[3];
-
-				xyz[0] += rgba0[4];
-				xyz[1] += rgba0[5];
-				xyz[2] += rgba0[6];
-				xyz[3] += rgba0[7];
-
-				xyz[0] += rgba1[0];
-				xyz[1] += rgba1[1];
-				xyz[2] += rgba1[2];
-				xyz[3] += rgba1[3];
-
-				xyz[0] += rgba1[4];
-				xyz[1] += rgba1[5];
-				xyz[2] += rgba1[6];
-				xyz[3] += rgba1[7];
-
-				xyz[0] *= 0.25f;
-				xyz[1] *= 0.25f;
-				xyz[2] *= 0.25f;
-				xyz[3] *= 0.25f;
-			}
-		}
-	}
-
-	void imageRgba32fLinearDownsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		imageRgba32fLinearDownsample2x2Ref(_dst, _width, _height, _pitch, _src);
-	}
-
-	void imageRgba32fDownsample2x2NormalMapRef(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstwidth  = _width/2;
-		const uint32_t dstheight = _height/2;
-
-		if (0 == dstwidth
-		||  0 == dstheight)
-		{
-			return;
-		}
-
-		const uint8_t* src = (const uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
-		{
-			const float* rgba0 = (const float*)&src[0];
-			const float* rgba1 = (const float*)&src[_pitch];
-			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba0 += 8, rgba1 += 8, dst += 16)
-			{
-				float xyz[3];
-				xyz[0]  = rgba0[0];
-				xyz[1]  = rgba0[1];
-				xyz[2]  = rgba0[2];
-				xyz[0] += rgba0[4];
-				xyz[1] += rgba0[5];
-				xyz[2] += rgba0[6];
-				xyz[0] += rgba1[0];
-				xyz[1] += rgba1[1];
-				xyz[2] += rgba1[2];
-				xyz[0] += rgba1[4];
-				xyz[1] += rgba1[5];
-				xyz[2] += rgba1[6];
-				bx::vec3Norm( (float*)dst, xyz);
-			}
-		}
-	}
-
-	void imageRgba32fDownsample2x2NormalMap(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		imageRgba32fDownsample2x2NormalMapRef(_dst, _width, _height, _pitch, _src);
-	}
-
-	void imageSwizzleBgra8Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint8_t* src = (uint8_t*) _src;
-		const uint8_t* next = src + _pitch;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		for (uint32_t yy = 0; yy < _height; ++yy, src = next, next += _pitch)
-		{
-			for (uint32_t xx = 0; xx < _width; ++xx, src += 4, dst += 4)
-			{
-				uint8_t rr = src[0];
-				uint8_t gg = src[1];
-				uint8_t bb = src[2];
-				uint8_t aa = src[3];
-				dst[0] = bb;
-				dst[1] = gg;
-				dst[2] = rr;
-				dst[3] = aa;
-			}
-		}
-	}
-
-	void imageSwizzleBgra8(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		// Test can we do four 4-byte pixels at the time.
-		if (0 != (_width&0x3)
-		||  _width < 4
-		||  !bx::isAligned(_src, 16)
-		||  !bx::isAligned(_dst, 16) )
-		{
-			BX_WARN(false, "Image swizzle is taking slow path.");
-			BX_WARN(bx::isAligned(_src, 16), "Source %p is not 16-byte aligned.", _src);
-			BX_WARN(bx::isAligned(_dst, 16), "Destination %p is not 16-byte aligned.", _dst);
-			BX_WARN(_width < 4, "Image width must be multiple of 4 (width %d).", _width);
-			imageSwizzleBgra8Ref(_dst, _width, _height, _pitch, _src);
-			return;
-		}
-
-		using namespace bx;
-
-		const simd128_t mf0f0 = simd_isplat(0xff00ff00);
-		const simd128_t m0f0f = simd_isplat(0x00ff00ff);
-		const uint8_t* src = (uint8_t*) _src;
-		const uint8_t* next = src + _pitch;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		const uint32_t width = _width/4;
-
-		for (uint32_t yy = 0; yy < _height; ++yy, src = next, next += _pitch)
-		{
-			for (uint32_t xx = 0; xx < width; ++xx, src += 16, dst += 16)
-			{
-				const simd128_t tabgr = simd_ld(src);
-				const simd128_t t00ab = simd_srl(tabgr, 16);
-				const simd128_t tgr00 = simd_sll(tabgr, 16);
-				const simd128_t tgrab = simd_or(t00ab, tgr00);
-				const simd128_t ta0g0 = simd_and(tabgr, mf0f0);
-				const simd128_t t0r0b = simd_and(tgrab, m0f0f);
-				const simd128_t targb = simd_or(ta0g0, t0r0b);
-				simd_st(dst, targb);
-			}
-		}
-	}
-
-	void imageCopy(void* _dst, uint32_t _height, uint32_t _srcPitch, const void* _src, uint32_t _dstPitch)
-	{
-		const uint32_t pitch = bx::uint32_min(_srcPitch, _dstPitch);
-		const uint8_t* src = (uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += _dstPitch)
-		{
-			bx::memCopy(dst, src, pitch);
-		}
-	}
-
-	void imageCopy(void* _dst, uint32_t _width, uint32_t _height, uint32_t _bpp, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstPitch = _width*_bpp/8;
-		imageCopy(_dst, _height, _pitch, _src, dstPitch);
-	}
-
-	struct PackUnpack
-	{
-		bx::PackFn pack;
-		bx::UnpackFn unpack;
-	};
-
-	static const PackUnpack s_packUnpack[] =
-	{
-		{ NULL,               NULL                 }, // BC1
-		{ NULL,               NULL                 }, // BC2
-		{ NULL,               NULL                 }, // BC3
-		{ NULL,               NULL                 }, // BC4
-		{ NULL,               NULL                 }, // BC5
-		{ NULL,               NULL                 }, // BC6H
-		{ NULL,               NULL                 }, // BC7
-		{ NULL,               NULL                 }, // ETC1
-		{ NULL,               NULL                 }, // ETC2
-		{ NULL,               NULL                 }, // ETC2A
-		{ NULL,               NULL                 }, // ETC2A1
-		{ NULL,               NULL                 }, // PTC12
-		{ NULL,               NULL                 }, // PTC14
-		{ NULL,               NULL                 }, // PTC12A
-		{ NULL,               NULL                 }, // PTC14A
-		{ NULL,               NULL                 }, // PTC22
-		{ NULL,               NULL                 }, // PTC24
-		{ NULL,               NULL                 }, // Unknown
-		{ NULL,               NULL                 }, // R1
-		{ bx::packR8,         bx::unpackR8         }, // A8
-		{ bx::packR8,         bx::unpackR8         }, // R8
-		{ bx::packR8I,        bx::unpackR8I        }, // R8I
-		{ bx::packR8U,        bx::unpackR8U        }, // R8U
-		{ bx::packR8S,        bx::unpackR8S        }, // R8S
-		{ bx::packR16,        bx::unpackR16        }, // R16
-		{ bx::packR16I,       bx::unpackR16I       }, // R16I
-		{ bx::packR16U,       bx::unpackR16U       }, // R16U
-		{ bx::packR16F,       bx::unpackR16F       }, // R16F
-		{ bx::packR16S,       bx::unpackR16S       }, // R16S
-		{ bx::packR32I,       bx::unpackR32I       }, // R32I
-		{ bx::packR32U,       bx::unpackR32U       }, // R32U
-		{ bx::packR32F,       bx::unpackR32F       }, // R32F
-		{ bx::packRg8,        bx::unpackRg8        }, // RG8
-		{ bx::packRg8I,       bx::unpackRg8I       }, // RG8I
-		{ bx::packRg8U,       bx::unpackRg8U       }, // RG8U
-		{ bx::packRg8S,       bx::unpackRg8S       }, // RG8S
-		{ bx::packRg16,       bx::unpackRg16       }, // RG16
-		{ bx::packRg16I,      bx::unpackRg16I      }, // RG16I
-		{ bx::packRg16U,      bx::unpackRg16U      }, // RG16U
-		{ bx::packRg16F,      bx::unpackRg16F      }, // RG16F
-		{ bx::packRg16S,      bx::unpackRg16S      }, // RG16S
-		{ bx::packRg32I,      bx::unpackRg32I      }, // RG32I
-		{ bx::packRg32U,      bx::unpackRg32U      }, // RG32U
-		{ bx::packRg32F,      bx::unpackRg32F      }, // RG32F
-		{ bx::packRgb8,       bx::unpackRgb8       }, // RGB8
-		{ bx::packRgb8S,      bx::unpackRgb8S      }, // RGB8S
-		{ bx::packRgb8I,      bx::unpackRgb8I      }, // RGB8I
-		{ bx::packRgb8U,      bx::unpackRgb8U      }, // RGB8U
-		{ bx::packRgb9E5F,    bx::unpackRgb9E5F    }, // RGB9E5F
-		{ bx::packBgra8,      bx::unpackBgra8      }, // BGRA8
-		{ bx::packRgba8,      bx::unpackRgba8      }, // RGBA8
-		{ bx::packRgba8I,     bx::unpackRgba8I     }, // RGBA8I
-		{ bx::packRgba8U,     bx::unpackRgba8U     }, // RGBA8U
-		{ bx::packRgba8S,     bx::unpackRgba8S     }, // RGBA8S
-		{ bx::packRgba16,     bx::unpackRgba16     }, // RGBA16
-		{ bx::packRgba16I,    bx::unpackRgba16I    }, // RGBA16I
-		{ bx::packRgba16U,    bx::unpackRgba16U    }, // RGBA16U
-		{ bx::packRgba16F,    bx::unpackRgba16F    }, // RGBA16F
-		{ bx::packRgba16S,    bx::unpackRgba16S    }, // RGBA16S
-		{ bx::packRgba32I,    bx::unpackRgba32I    }, // RGBA32I
-		{ bx::packRgba32U,    bx::unpackRgba32U    }, // RGBA32U
-		{ bx::packRgba32F,    bx::unpackRgba32F    }, // RGBA32F
-		{ bx::packR5G6B5,     bx::unpackR5G6B5     }, // R5G6B5
-		{ bx::packRgba4,      bx::unpackRgba4      }, // RGBA4
-		{ bx::packRgb5a1,     bx::unpackRgb5a1     }, // RGB5A1
-		{ bx::packRgb10A2,    bx::unpackRgb10A2    }, // RGB10A2
-		{ bx::packR11G11B10F, bx::unpackR11G11B10F }, // R11G11B10F
-		{ NULL,               NULL                 }, // UnknownDepth
-		{ bx::packR16,        bx::unpackR16        }, // D16
-		{ bx::packR24,        bx::unpackR24        }, // D24
-		{ bx::packR24G8,      bx::unpackR24G8      }, // D24S8
-		{ NULL,               NULL                 }, // D32
-		{ bx::packR16F,       bx::unpackR16F       }, // D16F
-		{ NULL,               NULL                 }, // D24F
-		{ bx::packR32F,       bx::unpackR32F       }, // D32F
-		{ bx::packR8,         bx::unpackR8         }, // D0S8
-	};
-	BX_STATIC_ASSERT(TextureFormat::Count ==       BX_COUNTOF(s_packUnpack) );
-
-	bool imageConvert(TextureFormat::Enum _dstFormat, TextureFormat::Enum _srcFormat)
-	{
-		bx::UnpackFn unpack = s_packUnpack[_srcFormat].unpack;
-		bx::PackFn   pack   = s_packUnpack[_dstFormat].pack;
-		return NULL != pack
-			&& NULL != unpack
-			;
-	}
-
-	void imageConvert(void* _dst, uint32_t _bpp, bx::PackFn _pack, const void* _src, bx::UnpackFn _unpack, uint32_t _size)
-	{
-		const uint8_t* src = (uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		const uint32_t size = _size * 8 / _bpp;
-
-		for (uint32_t ii = 0; ii < size; ++ii)
-		{
-			float rgba[4];
-			_unpack(rgba, &src[ii*_bpp/8]);
-			_pack(&dst[ii*_bpp/8], rgba);
-		}
-	}
-
-	void imageConvert(void* _dst, uint32_t _dstBpp, bx::PackFn _pack, const void* _src, uint32_t _srcBpp, bx::UnpackFn _unpack, uint32_t _width, uint32_t _height, uint32_t _srcPitch)
-	{
-		const uint8_t* src = (uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		const uint32_t dstPitch = _width * _dstBpp / 8;
-
-		for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += dstPitch)
-		{
-			for (uint32_t xx = 0; xx < _width; ++xx)
-			{
-				float rgba[4];
-				_unpack(rgba, &src[xx*_srcBpp/8]);
-				_pack(&dst[xx*_dstBpp/8], rgba);
-			}
-		}
-	}
-
-	bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _srcPitch)
-	{
-		bx::UnpackFn unpack = s_packUnpack[_srcFormat].unpack;
-		bx::PackFn   pack   = s_packUnpack[_dstFormat].pack;
-		if (NULL == pack
-		||  NULL == unpack)
-		{
-			return false;
-		}
-
-		const uint32_t srcBpp = s_imageBlockInfo[_srcFormat].bitsPerPixel;
-		const uint32_t dstBpp = s_imageBlockInfo[_dstFormat].bitsPerPixel;
-		imageConvert(_dst, dstBpp, pack, _src, srcBpp, unpack, _width, _height, _srcPitch);
-
-		return true;
-	}
-
-	bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height)
-	{
-		const uint32_t srcBpp = s_imageBlockInfo[_srcFormat].bitsPerPixel;
-
-		if (_dstFormat == _srcFormat)
-		{
-			bx::memCopy(_dst, _src, _width*_height*srcBpp/8);
-			return true;
-		}
-
-		return imageConvert(_dst, _dstFormat, _src, _srcFormat, _width, _height, _width*srcBpp/8);
-	}
-
-	ImageContainer* imageConvert(bx::AllocatorI* _allocator, TextureFormat::Enum _dstFormat, const ImageContainer& _input)
-	{
-		ImageContainer* output = imageAlloc(_allocator
-			, _dstFormat
-			, uint16_t(_input.m_width)
-			, uint16_t(_input.m_height)
-			, uint16_t(_input.m_depth)
-			, _input.m_numLayers
-			, _input.m_cubeMap
-			, 1 < _input.m_numMips
-			);
-
-		const uint8_t  bpp = getBitsPerPixel(_dstFormat);
-		const uint16_t numSides = _input.m_numLayers * (_input.m_cubeMap ? 6 : 1);
-
-		uint8_t* dst = (uint8_t*)output->m_data	;
-		for (uint16_t side = 0; side < numSides; ++side)
-		{
-			for (uint8_t lod = 0, num = _input.m_numMips; lod < num; ++lod)
-			{
-				ImageMip mip;
-				if (imageGetRawData(_input, side, lod, _input.m_data, _input.m_size, mip) )
-				{
-					bool ok = imageConvert(dst
-							, _dstFormat
-							, mip.m_data
-							, mip.m_format
-							, mip.m_width
-							, mip.m_height
-							);
-					BX_CHECK(ok, "Conversion from %s to %s failed!"
-							, getName(_input.m_format)
-							, getName(output->m_format)
-							);
-					BX_UNUSED(ok);
-
-					dst += mip.m_width*mip.m_height*bpp/8;
-				}
-			}
-		}
-
-		return output;
-	}
-
-	ImageContainer* imageParseBgfx(bx::AllocatorI* _allocator, const void* _src, uint32_t _size)
-	{
-		ImageContainer imageContainer;
-		if (!imageParse(imageContainer, _src, _size) )
-		{
-			return NULL;
-		}
-
-		ImageContainer* output = imageAlloc(_allocator
-			, imageContainer.m_format
-			, uint16_t(imageContainer.m_width)
-			, uint16_t(imageContainer.m_height)
-			, uint16_t(imageContainer.m_depth)
-			, imageContainer.m_numLayers
-			, imageContainer.m_cubeMap
-			, 1 < imageContainer.m_numMips
-			);
-
-		const uint16_t numSides = imageContainer.m_numLayers * (imageContainer.m_cubeMap ? 6 : 1);
-		uint8_t* dst = (uint8_t*)output->m_data;
-
-		for (uint16_t side = 0; side < numSides; ++side)
-		{
-			for (uint8_t lod = 0, num = imageContainer.m_numMips; lod < num; ++lod)
-			{
-				ImageMip mip;
-				if (imageGetRawData(imageContainer, side, lod, _src, _size, mip) )
-				{
-					bx::memCopy(dst, mip.m_data, mip.m_size);
-					dst += mip.m_size;
-				}
-			}
-		}
-
-		return output;
-	}
-
-	uint8_t bitRangeConvert(uint32_t _in, uint32_t _from, uint32_t _to)
-	{
-		using namespace bx;
-		uint32_t tmp0   = uint32_sll(1, _to);
-		uint32_t tmp1   = uint32_sll(1, _from);
-		uint32_t tmp2   = uint32_dec(tmp0);
-		uint32_t tmp3   = uint32_dec(tmp1);
-		uint32_t tmp4   = uint32_mul(_in, tmp2);
-		uint32_t tmp5   = uint32_add(tmp3, tmp4);
-		uint32_t tmp6   = uint32_srl(tmp5, _from);
-		uint32_t tmp7   = uint32_add(tmp5, tmp6);
-		uint32_t result = uint32_srl(tmp7, _from);
-
-		return uint8_t(result);
-	}
-
-	void decodeBlockDxt(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		uint8_t colors[4*3];
-
-		uint32_t c0 = _src[0] | (_src[1] << 8);
-		colors[0] = bitRangeConvert( (c0>> 0)&0x1f, 5, 8);
-		colors[1] = bitRangeConvert( (c0>> 5)&0x3f, 6, 8);
-		colors[2] = bitRangeConvert( (c0>>11)&0x1f, 5, 8);
-
-		uint32_t c1 = _src[2] | (_src[3] << 8);
-		colors[3] = bitRangeConvert( (c1>> 0)&0x1f, 5, 8);
-		colors[4] = bitRangeConvert( (c1>> 5)&0x3f, 6, 8);
-		colors[5] = bitRangeConvert( (c1>>11)&0x1f, 5, 8);
-
-		colors[6] = (2*colors[0] + colors[3]) / 3;
-		colors[7] = (2*colors[1] + colors[4]) / 3;
-		colors[8] = (2*colors[2] + colors[5]) / 3;
-
-		colors[ 9] = (colors[0] + 2*colors[3]) / 3;
-		colors[10] = (colors[1] + 2*colors[4]) / 3;
-		colors[11] = (colors[2] + 2*colors[5]) / 3;
-
-		for (uint32_t ii = 0, next = 8*4; ii < 16*4; ii += 4, next += 2)
-		{
-			int idx = ( (_src[next>>3] >> (next & 7) ) & 3) * 3;
-			_dst[ii+0] = colors[idx+0];
-			_dst[ii+1] = colors[idx+1];
-			_dst[ii+2] = colors[idx+2];
-		}
-	}
-
-	void decodeBlockDxt1(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		uint8_t colors[4*4];
-
-		uint32_t c0 = _src[0] | (_src[1] << 8);
-		colors[0] = bitRangeConvert( (c0>> 0)&0x1f, 5, 8);
-		colors[1] = bitRangeConvert( (c0>> 5)&0x3f, 6, 8);
-		colors[2] = bitRangeConvert( (c0>>11)&0x1f, 5, 8);
-		colors[3] = 255;
-
-		uint32_t c1 = _src[2] | (_src[3] << 8);
-		colors[4] = bitRangeConvert( (c1>> 0)&0x1f, 5, 8);
-		colors[5] = bitRangeConvert( (c1>> 5)&0x3f, 6, 8);
-		colors[6] = bitRangeConvert( (c1>>11)&0x1f, 5, 8);
-		colors[7] = 255;
-
-		if (c0 > c1)
-		{
-			colors[ 8] = (2*colors[0] + colors[4]) / 3;
-			colors[ 9] = (2*colors[1] + colors[5]) / 3;
-			colors[10] = (2*colors[2] + colors[6]) / 3;
-			colors[11] = 255;
-
-			colors[12] = (colors[0] + 2*colors[4]) / 3;
-			colors[13] = (colors[1] + 2*colors[5]) / 3;
-			colors[14] = (colors[2] + 2*colors[6]) / 3;
-			colors[15] = 255;
-		}
-		else
-		{
-			colors[ 8] = (colors[0] + colors[4]) / 2;
-			colors[ 9] = (colors[1] + colors[5]) / 2;
-			colors[10] = (colors[2] + colors[6]) / 2;
-			colors[11] = 255;
-
-			colors[12] = 0;
-			colors[13] = 0;
-			colors[14] = 0;
-			colors[15] = 0;
-		}
-
-		for (uint32_t ii = 0, next = 8*4; ii < 16*4; ii += 4, next += 2)
-		{
-			int idx = ( (_src[next>>3] >> (next & 7) ) & 3) * 4;
-			_dst[ii+0] = colors[idx+0];
-			_dst[ii+1] = colors[idx+1];
-			_dst[ii+2] = colors[idx+2];
-			_dst[ii+3] = colors[idx+3];
-		}
-	}
-
-	void decodeBlockDxt23A(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		for (uint32_t ii = 0, next = 0; ii < 16*4; ii += 4, next += 4)
-		{
-			uint32_t c0 = (_src[next>>3] >> (next&7) ) & 0xf;
-			_dst[ii] = bitRangeConvert(c0, 4, 8);
-		}
-	}
-
-	void decodeBlockDxt45A(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		uint8_t alpha[8];
-		alpha[0] = _src[0];
-		alpha[1] = _src[1];
-
-		if (alpha[0] > alpha[1])
-		{
-			alpha[2] = (6*alpha[0] + 1*alpha[1]) / 7;
-			alpha[3] = (5*alpha[0] + 2*alpha[1]) / 7;
-			alpha[4] = (4*alpha[0] + 3*alpha[1]) / 7;
-			alpha[5] = (3*alpha[0] + 4*alpha[1]) / 7;
-			alpha[6] = (2*alpha[0] + 5*alpha[1]) / 7;
-			alpha[7] = (1*alpha[0] + 6*alpha[1]) / 7;
-		}
-		else
-		{
-			alpha[2] = (4*alpha[0] + 1*alpha[1]) / 5;
-			alpha[3] = (3*alpha[0] + 2*alpha[1]) / 5;
-			alpha[4] = (2*alpha[0] + 3*alpha[1]) / 5;
-			alpha[5] = (1*alpha[0] + 4*alpha[1]) / 5;
-			alpha[6] = 0;
-			alpha[7] = 255;
-		}
-
-		uint32_t idx0 = _src[2];
-		uint32_t idx1 = _src[5];
-		idx0 |= uint32_t(_src[3])<<8;
-		idx1 |= uint32_t(_src[6])<<8;
-		idx0 |= uint32_t(_src[4])<<16;
-		idx1 |= uint32_t(_src[7])<<16;
-		for (uint32_t ii = 0; ii < 8*4; ii += 4)
-		{
-			_dst[ii]    = alpha[idx0&7];
-			_dst[ii+32] = alpha[idx1&7];
-			idx0 >>= 3;
-			idx1 >>= 3;
-		}
-	}
-
-	static const int32_t s_etc1Mod[8][4] =
-	{
-		{  2,   8,  -2,   -8},
-		{  5,  17,  -5,  -17},
-		{  9,  29,  -9,  -29},
-		{ 13,  42, -13,  -42},
-		{ 18,  60, -18,  -60},
-		{ 24,  80, -24,  -80},
-		{ 33, 106, -33, -106},
-		{ 47, 183, -47, -183},
-	};
-
-	static const uint8_t s_etc2Mod[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
-
-	uint8_t uint8_sat(int32_t _a)
-	{
-		using namespace bx;
-		const uint32_t min    = uint32_imin(_a, 255);
-		const uint32_t result = uint32_imax(min, 0);
-		return (uint8_t)result;
-	}
-
-	uint8_t uint8_satadd(int32_t _a, int32_t _b)
-	{
-		const int32_t add = _a + _b;
-		return uint8_sat(add);
-	}
-
-	void decodeBlockEtc2ModeT(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		uint8_t rgb[16];
-
-		// 0       1       2       3       4       5       6       7
-		// 7654321076543210765432107654321076543210765432107654321076543210
-		// ...rr.rrggggbbbbrrrrggggbbbbDDD.mmmmmmmmmmmmmmmmllllllllllllllll
-		//    ^            ^           ^   ^               ^
-		//    +-- c0       +-- c1      |   +-- msb         +-- lsb
-		//                             +-- dist
-
-		rgb[ 0] = ( (_src[0] >> 1) & 0xc)
-			    |   (_src[0]       & 0x3)
-			    ;
-		rgb[ 1] = _src[1] >> 4;
-		rgb[ 2] = _src[1] & 0xf;
-
-		rgb[ 8] = _src[2] >> 4;
-		rgb[ 9] = _src[2] & 0xf;
-		rgb[10] = _src[3] >> 4;
-
-		rgb[ 0] = bitRangeConvert(rgb[ 0], 4, 8);
-		rgb[ 1] = bitRangeConvert(rgb[ 1], 4, 8);
-		rgb[ 2] = bitRangeConvert(rgb[ 2], 4, 8);
-		rgb[ 8] = bitRangeConvert(rgb[ 8], 4, 8);
-		rgb[ 9] = bitRangeConvert(rgb[ 9], 4, 8);
-		rgb[10] = bitRangeConvert(rgb[10], 4, 8);
-
-		uint8_t dist = (_src[3] >> 1) & 0x7;
-		int32_t mod = s_etc2Mod[dist];
-
-		rgb[ 4] = uint8_satadd(rgb[ 8],  mod);
-		rgb[ 5] = uint8_satadd(rgb[ 9],  mod);
-		rgb[ 6] = uint8_satadd(rgb[10],  mod);
-
-		rgb[12] = uint8_satadd(rgb[ 8], -mod);
-		rgb[13] = uint8_satadd(rgb[ 9], -mod);
-		rgb[14] = uint8_satadd(rgb[10], -mod);
-
-		uint32_t indexMsb = (_src[4]<<8) | _src[5];
-		uint32_t indexLsb = (_src[6]<<8) | _src[7];
-
-		for (uint32_t ii = 0; ii < 16; ++ii)
-		{
-			const uint32_t idx  = (ii&0xc) | ( (ii & 0x3)<<4);
-			const uint32_t lsbi = indexLsb & 1;
-			const uint32_t msbi = (indexMsb & 1)<<1;
-			const uint32_t pal  = (lsbi | msbi)<<2;
-
-			_dst[idx + 0] = rgb[pal+2];
-			_dst[idx + 1] = rgb[pal+1];
-			_dst[idx + 2] = rgb[pal+0];
-			_dst[idx + 3] = 255;
-
-			indexLsb >>= 1;
-			indexMsb >>= 1;
-		}
-	}
-
-	void decodeBlockEtc2ModeH(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		uint8_t rgb[16];
-
-		// 0       1       2       3       4       5       6       7
-		// 7654321076543210765432107654321076543210765432107654321076543210
-		// .rrrrggg...gb.bbbrrrrggggbbbbDD.mmmmmmmmmmmmmmmmllllllllllllllll
-		//  ^               ^           ^  ^               ^
-		//  +-- c0          +-- c1      |  +-- msb         +-- lsb
-		//                              +-- dist
-
-		rgb[ 0] =   (_src[0] >> 3) & 0xf;
-		rgb[ 1] = ( (_src[0] << 1) & 0xe)
-				| ( (_src[1] >> 4) & 0x1)
-				;
-		rgb[ 2] =   (_src[1]       & 0x8)
-				| ( (_src[1] << 1) & 0x6)
-				|   (_src[2] >> 7)
-				;
-
-		rgb[ 8] =   (_src[2] >> 3) & 0xf;
-		rgb[ 9] = ( (_src[2] << 1) & 0xe)
-				|   (_src[3] >> 7)
-				;
-		rgb[10] = (_src[2] >> 3) & 0xf;
-
-		rgb[ 0] = bitRangeConvert(rgb[ 0], 4, 8);
-		rgb[ 1] = bitRangeConvert(rgb[ 1], 4, 8);
-		rgb[ 2] = bitRangeConvert(rgb[ 2], 4, 8);
-		rgb[ 8] = bitRangeConvert(rgb[ 8], 4, 8);
-		rgb[ 9] = bitRangeConvert(rgb[ 9], 4, 8);
-		rgb[10] = bitRangeConvert(rgb[10], 4, 8);
-
-		uint32_t col0 = uint32_t(rgb[0]<<16) | uint32_t(rgb[1]<<8) | uint32_t(rgb[ 2]);
-		uint32_t col1 = uint32_t(rgb[8]<<16) | uint32_t(rgb[9]<<8) | uint32_t(rgb[10]);
-		uint8_t  dist = (_src[3] & 0x6) | (col0 >= col1);
-		int32_t  mod  = s_etc2Mod[dist];
-
-		rgb[ 4] = uint8_satadd(rgb[ 0], -mod);
-		rgb[ 5] = uint8_satadd(rgb[ 1], -mod);
-		rgb[ 6] = uint8_satadd(rgb[ 2], -mod);
-
-		rgb[ 0] = uint8_satadd(rgb[ 0],  mod);
-		rgb[ 1] = uint8_satadd(rgb[ 1],  mod);
-		rgb[ 2] = uint8_satadd(rgb[ 2],  mod);
-
-		rgb[12] = uint8_satadd(rgb[ 8], -mod);
-		rgb[13] = uint8_satadd(rgb[ 9], -mod);
-		rgb[14] = uint8_satadd(rgb[10], -mod);
-
-		rgb[ 8] = uint8_satadd(rgb[ 8],  mod);
-		rgb[ 9] = uint8_satadd(rgb[ 9],  mod);
-		rgb[10] = uint8_satadd(rgb[10],  mod);
-
-		uint32_t indexMsb = (_src[4]<<8) | _src[5];
-		uint32_t indexLsb = (_src[6]<<8) | _src[7];
-
-		for (uint32_t ii = 0; ii < 16; ++ii)
-		{
-			const uint32_t idx  = (ii&0xc) | ( (ii & 0x3)<<4);
-			const uint32_t lsbi = indexLsb & 1;
-			const uint32_t msbi = (indexMsb & 1)<<1;
-			const uint32_t pal  = (lsbi | msbi)<<2;
-
-			_dst[idx + 0] = rgb[pal+2];
-			_dst[idx + 1] = rgb[pal+1];
-			_dst[idx + 2] = rgb[pal+0];
-			_dst[idx + 3] = 255;
-
-			indexLsb >>= 1;
-			indexMsb >>= 1;
-		}
-	}
-
-	void decodeBlockEtc2ModePlanar(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		// 0       1       2       3       4       5       6       7
-		// 7654321076543210765432107654321076543210765432107654321076543210
-		// .rrrrrrg.ggggggb...bb.bbbrrrrr.rgggggggbbbbbbrrrrrrgggggggbbbbbb
-		//  ^                       ^                   ^
-		//  +-- c0                  +-- cH              +-- cV
-
-		uint8_t c0[3];
-		uint8_t cH[3];
-		uint8_t cV[3];
-
-		c0[0] =   (_src[0] >> 1) & 0x3f;
-		c0[1] = ( (_src[0] & 1) << 6)
-			  | ( (_src[1] >> 1) & 0x3f)
-			  ;
-		c0[2] = ( (_src[1] & 1) << 5)
-			  | ( (_src[2] & 0x18) )
-			  | ( (_src[2] << 1) & 6)
-			  | ( (_src[3] >> 7) )
-			  ;
-
-		cH[0] = ( (_src[3] >> 1) & 0x3e)
-			  | (_src[3] & 1)
-			  ;
-		cH[1] = _src[4] >> 1;
-		cH[2] = ( (_src[4] & 1) << 5)
-			  | (_src[5] >> 3)
-			  ;
-
-		cV[0] = ( (_src[5] & 0x7) << 3)
-			  | (_src[6] >> 5)
-			  ;
-		cV[1] = ( (_src[6] & 0x1f) << 2)
-			  | (_src[7] >> 5)
-			  ;
-		cV[2] = _src[7] & 0x3f;
-
-		c0[0] = bitRangeConvert(c0[0], 6, 8);
-		c0[1] = bitRangeConvert(c0[1], 7, 8);
-		c0[2] = bitRangeConvert(c0[2], 6, 8);
-
-		cH[0] = bitRangeConvert(cH[0], 6, 8);
-		cH[1] = bitRangeConvert(cH[1], 7, 8);
-		cH[2] = bitRangeConvert(cH[2], 6, 8);
-
-		cV[0] = bitRangeConvert(cV[0], 6, 8);
-		cV[1] = bitRangeConvert(cV[1], 7, 8);
-		cV[2] = bitRangeConvert(cV[2], 6, 8);
-
-		int16_t dy[3];
-		dy[0] = cV[0] - c0[0];
-		dy[1] = cV[1] - c0[1];
-		dy[2] = cV[2] - c0[2];
-
-		int16_t sx[3];
-		sx[0] = int16_t(c0[0])<<2;
-		sx[1] = int16_t(c0[1])<<2;
-		sx[2] = int16_t(c0[2])<<2;
-
-		int16_t ex[3];
-		ex[0] = int16_t(cH[0])<<2;
-		ex[1] = int16_t(cH[1])<<2;
-		ex[2] = int16_t(cH[2])<<2;
-
-		for (int32_t vv = 0; vv < 4; ++vv)
-		{
-			int16_t dx[3];
-			dx[0] = (ex[0] - sx[0])>>2;
-			dx[1] = (ex[1] - sx[1])>>2;
-			dx[2] = (ex[2] - sx[2])>>2;
-
-			for (int32_t hh = 0; hh < 4; ++hh)
-			{
-				const uint32_t idx  = (vv<<4) + (hh<<2);
-
-				_dst[idx + 0] = uint8_sat( (sx[2] + dx[2]*hh)>>2);
-				_dst[idx + 1] = uint8_sat( (sx[1] + dx[1]*hh)>>2);
-				_dst[idx + 2] = uint8_sat( (sx[0] + dx[0]*hh)>>2);
-				_dst[idx + 3] = 255;
-			}
-
-			sx[0] += dy[0];
-			sx[1] += dy[1];
-			sx[2] += dy[2];
-
-			ex[0] += dy[0];
-			ex[1] += dy[1];
-			ex[2] += dy[2];
-		}
-	}
-
-	void decodeBlockEtc12(uint8_t _dst[16*4], const uint8_t _src[8])
-	{
-		bool flipBit = 0 != (_src[3] & 0x1);
-		bool diffBit = 0 != (_src[3] & 0x2);
-
-		uint8_t rgb[8];
-
-		if (diffBit)
-		{
-			rgb[0]  = _src[0] >> 3;
-			rgb[1]  = _src[1] >> 3;
-			rgb[2]  = _src[2] >> 3;
-
-			int8_t diff[3];
-			diff[0] = int8_t( (_src[0] & 0x7)<<5)>>5;
-			diff[1] = int8_t( (_src[1] & 0x7)<<5)>>5;
-			diff[2] = int8_t( (_src[2] & 0x7)<<5)>>5;
-
-			int8_t rr = rgb[0] + diff[0];
-			int8_t gg = rgb[1] + diff[1];
-			int8_t bb = rgb[2] + diff[2];
-
-			// Etc2 3-modes
-			if (rr < 0 || rr > 31)
-			{
-				decodeBlockEtc2ModeT(_dst, _src);
-				return;
-			}
-			if (gg < 0 || gg > 31)
-			{
-				decodeBlockEtc2ModeH(_dst, _src);
-				return;
-			}
-			if (bb < 0 || bb > 31)
-			{
-				decodeBlockEtc2ModePlanar(_dst, _src);
-				return;
-			}
-
-			// Etc1
-			rgb[0] = bitRangeConvert(rgb[0], 5, 8);
-			rgb[1] = bitRangeConvert(rgb[1], 5, 8);
-			rgb[2] = bitRangeConvert(rgb[2], 5, 8);
-			rgb[4] = bitRangeConvert(rr, 5, 8);
-			rgb[5] = bitRangeConvert(gg, 5, 8);
-			rgb[6] = bitRangeConvert(bb, 5, 8);
-		}
-		else
-		{
-			rgb[0] = _src[0] >> 4;
-			rgb[1] = _src[1] >> 4;
-			rgb[2] = _src[2] >> 4;
-
-			rgb[4] = _src[0] & 0xf;
-			rgb[5] = _src[1] & 0xf;
-			rgb[6] = _src[2] & 0xf;
-
-			rgb[0] = bitRangeConvert(rgb[0], 4, 8);
-			rgb[1] = bitRangeConvert(rgb[1], 4, 8);
-			rgb[2] = bitRangeConvert(rgb[2], 4, 8);
-			rgb[4] = bitRangeConvert(rgb[4], 4, 8);
-			rgb[5] = bitRangeConvert(rgb[5], 4, 8);
-			rgb[6] = bitRangeConvert(rgb[6], 4, 8);
-		}
-
-		uint32_t table[2];
-		table[0] = (_src[3] >> 5) & 0x7;
-		table[1] = (_src[3] >> 2) & 0x7;
-
-		uint32_t indexMsb = (_src[4]<<8) | _src[5];
-		uint32_t indexLsb = (_src[6]<<8) | _src[7];
-
-		if (flipBit)
-		{
-			for (uint32_t ii = 0; ii < 16; ++ii)
-			{
-				const uint32_t block = (ii>>1)&1;
-				const uint32_t color = block<<2;
-				const uint32_t idx   = (ii&0xc) | ( (ii & 0x3)<<4);
-				const uint32_t lsbi  = indexLsb & 1;
-				const uint32_t msbi  = (indexMsb & 1)<<1;
-				const  int32_t mod   = s_etc1Mod[table[block] ][lsbi | msbi];
-
-				_dst[idx + 0] = uint8_satadd(rgb[color+2], mod);
-				_dst[idx + 1] = uint8_satadd(rgb[color+1], mod);
-				_dst[idx + 2] = uint8_satadd(rgb[color+0], mod);
-				_dst[idx + 3] = 255;
-
-				indexLsb >>= 1;
-				indexMsb >>= 1;
-			}
-		}
-		else
-		{
-			for (uint32_t ii = 0; ii < 16; ++ii)
-			{
-				const uint32_t block = ii>>3;
-				const uint32_t color = block<<2;
-				const uint32_t idx   = (ii&0xc) | ( (ii & 0x3)<<4);
-				const uint32_t lsbi  = indexLsb & 1;
-				const uint32_t msbi  = (indexMsb & 1)<<1;
-				const  int32_t mod   = s_etc1Mod[table[block] ][lsbi | msbi];
-
-				_dst[idx + 0] = uint8_satadd(rgb[color+2], mod);
-				_dst[idx + 1] = uint8_satadd(rgb[color+1], mod);
-				_dst[idx + 2] = uint8_satadd(rgb[color+0], mod);
-				_dst[idx + 3] = 255;
-
-				indexLsb >>= 1;
-				indexMsb >>= 1;
-			}
-		}
-	}
-
-	static const uint8_t s_pvrtcFactors[16][4] =
-	{
-		{  4,  4,  4,  4 },
-		{  2,  6,  2,  6 },
-		{  8,  0,  8,  0 },
-		{  6,  2,  6,  2 },
-
-		{  2,  2,  6,  6 },
-		{  1,  3,  3,  9 },
-		{  4,  0, 12,  0 },
-		{  3,  1,  9,  3 },
-
-		{  8,  8,  0,  0 },
-		{  4, 12,  0,  0 },
-		{ 16,  0,  0,  0 },
-		{ 12,  4,  0,  0 },
-
-		{  6,  6,  2,  2 },
-		{  3,  9,  1,  3 },
-		{ 12,  0,  4,  0 },
-		{  9,  3,  3,  1 },
-	};
-
-	static const uint8_t s_pvrtcWeights[8][4] =
-	{
-		{ 8, 0, 8, 0 },
-		{ 5, 3, 5, 3 },
-		{ 3, 5, 3, 5 },
-		{ 0, 8, 0, 8 },
-
-		{ 8, 0, 8, 0 },
-		{ 4, 4, 4, 4 },
-		{ 4, 4, 4, 4 },
-		{ 0, 8, 0, 8 },
-	};
-
-	uint32_t morton2d(uint32_t _x, uint32_t _y)
-	{
-		using namespace bx;
-		const uint32_t tmpx   = uint32_part1by1(_x);
-		const uint32_t xbits  = uint32_sll(tmpx, 1);
-		const uint32_t ybits  = uint32_part1by1(_y);
-		const uint32_t result = uint32_or(xbits, ybits);
-		return result;
-	}
-
-	uint32_t getColor(const uint8_t _src[8])
-	{
-		return 0
-			| _src[7]<<24
-			| _src[6]<<16
-			| _src[5]<<8
-			| _src[4]
-			;
-	}
-
-	void decodeBlockPtc14RgbAddA(uint32_t _block, uint32_t* _r, uint32_t* _g, uint32_t* _b, uint8_t _factor)
-	{
-		if (0 != (_block & (1<<15) ) )
-		{
-			*_r += bitRangeConvert( (_block >> 10) & 0x1f, 5, 8) * _factor;
-			*_g += bitRangeConvert( (_block >>  5) & 0x1f, 5, 8) * _factor;
-			*_b += bitRangeConvert( (_block >>  1) & 0x0f, 4, 8) * _factor;
-		}
-		else
-		{
-			*_r += bitRangeConvert( (_block >>  8) &  0xf, 4, 8) * _factor;
-			*_g += bitRangeConvert( (_block >>  4) &  0xf, 4, 8) * _factor;
-			*_b += bitRangeConvert( (_block >>  1) &  0x7, 3, 8) * _factor;
-		}
-	}
-
-	void decodeBlockPtc14RgbAddB(uint32_t _block, uint32_t* _r, uint32_t* _g, uint32_t* _b, uint8_t _factor)
-	{
-		if (0 != (_block & (1<<31) ) )
-		{
-			*_r += bitRangeConvert( (_block >> 26) & 0x1f, 5, 8) * _factor;
-			*_g += bitRangeConvert( (_block >> 21) & 0x1f, 5, 8) * _factor;
-			*_b += bitRangeConvert( (_block >> 16) & 0x1f, 5, 8) * _factor;
-		}
-		else
-		{
-			*_r += bitRangeConvert( (_block >> 24) &  0xf, 4, 8) * _factor;
-			*_g += bitRangeConvert( (_block >> 20) &  0xf, 4, 8) * _factor;
-			*_b += bitRangeConvert( (_block >> 16) &  0xf, 4, 8) * _factor;
-		}
-	}
-
-	void decodeBlockPtc14(uint8_t _dst[16*4], const uint8_t* _src, uint32_t _x, uint32_t _y, uint32_t _width, uint32_t _height)
-	{
-		// 0       1       2       3       4       5       6       7
-		// 7654321076543210765432107654321076543210765432107654321076543210
-		// mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyrrrrrgggggbbbbbxrrrrrgggggbbbbp
-		// ^                               ^^              ^^             ^
-		// +-- modulation data             |+- B color     |+- A color    |
-		//                                 +-- B opaque    +-- A opaque   |
-		//                                           alpha punchthrough --+
-
-		const uint8_t* bc = &_src[morton2d(_x, _y) * 8];
-
-		uint32_t mod = 0
-			| bc[3]<<24
-			| bc[2]<<16
-			| bc[1]<<8
-			| bc[0]
-			;
-
-		const bool punchthrough = !!(bc[7] & 1);
-		const uint8_t* weightTable = s_pvrtcWeights[4 * punchthrough];
-		const uint8_t* factorTable = s_pvrtcFactors[0];
-
-		for (int yy = 0; yy < 4; ++yy)
-		{
-			const uint32_t yOffset = (yy < 2) ? -1 : 0;
-			const uint32_t y0 = (_y + yOffset) % _height;
-			const uint32_t y1 = (y0 +       1) % _height;
-
-			for (int xx = 0; xx < 4; ++xx)
-			{
-				const uint32_t xOffset = (xx < 2) ? -1 : 0;
-				const uint32_t x0 = (_x + xOffset) % _width;
-				const uint32_t x1 = (x0 +       1) % _width;
-
-				const uint32_t bc0 = getColor(&_src[morton2d(x0, y0) * 8]);
-				const uint32_t bc1 = getColor(&_src[morton2d(x1, y0) * 8]);
-				const uint32_t bc2 = getColor(&_src[morton2d(x0, y1) * 8]);
-				const uint32_t bc3 = getColor(&_src[morton2d(x1, y1) * 8]);
-
-				const uint8_t f0 = factorTable[0];
-				const uint8_t f1 = factorTable[1];
-				const uint8_t f2 = factorTable[2];
-				const uint8_t f3 = factorTable[3];
-
-				uint32_t ar = 0, ag = 0, ab = 0;
-				decodeBlockPtc14RgbAddA(bc0, &ar, &ag, &ab, f0);
-				decodeBlockPtc14RgbAddA(bc1, &ar, &ag, &ab, f1);
-				decodeBlockPtc14RgbAddA(bc2, &ar, &ag, &ab, f2);
-				decodeBlockPtc14RgbAddA(bc3, &ar, &ag, &ab, f3);
-
-				uint32_t br = 0, bg = 0, bb = 0;
-				decodeBlockPtc14RgbAddB(bc0, &br, &bg, &bb, f0);
-				decodeBlockPtc14RgbAddB(bc1, &br, &bg, &bb, f1);
-				decodeBlockPtc14RgbAddB(bc2, &br, &bg, &bb, f2);
-				decodeBlockPtc14RgbAddB(bc3, &br, &bg, &bb, f3);
-
-				const uint8_t* weight = &weightTable[(mod & 3)*4];
-				const uint8_t wa = weight[0];
-				const uint8_t wb = weight[1];
-
-				_dst[(yy*4 + xx)*4+0] = uint8_t( (ab * wa + bb * wb) >> 7);
-				_dst[(yy*4 + xx)*4+1] = uint8_t( (ag * wa + bg * wb) >> 7);
-				_dst[(yy*4 + xx)*4+2] = uint8_t( (ar * wa + br * wb) >> 7);
-				_dst[(yy*4 + xx)*4+3] = 255;
-
-				mod >>= 2;
-				factorTable += 4;
-			}
-		}
-	}
-
-	void decodeBlockPtc14ARgbaAddA(uint32_t _block, uint32_t* _r, uint32_t* _g, uint32_t* _b, uint32_t* _a, uint8_t _factor)
-	{
-		if (0 != (_block & (1<<15) ) )
-		{
-			*_r += bitRangeConvert( (_block >> 10) & 0x1f, 5, 8) * _factor;
-			*_g += bitRangeConvert( (_block >>  5) & 0x1f, 5, 8) * _factor;
-			*_b += bitRangeConvert( (_block >>  1) & 0x0f, 4, 8) * _factor;
-			*_a += 255 * _factor;
-		}
-		else
-		{
-			*_r += bitRangeConvert( (_block >>  8) &  0xf, 4, 8) * _factor;
-			*_g += bitRangeConvert( (_block >>  4) &  0xf, 4, 8) * _factor;
-			*_b += bitRangeConvert( (_block >>  1) &  0x7, 3, 8) * _factor;
-			*_a += bitRangeConvert( (_block >> 12) &  0x7, 3, 8) * _factor;
-		}
-	}
-
-	void decodeBlockPtc14ARgbaAddB(uint32_t _block, uint32_t* _r, uint32_t* _g, uint32_t* _b, uint32_t* _a, uint8_t _factor)
-	{
-		if (0 != (_block & (1<<31) ) )
-		{
-			*_r += bitRangeConvert( (_block >> 26) & 0x1f, 5, 8) * _factor;
-			*_g += bitRangeConvert( (_block >> 21) & 0x1f, 5, 8) * _factor;
-			*_b += bitRangeConvert( (_block >> 16) & 0x1f, 5, 8) * _factor;
-			*_a += 255 * _factor;
-		}
-		else
-		{
-			*_r += bitRangeConvert( (_block >> 24) &  0xf, 4, 8) * _factor;
-			*_g += bitRangeConvert( (_block >> 20) &  0xf, 4, 8) * _factor;
-			*_b += bitRangeConvert( (_block >> 16) &  0xf, 4, 8) * _factor;
-			*_a += bitRangeConvert( (_block >> 28) &  0x7, 3, 8) * _factor;
-		}
-	}
-
-	void decodeBlockPtc14A(uint8_t _dst[16*4], const uint8_t* _src, uint32_t _x, uint32_t _y, uint32_t _width, uint32_t _height)
-	{
-		// 0       1       2       3       4       5       6       7
-		// 7654321076543210765432107654321076543210765432107654321076543210
-		// mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyrrrrrgggggbbbbbxrrrrrgggggbbbbp
-		// ^                               ^^              ^^             ^
-		// +-- modulation data             |+- B color     |+- A color    |
-		//                                 +-- B opaque    +-- A opaque   |
-		//                                           alpha punchthrough --+
-
-		const uint8_t* bc = &_src[morton2d(_x, _y) * 8];
-
-		uint32_t mod = 0
-			| bc[3]<<24
-			| bc[2]<<16
-			| bc[1]<<8
-			| bc[0]
-			;
-
-		const bool punchthrough = !!(bc[7] & 1);
-		const uint8_t* weightTable = s_pvrtcWeights[4 * punchthrough];
-		const uint8_t* factorTable = s_pvrtcFactors[0];
-
-		for (int yy = 0; yy < 4; ++yy)
-		{
-			const uint32_t yOffset = (yy < 2) ? -1 : 0;
-			const uint32_t y0 = (_y + yOffset) % _height;
-			const uint32_t y1 = (y0 +       1) % _height;
-
-			for (int xx = 0; xx < 4; ++xx)
-			{
-				const uint32_t xOffset = (xx < 2) ? -1 : 0;
-				const uint32_t x0 = (_x + xOffset) % _width;
-				const uint32_t x1 = (x0 +       1) % _width;
-
-				const uint32_t bc0 = getColor(&_src[morton2d(x0, y0) * 8]);
-				const uint32_t bc1 = getColor(&_src[morton2d(x1, y0) * 8]);
-				const uint32_t bc2 = getColor(&_src[morton2d(x0, y1) * 8]);
-				const uint32_t bc3 = getColor(&_src[morton2d(x1, y1) * 8]);
-
-				const uint8_t f0 = factorTable[0];
-				const uint8_t f1 = factorTable[1];
-				const uint8_t f2 = factorTable[2];
-				const uint8_t f3 = factorTable[3];
-
-				uint32_t ar = 0, ag = 0, ab = 0, aa = 0;
-				decodeBlockPtc14ARgbaAddA(bc0, &ar, &ag, &ab, &aa, f0);
-				decodeBlockPtc14ARgbaAddA(bc1, &ar, &ag, &ab, &aa, f1);
-				decodeBlockPtc14ARgbaAddA(bc2, &ar, &ag, &ab, &aa, f2);
-				decodeBlockPtc14ARgbaAddA(bc3, &ar, &ag, &ab, &aa, f3);
-
-				uint32_t br = 0, bg = 0, bb = 0, ba = 0;
-				decodeBlockPtc14ARgbaAddB(bc0, &br, &bg, &bb, &ba, f0);
-				decodeBlockPtc14ARgbaAddB(bc1, &br, &bg, &bb, &ba, f1);
-				decodeBlockPtc14ARgbaAddB(bc2, &br, &bg, &bb, &ba, f2);
-				decodeBlockPtc14ARgbaAddB(bc3, &br, &bg, &bb, &ba, f3);
-
-				const uint8_t* weight = &weightTable[(mod & 3)*4];
-				const uint8_t wa = weight[0];
-				const uint8_t wb = weight[1];
-				const uint8_t wc = weight[2];
-				const uint8_t wd = weight[3];
-
-				_dst[(yy*4 + xx)*4+0] = uint8_t( (ab * wa + bb * wb) >> 7);
-				_dst[(yy*4 + xx)*4+1] = uint8_t( (ag * wa + bg * wb) >> 7);
-				_dst[(yy*4 + xx)*4+2] = uint8_t( (ar * wa + br * wb) >> 7);
-				_dst[(yy*4 + xx)*4+3] = uint8_t( (aa * wc + ba * wd) >> 7);
-
-				mod >>= 2;
-				factorTable += 4;
-			}
-		}
-	}
-
-	ImageContainer* imageAlloc(bx::AllocatorI* _allocator, TextureFormat::Enum _format, uint16_t _width, uint16_t _height, uint16_t _depth, uint16_t _numLayers, bool _cubeMap, bool _hasMips, const void* _data)
-	{
-		const ImageBlockInfo& blockInfo = getBlockInfo(_format);
-		const uint16_t blockWidth  = blockInfo.blockWidth;
-		const uint16_t blockHeight = blockInfo.blockHeight;
-		const uint16_t minBlockX   = blockInfo.minBlockX;
-		const uint16_t minBlockY   = blockInfo.minBlockY;
-
-		_width     = bx::uint16_max(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth)*blockWidth);
-		_height    = bx::uint16_max(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
-		_depth     = bx::uint16_max(1, _depth);
-		_numLayers = bx::uint16_max(1, _numLayers);
-
-		const uint8_t numMips = _hasMips ? imageGetNumMips(_format, _width, _height) : 1;
-		uint32_t size = imageGetSize(NULL, _width, _height, _depth, _cubeMap, _hasMips, _numLayers, _format);
-
-		ImageContainer* imageContainer = (ImageContainer*)BX_ALLOC(_allocator, size + sizeof(ImageContainer) );
-
-		imageContainer->m_allocator = _allocator;
-		imageContainer->m_data      = imageContainer + 1;
-		imageContainer->m_format    = _format;
-		imageContainer->m_size      = size;
-		imageContainer->m_offset    = 0;
-		imageContainer->m_width     = _width;
-		imageContainer->m_height    = _height;
-		imageContainer->m_depth     = _depth;
-		imageContainer->m_numLayers = _numLayers;
-		imageContainer->m_numMips   = numMips;
-		imageContainer->m_hasAlpha  = false;
-		imageContainer->m_cubeMap   = _cubeMap;
-		imageContainer->m_ktx       = false;
-		imageContainer->m_ktxLE     = false;
-		imageContainer->m_srgb      = false;
-
-		if (NULL != _data)
-		{
-			bx::memCopy(imageContainer->m_data, _data, imageContainer->m_size);
-		}
-
-		return imageContainer;
-	}
-
-	void imageFree(ImageContainer* _imageContainer)
-	{
-		BX_FREE(_imageContainer->m_allocator, _imageContainer);
-	}
-
-// DDS
-#define DDS_MAGIC             BX_MAKEFOURCC('D', 'D', 'S', ' ')
-#define DDS_HEADER_SIZE       124
-
-#define DDS_DXT1 BX_MAKEFOURCC('D', 'X', 'T', '1')
-#define DDS_DXT2 BX_MAKEFOURCC('D', 'X', 'T', '2')
-#define DDS_DXT3 BX_MAKEFOURCC('D', 'X', 'T', '3')
-#define DDS_DXT4 BX_MAKEFOURCC('D', 'X', 'T', '4')
-#define DDS_DXT5 BX_MAKEFOURCC('D', 'X', 'T', '5')
-#define DDS_ATI1 BX_MAKEFOURCC('A', 'T', 'I', '1')
-#define DDS_BC4U BX_MAKEFOURCC('B', 'C', '4', 'U')
-#define DDS_ATI2 BX_MAKEFOURCC('A', 'T', 'I', '2')
-#define DDS_BC5U BX_MAKEFOURCC('B', 'C', '5', 'U')
-#define DDS_DX10 BX_MAKEFOURCC('D', 'X', '1', '0')
-
-#define DDS_A8R8G8B8       21
-#define DDS_R5G6B5         23
-#define DDS_A1R5G5B5       25
-#define DDS_A4R4G4B4       26
-#define DDS_A2B10G10R10    31
-#define DDS_G16R16         34
-#define DDS_A2R10G10B10    35
-#define DDS_A16B16G16R16   36
-#define DDS_A8L8           51
-#define DDS_R16F           111
-#define DDS_G16R16F        112
-#define DDS_A16B16G16R16F  113
-#define DDS_R32F           114
-#define DDS_G32R32F        115
-#define DDS_A32B32G32R32F  116
-
-#define DDS_FORMAT_R32G32B32A32_FLOAT  2
-#define DDS_FORMAT_R32G32B32A32_UINT   3
-#define DDS_FORMAT_R16G16B16A16_FLOAT  10
-#define DDS_FORMAT_R16G16B16A16_UNORM  11
-#define DDS_FORMAT_R16G16B16A16_UINT   12
-#define DDS_FORMAT_R32G32_FLOAT        16
-#define DDS_FORMAT_R32G32_UINT         17
-#define DDS_FORMAT_R10G10B10A2_UNORM   24
-#define DDS_FORMAT_R11G11B10_FLOAT     26
-#define DDS_FORMAT_R8G8B8A8_UNORM      28
-#define DDS_FORMAT_R8G8B8A8_UNORM_SRGB 29
-#define DDS_FORMAT_R16G16_FLOAT        34
-#define DDS_FORMAT_R16G16_UNORM        35
-#define DDS_FORMAT_R32_FLOAT           41
-#define DDS_FORMAT_R32_UINT            42
-#define DDS_FORMAT_R8G8_UNORM          49
-#define DDS_FORMAT_R16_FLOAT           54
-#define DDS_FORMAT_R16_UNORM           56
-#define DDS_FORMAT_R8_UNORM            61
-#define DDS_FORMAT_R1_UNORM            66
-#define DDS_FORMAT_BC1_UNORM           71
-#define DDS_FORMAT_BC1_UNORM_SRGB      72
-#define DDS_FORMAT_BC2_UNORM           74
-#define DDS_FORMAT_BC2_UNORM_SRGB      75
-#define DDS_FORMAT_BC3_UNORM           77
-#define DDS_FORMAT_BC3_UNORM_SRGB      78
-#define DDS_FORMAT_BC4_UNORM           80
-#define DDS_FORMAT_BC5_UNORM           83
-#define DDS_FORMAT_B5G6R5_UNORM        85
-#define DDS_FORMAT_B5G5R5A1_UNORM      86
-#define DDS_FORMAT_B8G8R8A8_UNORM      87
-#define DDS_FORMAT_B8G8R8A8_UNORM_SRGB 91
-#define DDS_FORMAT_BC6H_SF16           96
-#define DDS_FORMAT_BC7_UNORM           98
-#define DDS_FORMAT_BC7_UNORM_SRGB      99
-#define DDS_FORMAT_B4G4R4A4_UNORM      115
-
-#define DDSD_CAPS                   0x00000001
-#define DDSD_HEIGHT                 0x00000002
-#define DDSD_WIDTH                  0x00000004
-#define DDSD_PITCH                  0x00000008
-#define DDSD_PIXELFORMAT            0x00001000
-#define DDSD_MIPMAPCOUNT            0x00020000
-#define DDSD_LINEARSIZE             0x00080000
-#define DDSD_DEPTH                  0x00800000
-
-#define DDPF_ALPHAPIXELS            0x00000001
-#define DDPF_ALPHA                  0x00000002
-#define DDPF_FOURCC                 0x00000004
-#define DDPF_INDEXED                0x00000020
-#define DDPF_RGB                    0x00000040
-#define DDPF_YUV                    0x00000200
-#define DDPF_LUMINANCE              0x00020000
-
-#define DDSCAPS_COMPLEX             0x00000008
-#define DDSCAPS_TEXTURE             0x00001000
-#define DDSCAPS_MIPMAP              0x00400000
-
-#define DDSCAPS2_CUBEMAP            0x00000200
-#define DDSCAPS2_CUBEMAP_POSITIVEX  0x00000400
-#define DDSCAPS2_CUBEMAP_NEGATIVEX  0x00000800
-#define DDSCAPS2_CUBEMAP_POSITIVEY  0x00001000
-#define DDSCAPS2_CUBEMAP_NEGATIVEY  0x00002000
-#define DDSCAPS2_CUBEMAP_POSITIVEZ  0x00004000
-#define DDSCAPS2_CUBEMAP_NEGATIVEZ  0x00008000
-
-#define DDS_CUBEMAP_ALLFACES (DDSCAPS2_CUBEMAP_POSITIVEX|DDSCAPS2_CUBEMAP_NEGATIVEX \
-							 |DDSCAPS2_CUBEMAP_POSITIVEY|DDSCAPS2_CUBEMAP_NEGATIVEY \
-							 |DDSCAPS2_CUBEMAP_POSITIVEZ|DDSCAPS2_CUBEMAP_NEGATIVEZ)
-
-#define DDSCAPS2_VOLUME             0x00200000
-
-	struct TranslateDdsFormat
-	{
-		uint32_t m_format;
-		TextureFormat::Enum m_textureFormat;
-		bool m_srgb;
-	};
-
-	static const TranslateDdsFormat s_translateDdsFourccFormat[] =
-	{
-		{ DDS_DXT1,                  TextureFormat::BC1,     false },
-		{ DDS_DXT2,                  TextureFormat::BC2,     false },
-		{ DDS_DXT3,                  TextureFormat::BC2,     false },
-		{ DDS_DXT4,                  TextureFormat::BC3,     false },
-		{ DDS_DXT5,                  TextureFormat::BC3,     false },
-		{ DDS_ATI1,                  TextureFormat::BC4,     false },
-		{ DDS_BC4U,                  TextureFormat::BC4,     false },
-		{ DDS_ATI2,                  TextureFormat::BC5,     false },
-		{ DDS_BC5U,                  TextureFormat::BC5,     false },
-		{ DDS_A16B16G16R16,          TextureFormat::RGBA16,  false },
-		{ DDS_A16B16G16R16F,         TextureFormat::RGBA16F, false },
-		{ DDPF_RGB|DDPF_ALPHAPIXELS, TextureFormat::BGRA8,   false },
-		{ DDPF_INDEXED,              TextureFormat::R8,      false },
-		{ DDPF_LUMINANCE,            TextureFormat::R8,      false },
-		{ DDPF_ALPHA,                TextureFormat::R8,      false },
-		{ DDS_R16F,                  TextureFormat::R16F,    false },
-		{ DDS_R32F,                  TextureFormat::R32F,    false },
-		{ DDS_A8L8,                  TextureFormat::RG8,     false },
-		{ DDS_G16R16,                TextureFormat::RG16,    false },
-		{ DDS_G16R16F,               TextureFormat::RG16F,   false },
-		{ DDS_G32R32F,               TextureFormat::RG32F,   false },
-		{ DDS_A8R8G8B8,              TextureFormat::BGRA8,   false },
-		{ DDS_A16B16G16R16,          TextureFormat::RGBA16,  false },
-		{ DDS_A16B16G16R16F,         TextureFormat::RGBA16F, false },
-		{ DDS_A32B32G32R32F,         TextureFormat::RGBA32F, false },
-		{ DDS_R5G6B5,                TextureFormat::R5G6B5,  false },
-		{ DDS_A4R4G4B4,              TextureFormat::RGBA4,   false },
-		{ DDS_A1R5G5B5,              TextureFormat::RGB5A1,  false },
-		{ DDS_A2B10G10R10,           TextureFormat::RGB10A2, false },
-	};
-
-	static const TranslateDdsFormat s_translateDxgiFormat[] =
-	{
-		{ DDS_FORMAT_BC1_UNORM,           TextureFormat::BC1,        false },
-		{ DDS_FORMAT_BC1_UNORM_SRGB,      TextureFormat::BC1,        true  },
-		{ DDS_FORMAT_BC2_UNORM,           TextureFormat::BC2,        false },
-		{ DDS_FORMAT_BC2_UNORM_SRGB,      TextureFormat::BC2,        true  },
-		{ DDS_FORMAT_BC3_UNORM,           TextureFormat::BC3,        false },
-		{ DDS_FORMAT_BC3_UNORM_SRGB,      TextureFormat::BC3,        true  },
-		{ DDS_FORMAT_BC4_UNORM,           TextureFormat::BC4,        false },
-		{ DDS_FORMAT_BC5_UNORM,           TextureFormat::BC5,        false },
-		{ DDS_FORMAT_BC6H_SF16,           TextureFormat::BC6H,       false },
-		{ DDS_FORMAT_BC7_UNORM,           TextureFormat::BC7,        false },
-		{ DDS_FORMAT_BC7_UNORM_SRGB,      TextureFormat::BC7,        true  },
-
-		{ DDS_FORMAT_R1_UNORM,            TextureFormat::R1,         false },
-		{ DDS_FORMAT_R8_UNORM,            TextureFormat::R8,         false },
-		{ DDS_FORMAT_R16_UNORM,           TextureFormat::R16,        false },
-		{ DDS_FORMAT_R16_FLOAT,           TextureFormat::R16F,       false },
-		{ DDS_FORMAT_R32_UINT,            TextureFormat::R32U,       false },
-		{ DDS_FORMAT_R32_FLOAT,           TextureFormat::R32F,       false },
-		{ DDS_FORMAT_R8G8_UNORM,          TextureFormat::RG8,        false },
-		{ DDS_FORMAT_R16G16_UNORM,        TextureFormat::RG16,       false },
-		{ DDS_FORMAT_R16G16_FLOAT,        TextureFormat::RG16F,      false },
-		{ DDS_FORMAT_R32G32_UINT,         TextureFormat::RG32U,      false },
-		{ DDS_FORMAT_R32G32_FLOAT,        TextureFormat::RG32F,      false },
-		{ DDS_FORMAT_B8G8R8A8_UNORM,      TextureFormat::BGRA8,      false },
-		{ DDS_FORMAT_B8G8R8A8_UNORM_SRGB, TextureFormat::BGRA8,      true  },
-		{ DDS_FORMAT_R8G8B8A8_UNORM,      TextureFormat::RGBA8,      false },
-		{ DDS_FORMAT_R8G8B8A8_UNORM_SRGB, TextureFormat::RGBA8,      true  },
-		{ DDS_FORMAT_R16G16B16A16_UNORM,  TextureFormat::RGBA16,     false },
-		{ DDS_FORMAT_R16G16B16A16_FLOAT,  TextureFormat::RGBA16F,    false },
-		{ DDS_FORMAT_R32G32B32A32_UINT,   TextureFormat::RGBA32U,    false },
-		{ DDS_FORMAT_R32G32B32A32_FLOAT,  TextureFormat::RGBA32F,    false },
-		{ DDS_FORMAT_B5G6R5_UNORM,        TextureFormat::R5G6B5,     false },
-		{ DDS_FORMAT_B4G4R4A4_UNORM,      TextureFormat::RGBA4,      false },
-		{ DDS_FORMAT_B5G5R5A1_UNORM,      TextureFormat::RGB5A1,     false },
-		{ DDS_FORMAT_R10G10B10A2_UNORM,   TextureFormat::RGB10A2,    false },
-		{ DDS_FORMAT_R11G11B10_FLOAT,     TextureFormat::R11G11B10F, false },
-	};
-
-	struct TranslateDdsPixelFormat
-	{
-		uint32_t m_bitCount;
-		uint32_t m_bitmask[4];
-		TextureFormat::Enum m_textureFormat;
-	};
-
-	static const TranslateDdsPixelFormat s_translateDdsPixelFormat[] =
-	{
-		{  8, { 0x000000ff, 0x00000000, 0x00000000, 0x00000000 }, TextureFormat::R8      },
-		{ 16, { 0x0000ffff, 0x00000000, 0x00000000, 0x00000000 }, TextureFormat::R16U    },
-		{ 16, { 0x00000f00, 0x000000f0, 0x0000000f, 0x0000f000 }, TextureFormat::RGBA4   },
-		{ 16, { 0x0000f800, 0x000007e0, 0x0000001f, 0x00000000 }, TextureFormat::R5G6B5  },
-		{ 16, { 0x00007c00, 0x000003e0, 0x0000001f, 0x00008000 }, TextureFormat::RGB5A1  },
-		{ 24, { 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 }, TextureFormat::RGB8    },
-		{ 32, { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }, TextureFormat::BGRA8   },
-		{ 32, { 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 }, TextureFormat::BGRA8   },
-		{ 32, { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 }, TextureFormat::RGB10A2 },
-		{ 32, { 0x0000ffff, 0xffff0000, 0x00000000, 0x00000000 }, TextureFormat::RG16    },
-		{ 32, { 0xffffffff, 0x00000000, 0x00000000, 0x00000000 }, TextureFormat::R32U    },
-	};
-
-	bool imageParseDds(ImageContainer& _imageContainer, bx::ReaderSeekerI* _reader)
-	{
-		uint32_t headerSize;
-		bx::read(_reader, headerSize);
-
-		if (headerSize < DDS_HEADER_SIZE)
-		{
-			return false;
-		}
-
-		uint32_t flags;
-		bx::read(_reader, flags);
-
-		if ( (flags & (DDSD_CAPS|DDSD_HEIGHT|DDSD_WIDTH|DDSD_PIXELFORMAT) ) != (DDSD_CAPS|DDSD_HEIGHT|DDSD_WIDTH|DDSD_PIXELFORMAT) )
-		{
-			return false;
-		}
-
-		uint32_t height;
-		bx::read(_reader, height);
-
-		uint32_t width;
-		bx::read(_reader, width);
-
-		uint32_t pitch;
-		bx::read(_reader, pitch);
-
-		uint32_t depth;
-		bx::read(_reader, depth);
-
-		uint32_t mips;
-		bx::read(_reader, mips);
-
-		bx::skip(_reader, 44); // reserved
-
-		uint32_t pixelFormatSize;
-		bx::read(_reader, pixelFormatSize);
-
-		uint32_t pixelFlags;
-		bx::read(_reader, pixelFlags);
-
-		uint32_t fourcc;
-		bx::read(_reader, fourcc);
-
-		uint32_t bitCount;
-		bx::read(_reader, bitCount);
-
-		uint32_t bitmask[4];
-		bx::read(_reader, bitmask, sizeof(bitmask) );
-
-		uint32_t caps[4];
-		bx::read(_reader, caps);
-
-		bx::skip(_reader, 4); // reserved
-
-		uint32_t dxgiFormat = 0;
-		uint32_t arraySize = 1;
-		if (DDPF_FOURCC == pixelFlags
-		&&  DDS_DX10 == fourcc)
-		{
-			bx::read(_reader, dxgiFormat);
-
-			uint32_t dims;
-			bx::read(_reader, dims);
-
-			uint32_t miscFlags;
-			bx::read(_reader, miscFlags);
-
-			bx::read(_reader, arraySize);
-
-			uint32_t miscFlags2;
-			bx::read(_reader, miscFlags2);
-		}
-
-		if ( (caps[0] & DDSCAPS_TEXTURE) == 0)
-		{
-			return false;
-		}
-
-		bool cubeMap = 0 != (caps[1] & DDSCAPS2_CUBEMAP);
-		if (cubeMap)
-		{
-			if ( (caps[1] & DDS_CUBEMAP_ALLFACES) != DDS_CUBEMAP_ALLFACES)
-			{
-				// partial cube map is not supported.
-				return false;
-			}
-		}
-
-		TextureFormat::Enum format = TextureFormat::Unknown;
-		bool hasAlpha = pixelFlags & DDPF_ALPHAPIXELS;
-		bool srgb = false;
-
-		if (dxgiFormat == 0)
-		{
-			if (DDPF_FOURCC == (pixelFlags & DDPF_FOURCC) )
-			{
-				for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDdsFourccFormat); ++ii)
-				{
-					if (s_translateDdsFourccFormat[ii].m_format == fourcc)
-					{
-						format = s_translateDdsFourccFormat[ii].m_textureFormat;
-						break;
-					}
-				}
-			}
-			else
-			{
-				for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDdsPixelFormat); ++ii)
-				{
-					const TranslateDdsPixelFormat& pf = s_translateDdsPixelFormat[ii];
-					if (pf.m_bitCount   == bitCount
-					&&  pf.m_bitmask[0] == bitmask[0]
-					&&  pf.m_bitmask[1] == bitmask[1]
-					&&  pf.m_bitmask[2] == bitmask[2]
-					&&  pf.m_bitmask[3] == bitmask[3])
-					{
-						format = pf.m_textureFormat;
-						break;
-					}
-				}
-			}
-		}
-		else
-		{
-			for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDxgiFormat); ++ii)
-			{
-				if (s_translateDxgiFormat[ii].m_format == dxgiFormat)
-				{
-					format = s_translateDxgiFormat[ii].m_textureFormat;
-					srgb = s_translateDxgiFormat[ii].m_srgb;
-					break;
-				}
-			}
-		}
-
-		_imageContainer.m_allocator = NULL;
-		_imageContainer.m_data      = NULL;
-		_imageContainer.m_size      = 0;
-		_imageContainer.m_offset    = (uint32_t)bx::seek(_reader);
-		_imageContainer.m_width     = width;
-		_imageContainer.m_height    = height;
-		_imageContainer.m_depth     = depth;
-		_imageContainer.m_format    = format;
-		_imageContainer.m_numLayers = uint16_t(arraySize);
-		_imageContainer.m_numMips   = uint8_t( (caps[0] & DDSCAPS_MIPMAP) ? mips : 1);
-		_imageContainer.m_hasAlpha  = hasAlpha;
-		_imageContainer.m_cubeMap   = cubeMap;
-		_imageContainer.m_ktx       = false;
-		_imageContainer.m_ktxLE     = false;
-		_imageContainer.m_srgb      = srgb;
-
-		return TextureFormat::Unknown != format;
-	}
-
-// KTX
-#define KTX_MAGIC       BX_MAKEFOURCC(0xAB, 'K', 'T', 'X')
-#define KTX_HEADER_SIZE 64
-
-#define KTX_ETC1_RGB8_OES                             0x8D64
-#define KTX_COMPRESSED_R11_EAC                        0x9270
-#define KTX_COMPRESSED_SIGNED_R11_EAC                 0x9271
-#define KTX_COMPRESSED_RG11_EAC                       0x9272
-#define KTX_COMPRESSED_SIGNED_RG11_EAC                0x9273
-#define KTX_COMPRESSED_RGB8_ETC2                      0x9274
-#define KTX_COMPRESSED_SRGB8_ETC2                     0x9275
-#define KTX_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2  0x9276
-#define KTX_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277
-#define KTX_COMPRESSED_RGBA8_ETC2_EAC                 0x9278
-#define KTX_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC          0x9279
-#define KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG           0x8C00
-#define KTX_COMPRESSED_RGB_PVRTC_2BPPV1_IMG           0x8C01
-#define KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG          0x8C02
-#define KTX_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG          0x8C03
-#define KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG          0x9137
-#define KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG          0x9138
-#define KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT             0x83F1
-#define KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT             0x83F2
-#define KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT             0x83F3
-#define KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT       0x8C4D
-#define KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT       0x8C4E
-#define KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT       0x8C4F
-#define KTX_COMPRESSED_LUMINANCE_LATC1_EXT            0x8C70
-#define KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT      0x8C72
-#define KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB            0x8E8C
-#define KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB      0x8E8D
-#define KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB      0x8E8E
-#define KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB    0x8E8F
-#define KTX_COMPRESSED_SRGB_PVRTC_2BPPV1_EXT          0x8A54
-#define KTX_COMPRESSED_SRGB_PVRTC_4BPPV1_EXT          0x8A55
-#define KTX_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT    0x8A56
-#define KTX_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT    0x8A57
-
-#define KTX_A8                                        0x803C
-#define KTX_R8                                        0x8229
-#define KTX_R16                                       0x822A
-#define KTX_RG8                                       0x822B
-#define KTX_RG16                                      0x822C
-#define KTX_R16F                                      0x822D
-#define KTX_R32F                                      0x822E
-#define KTX_RG16F                                     0x822F
-#define KTX_RG32F                                     0x8230
-#define KTX_RGBA8                                     0x8058
-#define KTX_RGBA16                                    0x805B
-#define KTX_RGBA16F                                   0x881A
-#define KTX_R32UI                                     0x8236
-#define KTX_RG32UI                                    0x823C
-#define KTX_RGBA32UI                                  0x8D70
-#define KTX_RGBA32F                                   0x8814
-#define KTX_RGB565                                    0x8D62
-#define KTX_RGBA4                                     0x8056
-#define KTX_RGB5_A1                                   0x8057
-#define KTX_RGB10_A2                                  0x8059
-#define KTX_R8I                                       0x8231
-#define KTX_R8UI                                      0x8232
-#define KTX_R16I                                      0x8233
-#define KTX_R16UI                                     0x8234
-#define KTX_R32I                                      0x8235
-#define KTX_R32UI                                     0x8236
-#define KTX_RG8I                                      0x8237
-#define KTX_RG8UI                                     0x8238
-#define KTX_RG16I                                     0x8239
-#define KTX_RG16UI                                    0x823A
-#define KTX_RG32I                                     0x823B
-#define KTX_RG32UI                                    0x823C
-#define KTX_R8_SNORM                                  0x8F94
-#define KTX_RG8_SNORM                                 0x8F95
-#define KTX_RGB8_SNORM                                0x8F96
-#define KTX_RGBA8_SNORM                               0x8F97
-#define KTX_R16_SNORM                                 0x8F98
-#define KTX_RG16_SNORM                                0x8F99
-#define KTX_RGB16_SNORM                               0x8F9A
-#define KTX_RGBA16_SNORM                              0x8F9B
-#define KTX_SRGB8                                     0x8C41
-#define KTX_SRGB8_ALPHA8                              0x8C43
-#define KTX_RGBA32UI                                  0x8D70
-#define KTX_RGB32UI                                   0x8D71
-#define KTX_RGBA16UI                                  0x8D76
-#define KTX_RGB16UI                                   0x8D77
-#define KTX_RGBA8UI                                   0x8D7C
-#define KTX_RGB8UI                                    0x8D7D
-#define KTX_RGBA32I                                   0x8D82
-#define KTX_RGB32I                                    0x8D83
-#define KTX_RGBA16I                                   0x8D88
-#define KTX_RGB16I                                    0x8D89
-#define KTX_RGBA8I                                    0x8D8E
-#define KTX_RGB8                                      0x8051
-#define KTX_RGB8I                                     0x8D8F
-#define KTX_RGB9_E5                                   0x8C3D
-#define KTX_R11F_G11F_B10F                            0x8C3A
-
-#define KTX_ZERO                                      0
-#define KTX_RED                                       0x1903
-#define KTX_ALPHA                                     0x1906
-#define KTX_RGB                                       0x1907
-#define KTX_RGBA                                      0x1908
-#define KTX_BGRA                                      0x80E1
-#define KTX_RG                                        0x8227
-
-#define KTX_BYTE                                      0x1400
-#define KTX_UNSIGNED_BYTE                             0x1401
-#define KTX_SHORT                                     0x1402
-#define KTX_UNSIGNED_SHORT                            0x1403
-#define KTX_INT                                       0x1404
-#define KTX_UNSIGNED_INT                              0x1405
-#define KTX_FLOAT                                     0x1406
-#define KTX_HALF_FLOAT                                0x140B
-#define KTX_UNSIGNED_INT_5_9_9_9_REV                  0x8C3E
-#define KTX_UNSIGNED_SHORT_5_6_5                      0x8363
-#define KTX_UNSIGNED_SHORT_4_4_4_4                    0x8033
-#define KTX_UNSIGNED_SHORT_5_5_5_1                    0x8034
-#define KTX_UNSIGNED_INT_2_10_10_10_REV               0x8368
-#define KTX_UNSIGNED_INT_10F_11F_11F_REV              0x8C3B
-
-	struct KtxFormatInfo
-	{
-		uint32_t m_internalFmt;
-		uint32_t m_internalFmtSrgb;
-		uint32_t m_fmt;
-		uint32_t m_type;
-	};
-
-	static const KtxFormatInfo s_translateKtxFormat[] =
-	{
-		{ KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT,            KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,        KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT,            KTX_ZERO,                         }, // BC1
-		{ KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT,            KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,        KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT,            KTX_ZERO,                         }, // BC2
-		{ KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT,            KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,        KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT,            KTX_ZERO,                         }, // BC3
-		{ KTX_COMPRESSED_LUMINANCE_LATC1_EXT,           KTX_ZERO,                                       KTX_COMPRESSED_LUMINANCE_LATC1_EXT,           KTX_ZERO,                         }, // BC4
-		{ KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT,     KTX_ZERO,                                       KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT,     KTX_ZERO,                         }, // BC5
-		{ KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB,     KTX_ZERO,                                       KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB,     KTX_ZERO,                         }, // BC6H
-		{ KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB,           KTX_ZERO,                                       KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB,           KTX_ZERO,                         }, // BC7
-		{ KTX_ETC1_RGB8_OES,                            KTX_ZERO,                                       KTX_ETC1_RGB8_OES,                            KTX_ZERO,                         }, // ETC1
-		{ KTX_COMPRESSED_RGB8_ETC2,                     KTX_ZERO,                                       KTX_COMPRESSED_RGB8_ETC2,                     KTX_ZERO,                         }, // ETC2
-		{ KTX_COMPRESSED_RGBA8_ETC2_EAC,                KTX_COMPRESSED_SRGB8_ETC2,                      KTX_COMPRESSED_RGBA8_ETC2_EAC,                KTX_ZERO,                         }, // ETC2A
-		{ KTX_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, KTX_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2,  KTX_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, KTX_ZERO,                         }, // ETC2A1
-		{ KTX_COMPRESSED_RGB_PVRTC_2BPPV1_IMG,          KTX_COMPRESSED_SRGB_PVRTC_2BPPV1_EXT,           KTX_COMPRESSED_RGB_PVRTC_2BPPV1_IMG,          KTX_ZERO,                         }, // PTC12
-		{ KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG,          KTX_COMPRESSED_SRGB_PVRTC_4BPPV1_EXT,           KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG,          KTX_ZERO,                         }, // PTC14
-		{ KTX_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG,         KTX_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT,     KTX_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG,         KTX_ZERO,                         }, // PTC12A
-		{ KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG,         KTX_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT,     KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG,         KTX_ZERO,                         }, // PTC14A
-		{ KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG,         KTX_ZERO,                                       KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG,         KTX_ZERO,                         }, // PTC22
-		{ KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG,         KTX_ZERO,                                       KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG,         KTX_ZERO,                         }, // PTC24
-		{ KTX_ZERO,                                     KTX_ZERO,                                       KTX_ZERO,                                     KTX_ZERO,                         }, // Unknown
-		{ KTX_ZERO,                                     KTX_ZERO,                                       KTX_ZERO,                                     KTX_ZERO,                         }, // R1
-		{ KTX_ALPHA,                                    KTX_ZERO,                                       KTX_ALPHA,                                    KTX_UNSIGNED_BYTE,                }, // A8
-		{ KTX_R8,                                       KTX_ZERO,                                       KTX_RED,                                      KTX_UNSIGNED_BYTE,                }, // R8
-		{ KTX_R8I,                                      KTX_ZERO,                                       KTX_RED,                                      KTX_BYTE,                         }, // R8S
-		{ KTX_R8UI,                                     KTX_ZERO,                                       KTX_RED,                                      KTX_UNSIGNED_BYTE,                }, // R8S
-		{ KTX_R8_SNORM,                                 KTX_ZERO,                                       KTX_RED,                                      KTX_BYTE,                         }, // R8S
-		{ KTX_R16,                                      KTX_ZERO,                                       KTX_RED,                                      KTX_UNSIGNED_SHORT,               }, // R16
-		{ KTX_R16I,                                     KTX_ZERO,                                       KTX_RED,                                      KTX_SHORT,                        }, // R16I
-		{ KTX_R16UI,                                    KTX_ZERO,                                       KTX_RED,                                      KTX_UNSIGNED_SHORT,               }, // R16U
-		{ KTX_R16F,                                     KTX_ZERO,                                       KTX_RED,                                      KTX_HALF_FLOAT,                   }, // R16F
-		{ KTX_R16_SNORM,                                KTX_ZERO,                                       KTX_RED,                                      KTX_SHORT,                        }, // R16S
-		{ KTX_R32I,                                     KTX_ZERO,                                       KTX_RED,                                      KTX_INT,                          }, // R32I
-		{ KTX_R32UI,                                    KTX_ZERO,                                       KTX_RED,                                      KTX_UNSIGNED_INT,                 }, // R32U
-		{ KTX_R32F,                                     KTX_ZERO,                                       KTX_RED,                                      KTX_FLOAT,                        }, // R32F
-		{ KTX_RG8,                                      KTX_ZERO,                                       KTX_RG,                                       KTX_UNSIGNED_BYTE,                }, // RG8
-		{ KTX_RG8I,                                     KTX_ZERO,                                       KTX_RG,                                       KTX_BYTE,                         }, // RG8I
-		{ KTX_RG8UI,                                    KTX_ZERO,                                       KTX_RG,                                       KTX_UNSIGNED_BYTE,                }, // RG8U
-		{ KTX_RG8_SNORM,                                KTX_ZERO,                                       KTX_RG,                                       KTX_BYTE,                         }, // RG8S
-		{ KTX_RG16,                                     KTX_ZERO,                                       KTX_RG,                                       KTX_UNSIGNED_SHORT,               }, // RG16
-		{ KTX_RG16I,                                    KTX_ZERO,                                       KTX_RG,                                       KTX_SHORT,                        }, // RG16
-		{ KTX_RG16UI,                                   KTX_ZERO,                                       KTX_RG,                                       KTX_UNSIGNED_SHORT,               }, // RG16
-		{ KTX_RG16F,                                    KTX_ZERO,                                       KTX_RG,                                       KTX_FLOAT,                        }, // RG16F
-		{ KTX_RG16_SNORM,                               KTX_ZERO,                                       KTX_RG,                                       KTX_SHORT,                        }, // RG16S
-		{ KTX_RG32I,                                    KTX_ZERO,                                       KTX_RG,                                       KTX_INT,                          }, // RG32I
-		{ KTX_RG32UI,                                   KTX_ZERO,                                       KTX_RG,                                       KTX_UNSIGNED_INT,                 }, // RG32U
-		{ KTX_RG32F,                                    KTX_ZERO,                                       KTX_RG,                                       KTX_FLOAT,                        }, // RG32F
-		{ KTX_RGB8,                                     KTX_SRGB8,                                      KTX_RGB,                                      KTX_UNSIGNED_BYTE,                }, // RGB8
-		{ KTX_RGB8I,                                    KTX_ZERO,                                       KTX_RGB,                                      KTX_BYTE,                         }, // RGB8I
-		{ KTX_RGB8UI,                                   KTX_ZERO,                                       KTX_RGB,                                      KTX_UNSIGNED_BYTE,                }, // RGB8U
-		{ KTX_RGB8_SNORM,                               KTX_ZERO,                                       KTX_RGB,                                      KTX_BYTE,                         }, // RGB8S
-		{ KTX_RGB9_E5,                                  KTX_ZERO,                                       KTX_RGB,                                      KTX_UNSIGNED_INT_5_9_9_9_REV,     }, // RGB9E5F
-		{ KTX_BGRA,                                     KTX_SRGB8_ALPHA8,                               KTX_BGRA,                                     KTX_UNSIGNED_BYTE,                }, // BGRA8
-		{ KTX_RGBA8,                                    KTX_SRGB8_ALPHA8,                               KTX_RGBA,                                     KTX_UNSIGNED_BYTE,                }, // RGBA8
-		{ KTX_RGBA8I,                                   KTX_ZERO,                                       KTX_RGBA,                                     KTX_BYTE,                         }, // RGBA8I
-		{ KTX_RGBA8UI,                                  KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_BYTE,                }, // RGBA8U
-		{ KTX_RGBA8_SNORM,                              KTX_ZERO,                                       KTX_RGBA,                                     KTX_BYTE,                         }, // RGBA8S
-		{ KTX_RGBA16,                                   KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_SHORT,               }, // RGBA16
-		{ KTX_RGBA16I,                                  KTX_ZERO,                                       KTX_RGBA,                                     KTX_SHORT,                        }, // RGBA16I
-		{ KTX_RGBA16UI,                                 KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_SHORT,               }, // RGBA16U
-		{ KTX_RGBA16F,                                  KTX_ZERO,                                       KTX_RGBA,                                     KTX_HALF_FLOAT,                   }, // RGBA16F
-		{ KTX_RGBA16_SNORM,                             KTX_ZERO,                                       KTX_RGBA,                                     KTX_SHORT,                        }, // RGBA16S
-		{ KTX_RGBA32I,                                  KTX_ZERO,                                       KTX_RGBA,                                     KTX_INT,                          }, // RGBA32I
-		{ KTX_RGBA32UI,                                 KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_INT,                 }, // RGBA32U
-		{ KTX_RGBA32F,                                  KTX_ZERO,                                       KTX_RGBA,                                     KTX_FLOAT,                        }, // RGBA32F
-		{ KTX_RGB565,                                   KTX_ZERO,                                       KTX_RGB,                                      KTX_UNSIGNED_SHORT_5_6_5,         }, // R5G6B5
-		{ KTX_RGBA4,                                    KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_SHORT_4_4_4_4,       }, // RGBA4
-		{ KTX_RGB5_A1,                                  KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_SHORT_5_5_5_1,       }, // RGB5A1
-		{ KTX_RGB10_A2,                                 KTX_ZERO,                                       KTX_RGBA,                                     KTX_UNSIGNED_INT_2_10_10_10_REV,  }, // RGB10A2
-		{ KTX_R11F_G11F_B10F,                           KTX_ZERO,                                       KTX_RGB,                                      KTX_UNSIGNED_INT_10F_11F_11F_REV, }, // R11G11B10F
-	};
-	BX_STATIC_ASSERT(TextureFormat::UnknownDepth == BX_COUNTOF(s_translateKtxFormat) );
-
-	struct KtxFormatInfo2
-	{
-		uint32_t m_internalFmt;
-		TextureFormat::Enum m_format;
-	};
-
-	static const KtxFormatInfo2 s_translateKtxFormat2[] =
-	{
-		{ KTX_A8,  TextureFormat::A8   },
-		{ KTX_RED, TextureFormat::R8   },
-		{ KTX_RGB, TextureFormat::RGB8 },
-	};
-
-	bool imageParseKtx(ImageContainer& _imageContainer, bx::ReaderSeekerI* _reader)
-	{
-		uint8_t identifier[8];
-		bx::read(_reader, identifier);
-
-		if (identifier[1] != '1'
-		&&  identifier[2] != '1')
-		{
-			return false;
-		}
-
-		uint32_t endianness;
-		bx::read(_reader, endianness);
-
-		bool fromLittleEndian = 0x04030201 == endianness;
-
-		uint32_t glType;
-		bx::readHE(_reader, glType, fromLittleEndian);
-
-		uint32_t glTypeSize;
-		bx::readHE(_reader, glTypeSize, fromLittleEndian);
-
-		uint32_t glFormat;
-		bx::readHE(_reader, glFormat, fromLittleEndian);
-
-		uint32_t glInternalFormat;
-		bx::readHE(_reader, glInternalFormat, fromLittleEndian);
-
-		uint32_t glBaseInternalFormat;
-		bx::readHE(_reader, glBaseInternalFormat, fromLittleEndian);
-
-		uint32_t width;
-		bx::readHE(_reader, width, fromLittleEndian);
-
-		uint32_t height;
-		bx::readHE(_reader, height, fromLittleEndian);
-
-		uint32_t depth;
-		bx::readHE(_reader, depth, fromLittleEndian);
-
-		uint32_t numberOfArrayElements;
-		bx::readHE(_reader, numberOfArrayElements, fromLittleEndian);
-
-		uint32_t numFaces;
-		bx::readHE(_reader, numFaces, fromLittleEndian);
-
-		uint32_t numMips;
-		bx::readHE(_reader, numMips, fromLittleEndian);
-
-		uint32_t metaDataSize;
-		bx::readHE(_reader, metaDataSize, fromLittleEndian);
-
-		// skip meta garbage...
-		int64_t offset = bx::skip(_reader, metaDataSize);
-
-		TextureFormat::Enum format = TextureFormat::Unknown;
-		bool hasAlpha = false;
-
-		for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateKtxFormat); ++ii)
-		{
-			if (s_translateKtxFormat[ii].m_internalFmt == glInternalFormat)
-			{
-				format = TextureFormat::Enum(ii);
-				break;
-			}
-		}
-
-		if (TextureFormat::Unknown == format)
-		{
-			for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateKtxFormat2); ++ii)
-			{
-				if (s_translateKtxFormat2[ii].m_internalFmt == glInternalFormat)
-				{
-					format = s_translateKtxFormat2[ii].m_format;
-					break;
-				}
-			}
-		}
-
-		_imageContainer.m_allocator = NULL;
-		_imageContainer.m_data      = NULL;
-		_imageContainer.m_size      = 0;
-		_imageContainer.m_offset    = (uint32_t)offset;
-		_imageContainer.m_width     = width;
-		_imageContainer.m_height    = height;
-		_imageContainer.m_depth     = depth;
-		_imageContainer.m_format    = format;
-		_imageContainer.m_numLayers = uint16_t(bx::uint32_max(numberOfArrayElements, 1) );
-		_imageContainer.m_numMips   = uint8_t(bx::uint32_max(numMips, 1) );
-		_imageContainer.m_hasAlpha  = hasAlpha;
-		_imageContainer.m_cubeMap   = numFaces > 1;
-		_imageContainer.m_ktx       = true;
-		_imageContainer.m_ktxLE     = fromLittleEndian;
-		_imageContainer.m_srgb      = false;
-
-		return TextureFormat::Unknown != format;
-	}
-
-// PVR3
-#define PVR3_MAKE8CC(_a, _b, _c, _d, _e, _f, _g, _h) (uint64_t(BX_MAKEFOURCC(_a, _b, _c, _d) ) | (uint64_t(BX_MAKEFOURCC(_e, _f, _g, _h) )<<32) )
-
-#define PVR3_MAGIC            BX_MAKEFOURCC('P', 'V', 'R', 3)
-#define PVR3_HEADER_SIZE      52
-
-#define PVR3_PVRTC1_2BPP_RGB  0
-#define PVR3_PVRTC1_2BPP_RGBA 1
-#define PVR3_PVRTC1_4BPP_RGB  2
-#define PVR3_PVRTC1_4BPP_RGBA 3
-#define PVR3_PVRTC2_2BPP_RGBA 4
-#define PVR3_PVRTC2_4BPP_RGBA 5
-#define PVR3_ETC1             6
-#define PVR3_DXT1             7
-#define PVR3_DXT2             8
-#define PVR3_DXT3             9
-#define PVR3_DXT4             10
-#define PVR3_DXT5             11
-#define PVR3_BC4              12
-#define PVR3_BC5              13
-#define PVR3_R8               PVR3_MAKE8CC('r',   0,   0,   0,  8,  0,  0,  0)
-#define PVR3_R16              PVR3_MAKE8CC('r',   0,   0,   0, 16,  0,  0,  0)
-#define PVR3_R32              PVR3_MAKE8CC('r',   0,   0,   0, 32,  0,  0,  0)
-#define PVR3_RG8              PVR3_MAKE8CC('r', 'g',   0,   0,  8,  8,  0,  0)
-#define PVR3_RG16             PVR3_MAKE8CC('r', 'g',   0,   0, 16, 16,  0,  0)
-#define PVR3_RG32             PVR3_MAKE8CC('r', 'g',   0,   0, 32, 32,  0,  0)
-#define PVR3_BGRA8            PVR3_MAKE8CC('b', 'g', 'r', 'a',  8,  8,  8,  8)
-#define PVR3_RGBA16           PVR3_MAKE8CC('r', 'g', 'b', 'a', 16, 16, 16, 16)
-#define PVR3_RGBA32           PVR3_MAKE8CC('r', 'g', 'b', 'a', 32, 32, 32, 32)
-#define PVR3_RGB565           PVR3_MAKE8CC('r', 'g', 'b',   0,  5,  6,  5,  0)
-#define PVR3_RGBA4            PVR3_MAKE8CC('r', 'g', 'b', 'a',  4,  4,  4,  4)
-#define PVR3_RGBA51           PVR3_MAKE8CC('r', 'g', 'b', 'a',  5,  5,  5,  1)
-#define PVR3_RGB10A2          PVR3_MAKE8CC('r', 'g', 'b', 'a', 10, 10, 10,  2)
-
-#define PVR3_CHANNEL_TYPE_ANY   UINT32_MAX
-#define PVR3_CHANNEL_TYPE_FLOAT UINT32_C(12)
-
-	struct TranslatePvr3Format
-	{
-		uint64_t m_format;
-		uint32_t m_channelTypeMask;
-		TextureFormat::Enum m_textureFormat;
-	};
-
-	static const TranslatePvr3Format s_translatePvr3Format[] =
-	{
-		{ PVR3_PVRTC1_2BPP_RGB,  PVR3_CHANNEL_TYPE_ANY,   TextureFormat::PTC12   },
-		{ PVR3_PVRTC1_2BPP_RGBA, PVR3_CHANNEL_TYPE_ANY,   TextureFormat::PTC12A  },
-		{ PVR3_PVRTC1_4BPP_RGB,  PVR3_CHANNEL_TYPE_ANY,   TextureFormat::PTC14   },
-		{ PVR3_PVRTC1_4BPP_RGBA, PVR3_CHANNEL_TYPE_ANY,   TextureFormat::PTC14A  },
-		{ PVR3_PVRTC2_2BPP_RGBA, PVR3_CHANNEL_TYPE_ANY,   TextureFormat::PTC22   },
-		{ PVR3_PVRTC2_4BPP_RGBA, PVR3_CHANNEL_TYPE_ANY,   TextureFormat::PTC24   },
-		{ PVR3_ETC1,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::ETC1    },
-		{ PVR3_DXT1,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC1     },
-		{ PVR3_DXT2,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC2     },
-		{ PVR3_DXT3,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC2     },
-		{ PVR3_DXT4,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC3     },
-		{ PVR3_DXT5,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC3     },
-		{ PVR3_BC4,              PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC4     },
-		{ PVR3_BC5,              PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BC5     },
-		{ PVR3_R8,               PVR3_CHANNEL_TYPE_ANY,   TextureFormat::R8      },
-		{ PVR3_R16,              PVR3_CHANNEL_TYPE_ANY,   TextureFormat::R16U    },
-		{ PVR3_R16,              PVR3_CHANNEL_TYPE_FLOAT, TextureFormat::R16F    },
-		{ PVR3_R32,              PVR3_CHANNEL_TYPE_ANY,   TextureFormat::R32U    },
-		{ PVR3_R32,              PVR3_CHANNEL_TYPE_FLOAT, TextureFormat::R32F    },
-		{ PVR3_RG8,              PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RG8     },
-		{ PVR3_RG16,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RG16    },
-		{ PVR3_RG16,             PVR3_CHANNEL_TYPE_FLOAT, TextureFormat::RG16F   },
-		{ PVR3_RG32,             PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RG16    },
-		{ PVR3_RG32,             PVR3_CHANNEL_TYPE_FLOAT, TextureFormat::RG32F   },
-		{ PVR3_BGRA8,            PVR3_CHANNEL_TYPE_ANY,   TextureFormat::BGRA8   },
-		{ PVR3_RGBA16,           PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RGBA16  },
-		{ PVR3_RGBA16,           PVR3_CHANNEL_TYPE_FLOAT, TextureFormat::RGBA16F },
-		{ PVR3_RGBA32,           PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RGBA32U },
-		{ PVR3_RGBA32,           PVR3_CHANNEL_TYPE_FLOAT, TextureFormat::RGBA32F },
-		{ PVR3_RGB565,           PVR3_CHANNEL_TYPE_ANY,   TextureFormat::R5G6B5  },
-		{ PVR3_RGBA4,            PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RGBA4   },
-		{ PVR3_RGBA51,           PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RGB5A1  },
-		{ PVR3_RGB10A2,          PVR3_CHANNEL_TYPE_ANY,   TextureFormat::RGB10A2 },
-	};
-
-	bool imageParsePvr3(ImageContainer& _imageContainer, bx::ReaderSeekerI* _reader)
-	{
-		uint32_t flags;
-		bx::read(_reader, flags);
-
-		uint64_t pixelFormat;
-		bx::read(_reader, pixelFormat);
-
-		uint32_t colorSpace;
-		bx::read(_reader, colorSpace); // 0 - linearRGB, 1 - sRGB
-
-		uint32_t channelType;
-		bx::read(_reader, channelType);
-
-		uint32_t height;
-		bx::read(_reader, height);
-
-		uint32_t width;
-		bx::read(_reader, width);
-
-		uint32_t depth;
-		bx::read(_reader, depth);
-
-		uint32_t numSurfaces;
-		bx::read(_reader, numSurfaces);
-
-		uint32_t numFaces;
-		bx::read(_reader, numFaces);
-
-		uint32_t numMips;
-		bx::read(_reader, numMips);
-
-		uint32_t metaDataSize;
-		bx::read(_reader, metaDataSize);
-
-		// skip meta garbage...
-		int64_t offset = bx::skip(_reader, metaDataSize);
-
-		TextureFormat::Enum format = TextureFormat::Unknown;
-		bool hasAlpha = false;
-
-		for (uint32_t ii = 0; ii < BX_COUNTOF(s_translatePvr3Format); ++ii)
-		{
-			if (s_translatePvr3Format[ii].m_format == pixelFormat
-			&&  channelType == (s_translatePvr3Format[ii].m_channelTypeMask & channelType) )
-			{
-				format = s_translatePvr3Format[ii].m_textureFormat;
-				break;
-			}
-		}
-
-		_imageContainer.m_allocator = NULL;
-		_imageContainer.m_data      = NULL;
-		_imageContainer.m_size      = 0;
-		_imageContainer.m_offset    = (uint32_t)offset;
-		_imageContainer.m_width     = width;
-		_imageContainer.m_height    = height;
-		_imageContainer.m_depth     = depth;
-		_imageContainer.m_format    = format;
-		_imageContainer.m_numLayers = 1;
-		_imageContainer.m_numMips   = uint8_t(bx::uint32_max(numMips, 1) );
-		_imageContainer.m_hasAlpha  = hasAlpha;
-		_imageContainer.m_cubeMap   = numFaces > 1;
-		_imageContainer.m_ktx       = false;
-		_imageContainer.m_ktxLE     = false;
-		_imageContainer.m_srgb      = colorSpace > 0;
-
-		return TextureFormat::Unknown != format;
-	}
-
-	bool imageParse(ImageContainer& _imageContainer, bx::ReaderSeekerI* _reader)
-	{
-		uint32_t magic;
-		bx::read(_reader, magic);
-
-		if (DDS_MAGIC == magic)
-		{
-			return imageParseDds(_imageContainer, _reader);
-		}
-		else if (KTX_MAGIC == magic)
-		{
-			return imageParseKtx(_imageContainer, _reader);
-		}
-		else if (PVR3_MAGIC == magic)
-		{
-			return imageParsePvr3(_imageContainer, _reader);
-		}
-		else if (BGFX_CHUNK_MAGIC_TEX == magic)
-		{
-			TextureCreate tc;
-			bx::read(_reader, tc);
-
-			_imageContainer.m_format = tc.m_format;
-			_imageContainer.m_offset = UINT32_MAX;
-			_imageContainer.m_allocator = NULL;
-			if (NULL == tc.m_mem)
-			{
-				_imageContainer.m_data = NULL;
-				_imageContainer.m_size = 0;
-			}
-			else
-			{
-				_imageContainer.m_data = tc.m_mem->data;
-				_imageContainer.m_size = tc.m_mem->size;
-			}
-			_imageContainer.m_width     = tc.m_width;
-			_imageContainer.m_height    = tc.m_height;
-			_imageContainer.m_depth     = tc.m_depth;
-			_imageContainer.m_numLayers = tc.m_numLayers;
-			_imageContainer.m_numMips   = tc.m_numMips;
-			_imageContainer.m_hasAlpha  = false;
-			_imageContainer.m_cubeMap   = tc.m_cubeMap;
-			_imageContainer.m_ktx       = false;
-			_imageContainer.m_ktxLE     = false;
-			_imageContainer.m_srgb      = false;
-
-			return true;
-		}
-
-		BX_TRACE("Unrecognized image format (magic: 0x%08x)!", magic);
-		return false;
-	}
-
-	bool imageParse(ImageContainer& _imageContainer, const void* _data, uint32_t _size)
-	{
-		bx::MemoryReader reader(_data, _size);
-		return imageParse(_imageContainer, &reader);
-	}
-
-	void imageDecodeToBgra8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _pitch, TextureFormat::Enum _format)
-	{
-		const uint8_t* src = (const uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		uint32_t width  = _width/4;
-		uint32_t height = _height/4;
-
-		uint8_t temp[16*4];
-
-		switch (_format)
-		{
-		case TextureFormat::BC1:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockDxt1(temp, src);
-					src += 8;
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::BC2:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockDxt23A(temp+3, src);
-					src += 8;
-					decodeBlockDxt(temp, src);
-					src += 8;
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::BC3:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockDxt45A(temp+3, src);
-					src += 8;
-					decodeBlockDxt(temp, src);
-					src += 8;
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::BC4:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockDxt45A(temp, src);
-					src += 8;
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::BC5:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockDxt45A(temp+2, src);
-					src += 8;
-					decodeBlockDxt45A(temp+1, src);
-					src += 8;
-
-					for (uint32_t ii = 0; ii < 16; ++ii)
-					{
-						float nx = temp[ii*4+2]*2.0f/255.0f - 1.0f;
-						float ny = temp[ii*4+1]*2.0f/255.0f - 1.0f;
-						float nz = bx::fsqrt(1.0f - nx*nx - ny*ny);
-						temp[ii*4+0] = uint8_t( (nz + 1.0f)*255.0f/2.0f);
-						temp[ii*4+3] = 0;
-					}
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::ETC1:
-		case TextureFormat::ETC2:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockEtc12(temp, src);
-					src += 8;
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::ETC2A:
-			BX_WARN(false, "ETC2A decoder is not implemented.");
-			imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xff00ff00) );
-			break;
-
-		case TextureFormat::ETC2A1:
-			BX_WARN(false, "ETC2A1 decoder is not implemented.");
-			imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffff0000) );
-			break;
-
-		case TextureFormat::PTC12:
-			BX_WARN(false, "PTC12 decoder is not implemented.");
-			imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffff00ff) );
-			break;
-
-		case TextureFormat::PTC12A:
-			BX_WARN(false, "PTC12A decoder is not implemented.");
-			imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffff00) );
-			break;
-
-		case TextureFormat::PTC14:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockPtc14(temp, src, xx, yy, width, height);
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::PTC14A:
-			for (uint32_t yy = 0; yy < height; ++yy)
-			{
-				for (uint32_t xx = 0; xx < width; ++xx)
-				{
-					decodeBlockPtc14A(temp, src, xx, yy, width, height);
-
-					uint8_t* block = &dst[(yy*_pitch+xx*4)*4];
-					bx::memCopy(&block[0*_pitch], &temp[ 0], 16);
-					bx::memCopy(&block[1*_pitch], &temp[16], 16);
-					bx::memCopy(&block[2*_pitch], &temp[32], 16);
-					bx::memCopy(&block[3*_pitch], &temp[48], 16);
-				}
-			}
-			break;
-
-		case TextureFormat::PTC22:
-			BX_WARN(false, "PTC22 decoder is not implemented.");
-			imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff00ff00), UINT32_C(0xff0000ff) );
-			break;
-
-		case TextureFormat::PTC24:
-			BX_WARN(false, "PTC24 decoder is not implemented.");
-			imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffffff) );
-			break;
-
-		case TextureFormat::RGBA8:
-			imageSwizzleBgra8(_dst, _width, _height, _pitch, _src);
-			break;
-
-		case TextureFormat::BGRA8:
-			bx::memCopy(_dst, _src, _pitch*_height);
-			break;
-
-		default:
-			{
-				const uint32_t srcBpp   = s_imageBlockInfo[_format].bitsPerPixel;
-				const uint32_t srcPitch = _width * srcBpp / 8;
-				if (!imageConvert(_dst, TextureFormat::BGRA8, _src, _format, _width, _height, srcPitch) )
-				{
-					// Failed to convert, just make ugly red-yellow checkerboard texture.
-					imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xffff0000), UINT32_C(0xffffff00) );
-				}
-			}
-			break;
-		}
-	}
-
-	void imageDecodeToRgba8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _pitch, TextureFormat::Enum _format)
-	{
-		switch (_format)
-		{
-		case TextureFormat::RGBA8:
-			bx::memCopy(_dst, _src, _pitch*_height);
-			break;
-
-		case TextureFormat::BGRA8:
-			imageSwizzleBgra8(_dst, _width, _height, _pitch, _src);
-			break;
-
-		default:
-			imageDecodeToBgra8(_dst, _src, _width, _height, _pitch, _format);
-			imageSwizzleBgra8(_dst, _width, _height, _width*4, _dst);
-			break;
-		}
-	}
-
-	void imageRgba8ToRgba32fRef(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstwidth  = _width;
-		const uint32_t dstheight = _height;
-
-		if (0 == dstwidth
-		||  0 == dstheight)
-		{
-			return;
-		}
-
-		float* dst = (float*)_dst;
-		const uint8_t* src = (const uint8_t*)_src;
-
-		for (uint32_t yy = 0, ystep = _pitch; yy < dstheight; ++yy, src += ystep)
-		{
-			const uint8_t* rgba = src;
-			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 4, dst += 4)
-			{
-				dst[0] = bx::fpow(rgba[0], 2.2f);
-				dst[1] = bx::fpow(rgba[1], 2.2f);
-				dst[2] = bx::fpow(rgba[2], 2.2f);
-				dst[3] =          rgba[3];
-			}
-		}
-	}
-
-	void imageRgba8ToRgba32f(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint32_t dstwidth  = _width;
-		const uint32_t dstheight = _height;
-
-		if (0 == dstwidth
-		||  0 == dstheight)
-		{
-			return;
-		}
-
-		float* dst = (float*)_dst;
-		const uint8_t* src = (const uint8_t*)_src;
-
-		using namespace bx;
-		const simd128_t unpack = simd_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
-		const simd128_t umask  = simd_ild(0xff, 0xff00, 0xff0000, 0xff000000);
-		const simd128_t wflip  = simd_ild(0, 0, 0, 0x80000000);
-		const simd128_t wadd   = simd_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
-
-		for (uint32_t yy = 0, ystep = _pitch; yy < dstheight; ++yy, src += ystep)
-		{
-			const uint8_t* rgba = src;
-			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 4, dst += 4)
-			{
-				const simd128_t abgr0  = simd_splat(rgba);
-				const simd128_t abgr0m = simd_and(abgr0, umask);
-				const simd128_t abgr0x = simd_xor(abgr0m, wflip);
-				const simd128_t abgr0f = simd_itof(abgr0x);
-				const simd128_t abgr0c = simd_add(abgr0f, wadd);
-				const simd128_t abgr0n = simd_mul(abgr0c, unpack);
-
-				simd_st(dst, abgr0n);
-			}
-		}
-	}
-
-	void imageDecodeToRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _pitch, TextureFormat::Enum _format)
-	{
-		const uint8_t* src = (const uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		switch (_format)
-		{
-		case TextureFormat::BC5:
-			{
-				uint32_t width  = _width/4;
-				uint32_t height = _height/4;
-
-				for (uint32_t yy = 0; yy < height; ++yy)
-				{
-					for (uint32_t xx = 0; xx < width; ++xx)
-					{
-						uint8_t temp[16*4];
-
-						decodeBlockDxt45A(temp+2, src);
-						src += 8;
-						decodeBlockDxt45A(temp+1, src);
-						src += 8;
-
-						for (uint32_t ii = 0; ii < 16; ++ii)
-						{
-							float nx = temp[ii*4+2]*2.0f/255.0f - 1.0f;
-							float ny = temp[ii*4+1]*2.0f/255.0f - 1.0f;
-							float nz = bx::fsqrt(1.0f - nx*nx - ny*ny);
-
-							const uint32_t offset = (yy*4 + ii/4)*_width*16 + (xx*4 + ii%4)*16;
-							float* block = (float*)&dst[offset];
-							block[0] = nx;
-							block[1] = ny;
-							block[2] = nz;
-							block[3] = 0.0f;
-						}
-					}
-				}
-			}
-			break;
-
-		case TextureFormat::RGBA32F:
-			bx::memCopy(_dst, _src, _pitch*_height);
-			break;
-
-		case TextureFormat::RGBA8:
-			imageRgba8ToRgba32f(_dst, _width, _height, _pitch, _src);
-			break;
-
-		default:
-			if (isCompressed(_format) )
-			{
-				uint32_t size = imageGetSize(NULL, uint16_t(_pitch/4), uint16_t(_height), 0, false, false, 1, _format);
-				void* temp = BX_ALLOC(_allocator, size);
-				imageDecodeToRgba8(temp, _src, _width, _height, _pitch, _format);
-				imageRgba8ToRgba32f(_dst, _width, _height, _pitch, temp);
-				BX_FREE(_allocator, temp);
-			}
-			else
-			{
-				imageConvert(_dst, TextureFormat::RGBA32F, _src, _format, _width, _height, _pitch);
-			}
-			break;
-		}
-	}
-
-	bool imageGetRawData(const ImageContainer& _imageContainer, uint16_t _side, uint8_t _lod, const void* _data, uint32_t _size, ImageMip& _mip)
-	{
-		uint32_t offset = _imageContainer.m_offset;
-		TextureFormat::Enum format = TextureFormat::Enum(_imageContainer.m_format);
-		bool hasAlpha = _imageContainer.m_hasAlpha;
-
-		const ImageBlockInfo& blockInfo = s_imageBlockInfo[format];
-		const uint8_t  bpp         = blockInfo.bitsPerPixel;
-		const uint32_t blockSize   = blockInfo.blockSize;
-		const uint32_t blockWidth  = blockInfo.blockWidth;
-		const uint32_t blockHeight = blockInfo.blockHeight;
-		const uint32_t minBlockX   = blockInfo.minBlockX;
-		const uint32_t minBlockY   = blockInfo.minBlockY;
-
-		if (UINT32_MAX == _imageContainer.m_offset)
-		{
-			if (NULL == _imageContainer.m_data)
-			{
-				return false;
-			}
-
-			offset = 0;
-			_data = _imageContainer.m_data;
-			_size = _imageContainer.m_size;
-		}
-
-		const uint8_t* data = (const uint8_t*)_data;
-		const uint16_t numSides = _imageContainer.m_numLayers * (_imageContainer.m_cubeMap ? 6 : 1);
-
-		if (_imageContainer.m_ktx)
-		{
-			uint32_t width  = _imageContainer.m_width;
-			uint32_t height = _imageContainer.m_height;
-			uint32_t depth  = _imageContainer.m_depth;
-
-			for (uint8_t lod = 0, num = _imageContainer.m_numMips; lod < num; ++lod)
-			{
-				uint32_t imageSize = bx::toHostEndian(*(const uint32_t*)&data[offset], _imageContainer.m_ktxLE) / _imageContainer.m_numLayers;
-				offset += sizeof(uint32_t);
-
-				width  = bx::uint32_max(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
-				height = bx::uint32_max(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
-				depth  = bx::uint32_max(1, depth);
-
-				uint32_t size = width*height*depth*bpp/8;
-				BX_CHECK(size == imageSize, "KTX: Image size mismatch %d (expected %d).", size, imageSize);
-
-				for (uint16_t side = 0; side < numSides; ++side)
-				{
-					if (side == _side
-					&&  lod  == _lod)
-					{
-						_mip.m_width     = width;
-						_mip.m_height    = height;
-						_mip.m_blockSize = blockSize;
-						_mip.m_size      = size;
-						_mip.m_data      = &data[offset];
-						_mip.m_bpp       = bpp;
-						_mip.m_format    = format;
-						_mip.m_hasAlpha  = hasAlpha;
-						return true;
-					}
-
-					offset += imageSize;
-
-					BX_CHECK(offset <= _size, "Reading past size of data buffer! (offset %d, size %d)", offset, _size);
-					BX_UNUSED(_size);
-				}
-
-				width  >>= 1;
-				height >>= 1;
-				depth  >>= 1;
-			}
-		}
-		else
-		{
-			for (uint16_t side = 0; side < numSides; ++side)
-			{
-				uint32_t width  = _imageContainer.m_width;
-				uint32_t height = _imageContainer.m_height;
-				uint32_t depth  = _imageContainer.m_depth;
-
-				for (uint8_t lod = 0, num = _imageContainer.m_numMips; lod < num; ++lod)
-				{
-					width  = bx::uint32_max(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
-					height = bx::uint32_max(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
-					depth  = bx::uint32_max(1, depth);
-
-					uint32_t size = width*height*depth*bpp/8;
-
-					if (side == _side
-					&&  lod  == _lod)
-					{
-						_mip.m_width     = width;
-						_mip.m_height    = height;
-						_mip.m_blockSize = blockSize;
-						_mip.m_size      = size;
-						_mip.m_data      = &data[offset];
-						_mip.m_bpp       = bpp;
-						_mip.m_format    = format;
-						_mip.m_hasAlpha  = hasAlpha;
-						return true;
-					}
-
-					offset += size;
-
-					BX_CHECK(offset <= _size, "Reading past size of data buffer! (offset %d, size %d)", offset, _size);
-					BX_UNUSED(_size);
-
-					width  >>= 1;
-					height >>= 1;
-					depth  >>= 1;
-				}
-			}
-		}
-
-		return false;
-	}
-
-	void imageWriteTga(bx::WriterI* _writer, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, bool _grayscale, bool _yflip, bx::Error* _err)
-	{
-		BX_ERROR_SCOPE(_err);
-
-		uint8_t type = _grayscale ? 3 :  2;
-		uint8_t bpp  = _grayscale ? 8 : 32;
-
-		uint8_t header[18] = {};
-		header[ 2] = type;
-		header[12] =  _width     &0xff;
-		header[13] = (_width >>8)&0xff;
-		header[14] =  _height    &0xff;
-		header[15] = (_height>>8)&0xff;
-		header[16] = bpp;
-		header[17] = 32;
-
-		bx::write(_writer, header, sizeof(header), _err);
-
-		uint32_t dstPitch = _width*bpp/8;
-		if (_yflip)
-		{
-			uint8_t* data = (uint8_t*)_src + _pitch*_height - _pitch;
-			for (uint32_t yy = 0; yy < _height; ++yy)
-			{
-				bx::write(_writer, data, dstPitch, _err);
-				data -= _pitch;
-			}
-		}
-		else if (_pitch == dstPitch)
-		{
-			bx::write(_writer, _src, _height*_pitch, _err);
-		}
-		else
-		{
-			uint8_t* data = (uint8_t*)_src;
-			for (uint32_t yy = 0; yy < _height; ++yy)
-			{
-				bx::write(_writer, data, dstPitch, _err);
-				data += _pitch;
-			}
-		}
-	}
-
-	static int32_t imageWriteKtxHeader(bx::WriterI* _writer, TextureFormat::Enum _format, bool _cubeMap, uint32_t _width, uint32_t _height, uint32_t _depth, uint8_t _numMips, bx::Error* _err)
-	{
-		BX_ERROR_SCOPE(_err);
-
-		const KtxFormatInfo& tfi = s_translateKtxFormat[_format];
-
-		int32_t size = 0;
-		size += bx::write(_writer, "\xabKTX 11\xbb\r\n\x1a\n", 12, _err);
-		size += bx::write(_writer, uint32_t(0x04030201), _err);
-		size += bx::write(_writer, uint32_t(0), _err); // glType
-		size += bx::write(_writer, uint32_t(1), _err); // glTypeSize
-		size += bx::write(_writer, uint32_t(0), _err); // glFormat
-		size += bx::write(_writer, tfi.m_internalFmt, _err); // glInternalFormat
-		size += bx::write(_writer, tfi.m_fmt, _err); // glBaseInternalFormat
-		size += bx::write(_writer, _width, _err);
-		size += bx::write(_writer, _height, _err);
-		size += bx::write(_writer, _depth, _err);
-		size += bx::write(_writer, uint32_t(0), _err); // numberOfArrayElements
-		size += bx::write(_writer, _cubeMap ? uint32_t(6) : uint32_t(0), _err);
-		size += bx::write(_writer, uint32_t(_numMips), _err);
-		size += bx::write(_writer, uint32_t(0), _err); // Meta-data size.
-
-		BX_WARN(size == 64, "KTX: Failed to write header size %d (expected: %d).", size, 64);
-		return size;
-	}
-
-	void imageWriteKtx(bx::WriterI* _writer, TextureFormat::Enum _format, bool _cubeMap, uint32_t _width, uint32_t _height, uint32_t _depth, uint8_t _numMips, const void* _src, bx::Error* _err)
-	{
-		BX_ERROR_SCOPE(_err);
-
-		imageWriteKtxHeader(_writer, _format, _cubeMap, _width, _height, _depth, _numMips, _err);
-
-		const ImageBlockInfo& blockInfo = s_imageBlockInfo[_format];
-		const uint8_t  bpp         = blockInfo.bitsPerPixel;
-		const uint32_t blockWidth  = blockInfo.blockWidth;
-		const uint32_t blockHeight = blockInfo.blockHeight;
-		const uint32_t minBlockX   = blockInfo.minBlockX;
-		const uint32_t minBlockY   = blockInfo.minBlockY;
-
-		const uint8_t* src = (const uint8_t*)_src;
-		uint32_t width  = _width;
-		uint32_t height = _height;
-		uint32_t depth  = _depth;
-
-		for (uint8_t lod = 0, num = _numMips; lod < num; ++lod)
-		{
-			width  = bx::uint32_max(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
-			height = bx::uint32_max(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
-			depth  = bx::uint32_max(1, depth);
-
-			uint32_t size = width*height*depth*bpp/8;
-			bx::write(_writer, size, _err);
-
-			for (uint8_t side = 0, numSides = _cubeMap ? 6 : 1; side < numSides; ++side)
-			{
-				bx::write(_writer, src, size, _err);
-				src += size;
-			}
-
-			width  >>= 1;
-			height >>= 1;
-			depth  >>= 1;
-		}
-	}
-
-	void imageWriteKtx(bx::WriterI* _writer, ImageContainer& _imageContainer, const void* _data, uint32_t _size, bx::Error* _err)
-	{
-		BX_ERROR_SCOPE(_err);
-
-		imageWriteKtxHeader(_writer
-			, TextureFormat::Enum(_imageContainer.m_format)
-			, _imageContainer.m_cubeMap
-			, _imageContainer.m_width
-			, _imageContainer.m_height
-			, _imageContainer.m_depth
-			, _imageContainer.m_numMips
-			, _err
-			);
-
-		for (uint8_t lod = 0, num = _imageContainer.m_numMips; lod < num; ++lod)
-		{
-			ImageMip mip;
-			imageGetRawData(_imageContainer, 0, lod, _data, _size, mip);
-			bx::write(_writer, mip.m_size, _err);
-
-			for (uint8_t side = 0, numSides = _imageContainer.m_cubeMap ? 6 : 1; side < numSides; ++side)
-			{
-				if (imageGetRawData(_imageContainer, side, lod, _data, _size, mip) )
-				{
-					bx::write(_writer, mip.m_data, mip.m_size, _err);
-				}
-			}
-		}
-	}
-
-} // namespace bgfx
diff --git a/src/image.h b/src/image.h
deleted file mode 100644
index ef05a1f2b..000000000
--- a/src/image.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
- * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
- */
-
-#ifndef BGFX_IMAGE_H_HEADER_GUARD
-#define BGFX_IMAGE_H_HEADER_GUARD
-
-#include <bx/pixelformat.h>
-
-namespace bgfx
-{
-	struct ImageContainer
-	{
-		bx::AllocatorI* m_allocator;
-		void*           m_data;
-
-		TextureFormat::Enum m_format;
-
-		uint32_t m_size;
-		uint32_t m_offset;
-		uint32_t m_width;
-		uint32_t m_height;
-		uint32_t m_depth;
-		uint16_t m_numLayers;
-		uint8_t  m_numMips;
-		bool     m_hasAlpha;
-		bool     m_cubeMap;
-		bool     m_ktx;
-		bool     m_ktxLE;
-		bool     m_srgb;
-	};
-
-	struct ImageMip
-	{
-		TextureFormat::Enum m_format;
-		uint32_t m_width;
-		uint32_t m_height;
-		uint32_t m_blockSize;
-		uint32_t m_size;
-		uint8_t  m_bpp;
-		bool     m_hasAlpha;
-		const uint8_t* m_data;
-	};
-
-	struct ImageBlockInfo
-	{
-		uint8_t bitsPerPixel;
-		uint8_t blockWidth;
-		uint8_t blockHeight;
-		uint8_t blockSize;
-		uint8_t minBlockX;
-		uint8_t minBlockY;
-		uint8_t depthBits;
-		uint8_t stencilBits;
-		uint8_t rBits;
-		uint8_t gBits;
-		uint8_t bBits;
-		uint8_t aBits;
-		uint8_t encoding;
-	};
-
-	/// Returns true if texture format is compressed.
-	bool isCompressed(TextureFormat::Enum _format);
-
-	/// Returns true if texture format is uncompressed.
-	bool isColor(TextureFormat::Enum _format);
-
-	/// Returns true if texture format is depth.
-	bool isDepth(TextureFormat::Enum _format);
-
-	/// Returns true if texture format is valid.
-	bool isValid(TextureFormat::Enum _format);
-
-	/// Returns bits per pixel.
-	uint8_t getBitsPerPixel(TextureFormat::Enum _format);
-
-	/// Returns texture block info.
-	const ImageBlockInfo& getBlockInfo(TextureFormat::Enum _format);
-
-	/// Converts format to string.
-	const char* getName(TextureFormat::Enum _format);
-
-	/// Converts string to format.
-	TextureFormat::Enum getFormat(const char* _name);
-
-	/// Returns number of mip-maps required for complete mip-map chain.
-	uint8_t imageGetNumMips(
-		  TextureFormat::Enum _format
-		, uint16_t _width
-		, uint16_t _height
-		, uint16_t _depth = 0
-		);
-
-	/// Returns image size.
-	uint32_t imageGetSize(
-		  TextureInfo* _info
-		, uint16_t _width
-		, uint16_t _height
-		, uint16_t _depth
-		, bool _cubeMap
-		, bool _hasMips
-		, uint16_t _numLayers
-		, TextureFormat::Enum _format
-		);
-
-	///
-	void imageSolid(void* _dst, uint32_t _width, uint32_t _height, uint32_t _solid);
-
-	///
-	void imageCheckerboard(void* _dst, uint32_t _width, uint32_t _height, uint32_t _step, uint32_t _0, uint32_t _1);
-
-	///
-	void imageRgba8Downsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-	///
-	void imageRgba32fToLinear(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-	///
-	void imageRgba32fToGamma(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-	///
-	void imageRgba32fLinearDownsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-	///
-	void imageRgba32fDownsample2x2NormalMap(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-	///
-	void imageSwizzleBgra8(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src);
-
-	///
-	void imageCopy(void* _dst, uint32_t _height, uint32_t _srcPitch, const void* _src, uint32_t _dstPitch);
-
-	///
-	void imageCopy(void* _dst, uint32_t _width, uint32_t _height, uint32_t _bpp, uint32_t _pitch, const void* _src);
-
-	///
-	bool imageConvert(TextureFormat::Enum _dstFormat, TextureFormat::Enum _srcFormat);
-
-	///
-	void imageConvert(
-		  void* _dst
-		, uint32_t _bpp
-		, bx::PackFn _pack
-		, const void* _src
-		, bx::UnpackFn _unpack
-		, uint32_t _size
-		);
-
-	///
-	void imageConvert(
-		  void* _dst
-		, uint32_t _dstBpp
-		, bx::PackFn _pack
-		, const void* _src
-		, uint32_t _srcBpp
-		, bx::UnpackFn _unpack
-		, uint32_t _width
-		, uint32_t _height
-		, uint32_t _srcPitch
-		);
-
-	///
-	bool imageConvert(
-		  void* _dst
-		, TextureFormat::Enum _dstFormat
-		, const void* _src
-		, TextureFormat::Enum _srcFormat
-		, uint32_t _width
-		, uint32_t _height
-		);
-
-	///
-	ImageContainer* imageConvert(
-		  bx::AllocatorI* _allocator
-		, TextureFormat::Enum _dstFormat
-		, const void* _src
-		, uint32_t _size
-		);
-
-	///
-	ImageContainer* imageAlloc(
-		  bx::AllocatorI* _allocator
-		, TextureFormat::Enum _format
-		, uint16_t _width
-		, uint16_t _height
-		, uint16_t _depth
-		, uint16_t _numLayers
-		, bool _cubeMap
-		, bool _hasMips
-		, const void* _data = NULL
-		);
-
-	///
-	void imageFree(ImageContainer* _imageContainer);
-
-	///
-	void imageWriteTga(
-		  bx::WriterI* _writer
-		, uint32_t _width
-		, uint32_t _height
-		, uint32_t _pitch
-		, const void* _src
-		, bool _grayscale
-		, bool _yflip
-		, bx::Error* _err = NULL
-		);
-
-	///
-	void imageWriteKtx(
-		  bx::WriterI* _writer
-		, TextureFormat::Enum _format
-		, bool _cubeMap
-		, uint32_t _width
-		, uint32_t _height
-		, uint32_t _depth
-		, uint8_t _numMips
-		, const void* _src
-		, bx::Error* _err = NULL
-		);
-
-	///
-	void imageWriteKtx(
-		  bx::WriterI* _writer
-		, ImageContainer& _imageContainer
-		, const void* _data
-		, uint32_t _size
-		, bx::Error* _err = NULL
-		);
-
-	///
-	bool imageParse(ImageContainer& _imageContainer, bx::ReaderSeekerI* _reader);
-
-	///
-	bool imageParse(ImageContainer& _imageContainer, const void* _data, uint32_t _size);
-
-	///
-	void imageDecodeToBgra8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _pitch, TextureFormat::Enum _format);
-
-	///
-	void imageDecodeToRgba8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _pitch, TextureFormat::Enum _format);
-
-	///
-	void imageDecodeToRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _pitch, TextureFormat::Enum _format);
-
-	///
-	bool imageGetRawData(const ImageContainer& _imageContainer, uint16_t _side, uint8_t _lod, const void* _data, uint32_t _size, ImageMip& _mip);
-
-} // namespace bgfx
-
-#endif // BGFX_IMAGE_H_HEADER_GUARD
diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp
index ccf84d971..cfbfa50ca 100644
--- a/src/renderer_d3d11.cpp
+++ b/src/renderer_d3d11.cpp
@@ -1386,7 +1386,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 				{
 					uint16_t support = BGFX_CAPS_FORMAT_TEXTURE_NONE;
 
-					const DXGI_FORMAT fmt = isDepth(TextureFormat::Enum(ii) )
+					const DXGI_FORMAT fmt = bimg::isDepth(bimg::TextureFormat::Enum(ii) )
 						? s_textureFormat[ii].m_fmtDsv
 						: s_textureFormat[ii].m_fmt
 						;
@@ -1861,7 +1861,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 			uint8_t* src       = (uint8_t*)mapped.pData;
 			uint32_t srcPitch  = mapped.RowPitch;
 
-			const uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(texture.m_textureFormat) );
+			const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(texture.m_textureFormat) );
 			uint8_t* dst      = (uint8_t*)_data;
 			uint32_t dstPitch = srcWidth*bpp/8;
 
@@ -2026,7 +2026,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 
 				D3D11_MAPPED_SUBRESOURCE mapped;
 				DX_CHECK(m_deviceCtx->Map(texture, 0, D3D11_MAP_READ, 0, &mapped) );
-				imageSwizzleBgra8(
+				bimg::imageSwizzleBgra8(
 					  mapped.pData
 					, backBufferDesc.Width
 					, backBufferDesc.Height
@@ -3294,7 +3294,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 				D3D11_MAPPED_SUBRESOURCE mapped;
 				DX_CHECK(m_deviceCtx->Map(m_captureTexture, 0, D3D11_MAP_READ, 0, &mapped) );
 
-				imageSwizzleBgra8(
+				bimg::imageSwizzleBgra8(
 					  mapped.pData
 					, getBufferWidth()
 					, getBufferHeight()
@@ -4319,14 +4319,14 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 
 	void TextureD3D11::create(const Memory* _mem, uint32_t _flags, uint8_t _skip)
 	{
-		ImageContainer imageContainer;
+		bimg::ImageContainer imageContainer;
 
-		if (imageParse(imageContainer, _mem->data, _mem->size) )
+		if (bimg::imageParse(imageContainer, _mem->data, _mem->size) )
 		{
 			uint8_t numMips = imageContainer.m_numMips;
 			const uint8_t startLod = uint8_t(bx::uint32_min(_skip, numMips-1) );
 			numMips -= startLod;
-			const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) );
+			const bimg::ImageBlockInfo& blockInfo = bimg::getBlockInfo(bimg::TextureFormat::Enum(imageContainer.m_format) );
 			const uint32_t textureWidth  = bx::uint32_max(blockInfo.blockWidth,  imageContainer.m_width >>startLod);
 			const uint32_t textureHeight = bx::uint32_max(blockInfo.blockHeight, imageContainer.m_height>>startLod);
 			const uint16_t numLayers     = imageContainer.m_numLayers;
@@ -4338,7 +4338,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 			m_requestedFormat  = uint8_t(imageContainer.m_format);
 			m_textureFormat    = uint8_t(getViableTextureFormat(imageContainer) );
 			const bool convert = m_textureFormat != m_requestedFormat;
-			const uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+			const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 
 			if (imageContainer.m_cubeMap)
 			{
@@ -4361,7 +4361,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 
 			uint32_t kk = 0;
 
-			const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) );
+			const bool compressed = bimg::isCompressed(bimg::TextureFormat::Enum(m_textureFormat) );
 			const bool swizzle    = TextureFormat::BGRA8 == m_textureFormat && 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE);
 
 			BX_TRACE("Texture %3d: %s (requested: %s), layers %d, %dx%d%s%s%s."
@@ -4388,8 +4388,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					height = bx::uint32_max(1, height);
 					depth  = bx::uint32_max(1, depth);
 
-					ImageMip mip;
-					if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
+					bimg::ImageMip mip;
+					if (bimg::imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
 					{
 						srd[kk].pSysMem = mip.m_data;
 
@@ -4397,7 +4397,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 						{
 							uint32_t srcpitch = mip.m_width*bpp/8;
 							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, mip.m_width*mip.m_height*bpp/8);
-							imageDecodeToBgra8(temp, mip.m_data, mip.m_width, mip.m_height, srcpitch, mip.m_format);
+							bimg::imageDecodeToBgra8(temp, mip.m_data, mip.m_width, mip.m_height, srcpitch, mip.m_format);
 
 							srd[kk].pSysMem = temp;
 							srd[kk].SysMemPitch = srcpitch;
@@ -4486,7 +4486,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					desc.CPUAccessFlags = 0;
 					desc.MiscFlags      = 0;
 
-					if (isDepth( (TextureFormat::Enum)m_textureFormat) )
+					if (bimg::isDepth(bimg::TextureFormat::Enum(m_textureFormat) ) )
 					{
 						desc.BindFlags |= D3D11_BIND_DEPTH_STENCIL;
 						desc.Usage = D3D11_USAGE_DEFAULT;
@@ -4684,7 +4684,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		}
 
 		const uint32_t subres = _mip + ( (layer + _side) * m_numMips);
-		const uint32_t bpp    = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+		const uint32_t bpp    = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 		const uint32_t rectpitch  = _rect.m_width*bpp/8;
 		const uint32_t srcpitch   = UINT16_MAX == _pitch ? rectpitch : _pitch;
 		const uint32_t slicepitch = rectpitch*_rect.m_height;
@@ -4697,7 +4697,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		if (convert)
 		{
 			temp = (uint8_t*)BX_ALLOC(g_allocator, slicepitch);
-			imageDecodeToBgra8(temp, data, _rect.m_width, _rect.m_height, srcpitch, TextureFormat::Enum(m_requestedFormat) );
+			bimg::imageDecodeToBgra8(temp, data, _rect.m_width, _rect.m_height, srcpitch, bimg::TextureFormat::Enum(m_requestedFormat) );
 			data = temp;
 		}
 
@@ -4793,7 +4793,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		DX_CHECK(device->CreateRenderTargetView(ptr, NULL, &m_rtv[0]) );
 		DX_RELEASE(ptr, 0);
 
-		DXGI_FORMAT fmtDsv = isDepth(_depthFormat)
+		DXGI_FORMAT fmtDsv = bimg::isDepth(bimg::TextureFormat::Enum(_depthFormat) )
 			? s_textureFormat[_depthFormat].m_fmtDsv
 			: DXGI_FORMAT_D24_UNORM_S8_UINT
 			;
@@ -4893,7 +4893,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					const uint32_t msaaQuality = bx::uint32_satsub( (texture.m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
 					const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
 
-					if (isDepth( (TextureFormat::Enum)texture.m_textureFormat) )
+					if (bimg::isDepth(bimg::TextureFormat::Enum(texture.m_textureFormat) ) )
 					{
 						BX_CHECK(NULL == m_dsv, "Frame buffer already has depth-stencil attached.");
 
@@ -5525,7 +5525,7 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 						}
 						else
 						{
-							bool depthStencil = isDepth(TextureFormat::Enum(src.m_textureFormat) );
+							bool depthStencil = bimg::isDepth(bimg::TextureFormat::Enum(src.m_textureFormat) );
 							BX_CHECK(!depthStencil
 								||  (width == src.m_width && height == src.m_height)
 								, "When blitting depthstencil surface, source resolution must match destination."
diff --git a/src/renderer_d3d12.cpp b/src/renderer_d3d12.cpp
index 210d7daa0..e469d9880 100644
--- a/src/renderer_d3d12.cpp
+++ b/src/renderer_d3d12.cpp
@@ -1014,7 +1014,7 @@ namespace bgfx { namespace d3d12
 				{
 					uint16_t support = BGFX_CAPS_FORMAT_TEXTURE_NONE;
 
-					const DXGI_FORMAT fmt = isDepth(TextureFormat::Enum(ii) )
+					const DXGI_FORMAT fmt = bimg::isDepth(bimg::TextureFormat::Enum(ii) )
 						? s_textureFormat[ii].m_fmtDsv
 						: s_textureFormat[ii].m_fmt
 						;
@@ -1448,7 +1448,7 @@ namespace bgfx { namespace d3d12
 			uint8_t* src;
 			readback->Map(0, NULL, (void**)&src);
 
-			const uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(texture.m_textureFormat) );
+			const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(texture.m_textureFormat) );
 			uint8_t* dst      = (uint8_t*)_data;
 			uint32_t dstPitch = srcWidth*bpp/8;
 
@@ -1606,7 +1606,7 @@ namespace bgfx { namespace d3d12
 
 			void* data;
 			readback->Map(0, NULL, (void**)&data);
-			imageSwizzleBgra8(
+			bimg::imageSwizzleBgra8(
 				  data
 				, width
 				, height
@@ -3968,14 +3968,14 @@ data.NumQualityLevels = 0;
 
 	void TextureD3D12::create(const Memory* _mem, uint32_t _flags, uint8_t _skip)
 	{
-		ImageContainer imageContainer;
+		bimg::ImageContainer imageContainer;
 
-		if (imageParse(imageContainer, _mem->data, _mem->size) )
+		if (bimg::imageParse(imageContainer, _mem->data, _mem->size) )
 		{
 			uint8_t numMips = imageContainer.m_numMips;
 			const uint8_t startLod = uint8_t(bx::uint32_min(_skip, numMips-1) );
 			numMips -= startLod;
-			const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) );
+			const bimg::ImageBlockInfo& blockInfo = bimg::getBlockInfo(imageContainer.m_format);
 			const uint32_t textureWidth  = bx::uint32_max(blockInfo.blockWidth,  imageContainer.m_width >>startLod);
 			const uint32_t textureHeight = bx::uint32_max(blockInfo.blockHeight, imageContainer.m_height>>startLod);
 			const uint16_t numLayers     = imageContainer.m_numLayers;
@@ -3987,7 +3987,7 @@ data.NumQualityLevels = 0;
 			m_requestedFormat  = uint8_t(imageContainer.m_format);
 			m_textureFormat    = uint8_t(getViableTextureFormat(imageContainer) );
 			const bool convert = m_textureFormat != m_requestedFormat;
-			const uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+			const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 
 			if (imageContainer.m_cubeMap)
 			{
@@ -4009,7 +4009,7 @@ data.NumQualityLevels = 0;
 
 			uint32_t kk = 0;
 
-			const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) );
+			const bool compressed = bimg::isCompressed(bimg::TextureFormat::Enum(m_textureFormat) );
 			const bool swizzle    = TextureFormat::BGRA8 == m_textureFormat && 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE);
 			uint32_t blockWidth   = 1;
 			uint32_t blockHeight  = 1;
@@ -4051,8 +4051,8 @@ data.NumQualityLevels = 0;
 					height = bx::uint32_max(blockHeight, height);
 					depth  = bx::uint32_max(1, depth);
 
-					ImageMip mip;
-					if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
+					bimg::ImageMip mip;
+					if (bimg::imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
 					{
 						if (convert)
 						{
@@ -4060,7 +4060,7 @@ data.NumQualityLevels = 0;
 							const uint32_t slice = bx::strideAlign(pitch * height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
 
 							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice);
-							imageDecodeToBgra8(temp
+							bimg::imageDecodeToBgra8(temp
 									, mip.m_data
 									, mip.m_width
 									, mip.m_height
@@ -4078,7 +4078,7 @@ data.NumQualityLevels = 0;
 							uint32_t slice = bx::strideAlign( (mip.m_height/blockInfo.blockHeight)*pitch,           D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
 
 							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice);
-							imageCopy(temp
+							bimg::imageCopy(temp
 									, mip.m_height/blockInfo.blockHeight
 									, (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize
 									, mip.m_data
@@ -4096,7 +4096,7 @@ data.NumQualityLevels = 0;
 							const uint32_t slice = bx::strideAlign(pitch * mip.m_height,      D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
 
 							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice);
-							imageCopy(temp
+							bimg::imageCopy(temp
 									, mip.m_height
 									, mip.m_width*mip.m_bpp / 8
 									, mip.m_data
@@ -4164,7 +4164,7 @@ data.NumQualityLevels = 0;
 			D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
 
 			D3D12_CLEAR_VALUE* clearValue = NULL;
-			if (isDepth(TextureFormat::Enum(m_textureFormat) ) )
+			if (bimg::isDepth(bimg::TextureFormat::Enum(m_textureFormat) ) )
 			{
 				resourceDesc.Format = s_textureFormat[m_textureFormat].m_fmt;
 				resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
@@ -4364,7 +4364,7 @@ data.NumQualityLevels = 0;
 		setState(_commandList, D3D12_RESOURCE_STATE_COPY_DEST);
 
 		const uint32_t subres = _mip + (_side * m_numMips);
-		const uint32_t bpp    = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+		const uint32_t bpp    = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 		const uint32_t rectpitch = _rect.m_width*bpp/8;
 		const uint32_t srcpitch  = UINT16_MAX == _pitch ? rectpitch : _pitch;
 
@@ -4481,7 +4481,7 @@ data.NumQualityLevels = 0;
 						m_height = uint32_t(desc.Height);
 					}
 
-					if (isDepth( (TextureFormat::Enum)texture.m_textureFormat) )
+					if (bimg::isDepth(bimg::TextureFormat::Enum(texture.m_textureFormat) ) )
 					{
 						BX_CHECK(!isValid(m_depth), "");
 						m_depth = handle;
@@ -4489,7 +4489,7 @@ data.NumQualityLevels = 0;
 						uint32_t dsvDescriptorSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
 						dsvDescriptor.ptr += (1 + fbhIdx) * dsvDescriptorSize;
 
-						const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(texture.m_textureFormat) );
+						const bimg::ImageBlockInfo& blockInfo = bimg::getBlockInfo(bimg::TextureFormat::Enum(texture.m_textureFormat) );
 						BX_UNUSED(blockInfo);
 
 						D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc;
@@ -5007,7 +5007,7 @@ data.NumQualityLevels = 0;
 							srcLocation.Type      = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
 							srcLocation.SubresourceIndex = srcZ*src.m_numMips+blit.m_srcMip;
 
-							bool depthStencil = isDepth(TextureFormat::Enum(src.m_textureFormat) );
+							bool depthStencil = bimg::isDepth(bimg::TextureFormat::Enum(src.m_textureFormat) );
 							m_commandList->CopyTextureRegion(&dstLocation
 								, blit.m_dstX
 								, blit.m_dstY
diff --git a/src/renderer_d3d9.cpp b/src/renderer_d3d9.cpp
index e133a3c67..9f055f250 100644
--- a/src/renderer_d3d9.cpp
+++ b/src/renderer_d3d9.cpp
@@ -700,7 +700,7 @@ namespace bgfx { namespace d3d9
 				support |= SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter
 					, m_deviceType
 					, adapterFormat
-					, isDepth(TextureFormat::Enum(ii) ) ? D3DUSAGE_DEPTHSTENCIL : D3DUSAGE_RENDERTARGET
+					, bimg::isDepth(bimg::TextureFormat::Enum(ii) ) ? D3DUSAGE_DEPTHSTENCIL : D3DUSAGE_RENDERTARGET
 					, D3DRTYPE_TEXTURE
 					, s_textureFormat[ii].m_fmt
 					) ) ? BGFX_CAPS_FORMAT_TEXTURE_FRAMEBUFFER : BGFX_CAPS_FORMAT_TEXTURE_NONE;
@@ -716,7 +716,7 @@ namespace bgfx { namespace d3d9
 				support |= SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter
 					, m_deviceType
 					, adapterFormat
-					, isDepth(TextureFormat::Enum(ii) ) ? D3DUSAGE_DEPTHSTENCIL : D3DUSAGE_RENDERTARGET
+					, bimg::isDepth(bimg::TextureFormat::Enum(ii) ) ? D3DUSAGE_DEPTHSTENCIL : D3DUSAGE_RENDERTARGET
 					, D3DRTYPE_TEXTURE
 					, s_textureFormat[ii].m_fmt
 					) ) ? BGFX_CAPS_FORMAT_TEXTURE_MIP_AUTOGEN : BGFX_CAPS_FORMAT_TEXTURE_NONE;
@@ -1015,7 +1015,7 @@ namespace bgfx { namespace d3d9
 			uint32_t srcPitch  = lockedRect.Pitch;
 			uint8_t* src       = (uint8_t*)lockedRect.pBits;
 
-			const uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(texture.m_textureFormat) );
+			const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(texture.m_textureFormat) );
 			uint8_t* dst      = (uint8_t*)_data;
 			uint32_t dstPitch = srcWidth*bpp/8;
 
@@ -2504,7 +2504,7 @@ namespace bgfx { namespace d3d9
 	void TextureD3D9::createTexture(uint32_t _width, uint32_t _height, uint8_t _numMips)
 	{
 		m_type = Texture2D;
-		const TextureFormat::Enum fmt = (TextureFormat::Enum)m_textureFormat;
+		const bimg::TextureFormat::Enum fmt = (bimg::TextureFormat::Enum)m_textureFormat;
 
 		DWORD usage = 0;
 		D3DPOOL pool = D3DPOOL_DEFAULT;
@@ -2512,7 +2512,7 @@ namespace bgfx { namespace d3d9
 		const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK);
 		const bool blit         = 0 != (m_flags&BGFX_TEXTURE_BLIT_DST);
 		const bool readBack     = 0 != (m_flags&BGFX_TEXTURE_READ_BACK);
-		if (isDepth(fmt) )
+		if (bimg::isDepth(fmt) )
 		{
 			usage = D3DUSAGE_DEPTHSTENCIL;
 		}
@@ -2543,7 +2543,7 @@ namespace bgfx { namespace d3d9
 			{
 				const Msaa& msaa = s_msaa[msaaQuality];
 
-				if (isDepth(fmt) )
+				if (bimg::isDepth(fmt) )
 				{
 					DX_CHECK(device->CreateDepthStencilSurface(
 						  m_width
@@ -2615,7 +2615,7 @@ namespace bgfx { namespace d3d9
 			, _width
 			, _height
 			, _numMips
-			, getName(fmt)
+			, bimg::getName(fmt)
 			);
 	}
 
@@ -2667,13 +2667,13 @@ namespace bgfx { namespace d3d9
 	void TextureD3D9::createCubeTexture(uint32_t _width, uint8_t _numMips)
 	{
 		m_type = TextureCube;
-		const TextureFormat::Enum fmt = (TextureFormat::Enum)m_textureFormat;
+		const bimg::TextureFormat::Enum fmt = (bimg::TextureFormat::Enum)m_textureFormat;
 
 		DWORD usage = 0;
 
 		const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK);
 		const bool blit         = 0 != (m_flags&BGFX_TEXTURE_BLIT_DST);
-		if (isDepth(fmt) )
+		if (bimg::isDepth(fmt) )
 		{
 			usage = D3DUSAGE_DEPTHSTENCIL;
 		}
@@ -2891,14 +2891,14 @@ namespace bgfx { namespace d3d9
 
 	void TextureD3D9::create(const Memory* _mem, uint32_t _flags, uint8_t _skip)
 	{
-		ImageContainer imageContainer;
+		bimg::ImageContainer imageContainer;
 
-		if (imageParse(imageContainer, _mem->data, _mem->size) )
+		if (bimg::imageParse(imageContainer, _mem->data, _mem->size) )
 		{
 			uint8_t numMips = imageContainer.m_numMips;
 			const uint8_t startLod = uint8_t(bx::uint32_min(_skip, numMips-1) );
 			numMips -= startLod;
-			const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) );
+			const bimg::ImageBlockInfo& blockInfo = bimg::getBlockInfo(bimg::TextureFormat::Enum(imageContainer.m_format) );
 			const uint32_t textureWidth  = bx::uint32_max(blockInfo.blockWidth,  imageContainer.m_width >>startLod);
 			const uint32_t textureHeight = bx::uint32_max(blockInfo.blockHeight, imageContainer.m_height>>startLod);
 
@@ -2911,7 +2911,7 @@ namespace bgfx { namespace d3d9
 			m_textureFormat   = uint8_t(getViableTextureFormat(imageContainer) );
 			const bool convert = m_textureFormat != m_requestedFormat;
 
-			uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+			uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 
 			if (imageContainer.m_cubeMap)
 			{
@@ -2950,8 +2950,8 @@ namespace bgfx { namespace d3d9
 			// bytes. If actual mip size is used it causes memory corruption.
 			// http://www.aras-p.info/texts/D3D9GPUHacks.html#3dc
 			const bool useMipSize = true
-							&& imageContainer.m_format != TextureFormat::BC4
-							&& imageContainer.m_format != TextureFormat::BC5
+							&& imageContainer.m_format != bimg::TextureFormat::BC4
+							&& imageContainer.m_format != bimg::TextureFormat::BC5
 							;
 
 			for (uint8_t side = 0, numSides = imageContainer.m_cubeMap ? 6 : 1; side < numSides; ++side)
@@ -2971,8 +2971,8 @@ namespace bgfx { namespace d3d9
 					mipHeight = bx::uint32_max(blockInfo.blockHeight, mipHeight);
 					uint32_t mipSize = width*height*depth*bpp/8;
 
-					ImageMip mip;
-					if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
+					bimg::ImageMip mip;
+					if (bimg::imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
 					{
 						uint32_t pitch;
 						uint32_t slicePitch;
@@ -2986,7 +2986,7 @@ namespace bgfx { namespace d3d9
 								uint32_t srcpitch = mipWidth*bpp/8;
 
 								uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, srcpitch*mipHeight);
-								imageDecodeToBgra8(temp
+								bimg::imageDecodeToBgra8(temp
 										, mip.m_data
 										, mip.m_width
 										, mip.m_height
@@ -3000,7 +3000,7 @@ namespace bgfx { namespace d3d9
 							}
 							else
 							{
-								imageDecodeToBgra8(bits, mip.m_data, mip.m_width, mip.m_height, pitch, mip.m_format);
+								bimg::imageDecodeToBgra8(bits, mip.m_data, mip.m_width, mip.m_height, pitch, mip.m_format);
 							}
 						}
 						else
@@ -3009,11 +3009,11 @@ namespace bgfx { namespace d3d9
 							switch (m_textureFormat)
 							{
 							case TextureFormat::RGB5A1:
-								imageConvert(bits, 16, bx::packBgr5a1, mip.m_data, bx::unpackRgb5a1, size);
+								bimg::imageConvert(bits, 16, bx::packBgr5a1, mip.m_data, bx::unpackRgb5a1, size);
 								break;
 
 							case TextureFormat::RGBA4:
-								imageConvert(bits, 16, bx::packBgra4, mip.m_data, bx::unpackRgba4, size);
+								bimg::imageConvert(bits, 16, bx::packBgra4, mip.m_data, bx::unpackRgba4, size);
 								break;
 
 							default:
@@ -3045,7 +3045,7 @@ namespace bgfx { namespace d3d9
 
 	void TextureD3D9::update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
 	{
-		const uint32_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+		const uint32_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 		const uint32_t rectpitch = _rect.m_width*bpp/8;
 		const uint32_t srcpitch  = UINT16_MAX == _pitch ? rectpitch : _pitch;
 		const uint32_t dstpitch  = s_renderD3D9->m_updateTexturePitch;
@@ -3059,7 +3059,7 @@ namespace bgfx { namespace d3d9
 		if (convert)
 		{
 			temp = (uint8_t*)BX_ALLOC(g_allocator, rectpitch*_rect.m_height);
-			imageDecodeToBgra8(temp, data, _rect.m_width, _rect.m_height, srcpitch, TextureFormat::Enum(m_requestedFormat) );
+			bimg::imageDecodeToBgra8(temp, data, _rect.m_width, _rect.m_height, srcpitch, bimg::TextureFormat::Enum(m_requestedFormat) );
 			data = temp;
 		}
 
@@ -3071,11 +3071,11 @@ namespace bgfx { namespace d3d9
 				switch (m_textureFormat)
 				{
 				case TextureFormat::RGB5A1:
-					imageConvert(dst, 16, bx::packBgr5a1, src, bx::unpackRgb5a1, rectpitch);
+					bimg::imageConvert(dst, 16, bx::packBgr5a1, src, bx::unpackRgb5a1, rectpitch);
 					break;
 
 				case TextureFormat::RGBA4:
-					imageConvert(dst, 16, bx::packBgra4, src, bx::unpackRgba4, rectpitch);
+					bimg::imageConvert(dst, 16, bx::packBgra4, src, bx::unpackRgba4, rectpitch);
 					break;
 
 				default:
@@ -3211,7 +3211,7 @@ namespace bgfx { namespace d3d9
 					m_height = texture.m_height;
 				}
 
-				if (isDepth( (TextureFormat::Enum)texture.m_textureFormat) )
+				if (bimg::isDepth(bimg::TextureFormat::Enum(texture.m_textureFormat) ) )
 				{
 					m_dsIdx = uint8_t(ii);
 				}
@@ -3801,7 +3801,7 @@ namespace bgfx { namespace d3d9
 						//
 						// GetRenderTargetData (dst must be SYSTEMMEM)
 
-						bool depth = isDepth(TextureFormat::Enum(src.m_textureFormat) );
+						bool depth = bimg::isDepth(bimg::TextureFormat::Enum(src.m_textureFormat) );
 						HRESULT hr = m_device->StretchRect(srcSurface
 							, depth ? NULL : &srcRect
 							, dstSurface
diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp
index 8f4903a2c..0419c966f 100644
--- a/src/renderer_gl.cpp
+++ b/src/renderer_gl.cpp
@@ -1153,10 +1153,10 @@ namespace bgfx { namespace gl
 			: tfi.m_internalFmt
 			;
 
-		GLsizei size = (16*16*getBitsPerPixel(_format) )/8;
+		GLsizei size = (16*16*bimg::getBitsPerPixel(bimg::TextureFormat::Enum(_format) ) )/8;
 		void* data = NULL;
 
-		if (isDepth(_format) )
+		if (bimg::isDepth(bimg::TextureFormat::Enum(_format) ) )
 		{
 			_srgb    = false;
 			_mipmaps = false;
@@ -1169,7 +1169,7 @@ namespace bgfx { namespace gl
 		flushGlError();
 		GLenum err = 0;
 
-		if (isCompressed(_format) )
+		if (bimg::isCompressed(bimg::TextureFormat::Enum(_format) ) )
 		{
 			glCompressedTexImage2D(GL_TEXTURE_2D, 0, internalFmt, 16, 16, 0, size, data);
 			err |= glGetError();
@@ -1294,9 +1294,9 @@ namespace bgfx { namespace gl
 		GLenum err = initTestTexture(_format, _srgb, false);
 
 		GLenum attachment;
-		if (isDepth(_format) )
+		if (bimg::isDepth(bimg::TextureFormat::Enum(_format) ) )
 		{
-			const ImageBlockInfo& info = getBlockInfo(_format);
+			const bimg::ImageBlockInfo& info = bimg::getBlockInfo(bimg::TextureFormat::Enum(_format) );
 			if (0 == info.depthBits)
 			{
 				attachment = GL_STENCIL_ATTACHMENT;
@@ -2413,7 +2413,7 @@ namespace bgfx { namespace gl
 			if (m_readBackSupported)
 			{
 				const TextureGL& texture = m_textures[_handle.idx];
-				const bool compressed    = isCompressed(TextureFormat::Enum(texture.m_textureFormat) );
+				const bool compressed    = bimg::isCompressed(bimg::TextureFormat::Enum(texture.m_textureFormat) );
 
 				GL_CHECK(glBindTexture(texture.m_target, texture.m_id) );
 
@@ -2558,7 +2558,7 @@ namespace bgfx { namespace gl
 
 			if (GL_RGBA == m_readPixelsFmt)
 			{
-				imageSwizzleBgra8(data, width, height, width*4, data);
+				bimg::imageSwizzleBgra8(data, width, height, width*4, data);
 			}
 
 			g_callback->screenShot(_filePath
@@ -3127,7 +3127,7 @@ namespace bgfx { namespace gl
 
 				if (GL_RGBA == m_readPixelsFmt)
 				{
-					imageSwizzleBgra8(m_capture, m_resolution.m_width, m_resolution.m_height, m_resolution.m_width*4, m_capture);
+					bimg::imageSwizzleBgra8(m_capture, m_resolution.m_width, m_resolution.m_height, m_resolution.m_width*4, m_capture);
 				}
 
 				g_callback->captureFrame(m_capture, m_captureSize);
@@ -4810,9 +4810,9 @@ namespace bgfx { namespace gl
 
 	void TextureGL::create(const Memory* _mem, uint32_t _flags, uint8_t _skip)
 	{
-		ImageContainer imageContainer;
+		bimg::ImageContainer imageContainer;
 
-		if (imageParse(imageContainer, _mem->data, _mem->size) )
+		if (bimg::imageParse(imageContainer, _mem->data, _mem->size) )
 		{
 			uint8_t numMips = imageContainer.m_numMips;
 			const uint8_t startLod = uint8_t(bx::uint32_min(_skip, numMips-1) );
@@ -4822,7 +4822,7 @@ namespace bgfx { namespace gl
 			uint32_t textureHeight;
 			uint32_t textureDepth;
 			{
-				const ImageBlockInfo& ibi = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) );
+				const bimg::ImageBlockInfo& ibi = bimg::getBlockInfo(bimg::TextureFormat::Enum(imageContainer.m_format) );
 				textureWidth  = bx::uint32_max(ibi.blockWidth,  imageContainer.m_width >>startLod);
 				textureHeight = bx::uint32_max(ibi.blockHeight, imageContainer.m_height>>startLod);
 				textureDepth  = 1 < imageContainer.m_depth
@@ -4888,7 +4888,7 @@ namespace bgfx { namespace gl
 				&& !s_textureFormat[m_requestedFormat].m_supported
 				&& !s_renderGL->m_textureSwizzleSupport
 				;
-			const bool compressed = isCompressed(TextureFormat::Enum(m_requestedFormat) );
+			const bool compressed = bimg::isCompressed(bimg::TextureFormat::Enum(m_requestedFormat) );
 			const bool convert    = false
 				|| m_textureFormat != m_requestedFormat
 				|| swizzle
@@ -4941,8 +4941,8 @@ namespace bgfx { namespace gl
 						: side
 						;
 
-					ImageMip mip;
-					if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
+					bimg::ImageMip mip;
+					if (bimg::imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
 					{
 						if (compressed
 						&& !convert)
@@ -4994,7 +4994,7 @@ namespace bgfx { namespace gl
 						{
 							uint32_t size = bx::uint32_max(1, (width  + 3)>>2)
 										  * bx::uint32_max(1, (height + 3)>>2)
-										  * 4*4*getBitsPerPixel(TextureFormat::Enum(m_textureFormat) )/8
+										  * 4*4* bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) )/8
 										  ;
 
 							compressedTexImage(imageTarget
@@ -5066,7 +5066,7 @@ namespace bgfx { namespace gl
 
 	void TextureGL::update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
 	{
-		const uint32_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+		const uint32_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(m_textureFormat) );
 		const uint32_t rectpitch = _rect.m_width*bpp/8;
 		uint32_t srcpitch  = UINT16_MAX == _pitch ? rectpitch : _pitch;
 
@@ -5084,7 +5084,7 @@ namespace bgfx { namespace gl
 			&& !s_renderGL->m_textureSwizzleSupport
 			;
 		const bool unpackRowLength = BX_IGNORE_C4127(!!BGFX_CONFIG_RENDERER_OPENGL || s_extension[Extension::EXT_unpack_subimage].m_supported);
-		const bool compressed      = isCompressed(TextureFormat::Enum(m_requestedFormat) );
+		const bool compressed      = bimg::isCompressed(bimg::TextureFormat::Enum(m_requestedFormat) );
 		const bool convert         = false
 			|| (compressed && m_textureFormat != m_requestedFormat)
 			|| swizzle
@@ -5110,7 +5110,7 @@ namespace bgfx { namespace gl
 
 			if (!unpackRowLength)
 			{
-				imageCopy(temp, width, height, bpp, srcpitch, data);
+				bimg::imageCopy(temp, width, height, bpp, srcpitch, data);
 				data = temp;
 			}
 
@@ -5133,7 +5133,7 @@ namespace bgfx { namespace gl
 
 			if (convert)
 			{
-				imageDecodeToRgba8(temp, data, width, height, srcpitch, TextureFormat::Enum(m_requestedFormat) );
+				bimg::imageDecodeToRgba8(temp, data, width, height, srcpitch, bimg::TextureFormat::Enum(m_requestedFormat) );
 				data = temp;
 				srcpitch = rectpitch;
 			}
@@ -5141,7 +5141,7 @@ namespace bgfx { namespace gl
 			if (!unpackRowLength
 			&&  !convert)
 			{
-				imageCopy(temp, width, height, bpp, srcpitch, data);
+				bimg::imageCopy(temp, width, height, bpp, srcpitch, data);
 				data = temp;
 			}
 
@@ -5883,10 +5883,10 @@ namespace bgfx { namespace gl
 					}
 
 					GLenum attachment = GL_COLOR_ATTACHMENT0 + colorIdx;
-					TextureFormat::Enum format = (TextureFormat::Enum)texture.m_textureFormat;
-					if (isDepth(format) )
+					bimg::TextureFormat::Enum format = bimg::TextureFormat::Enum(texture.m_textureFormat);
+					if (bimg::isDepth(format) )
 					{
-						const ImageBlockInfo& info = getBlockInfo(format);
+						const bimg::ImageBlockInfo& info = bimg::getBlockInfo(format);
 						if (0 < info.stencilBits)
 						{
 							attachment = GL_DEPTH_STENCIL_ATTACHMENT;
@@ -5973,7 +5973,7 @@ namespace bgfx { namespace gl
 						if (0 != texture.m_id)
 						{
 							GLenum attachment = GL_COLOR_ATTACHMENT0 + colorIdx;
-							if (!isDepth( (TextureFormat::Enum)texture.m_textureFormat) )
+							if (!bimg::isDepth(bimg::TextureFormat::Enum(texture.m_textureFormat) ) )
 							{
 								++colorIdx;
 
diff --git a/src/renderer_vk.cpp b/src/renderer_vk.cpp
index 8e2f66c51..5a4d523ce 100644
--- a/src/renderer_vk.cpp
+++ b/src/renderer_vk.cpp
@@ -986,7 +986,7 @@ VK_IMPORT_INSTANCE
 					{
 						uint8_t support = BGFX_CAPS_FORMAT_TEXTURE_NONE;
 
-						const bool depth = isDepth(TextureFormat::Enum(ii) );
+						const bool depth = bimg::isDepth(bimg::TextureFormat::Enum(ii) );
 						VkFormat fmt = depth
 							? s_textureFormat[ii].m_fmtDsv
 							: s_textureFormat[ii].m_fmt
diff --git a/tools/texturec/texturec.cpp b/tools/texturec/texturec.cpp
index 43d4aec42..b7e629f81 100644
--- a/tools/texturec/texturec.cpp
+++ b/tools/texturec/texturec.cpp
@@ -8,22 +8,8 @@
 #include <bx/readerwriter.h>
 #include <bx/endian.h>
 
-#include <bgfx/bgfx.h>
-
-#include "image.h"
-#include "image_decode.h"
-
-#include <libsquish/squish.h>
-#include <etc1/etc1.h>
-#include <etc2/ProcessRGB.hpp>
-#include <nvtt/nvtt.h>
-#include <pvrtc/PvrTcEncoder.h>
-
-#include <edtaa3/edtaa3func.h>
-
-extern "C" {
-#include <iqa.h>
-}
+#include <bimg/decode.h>
+#include <bimg/encode.h>
 
 #if 0
 #	define BX_TRACE(_format, ...) fprintf(stderr, "" _format "\n", ##__VA_ARGS__)
@@ -34,267 +20,9 @@ extern "C" {
 #include <bx/crtimpl.h>
 #include <bx/uint32_t.h>
 
-namespace bgfx
-{
-	bool imageEncodeFromRgba8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint8_t _format)
-	{
-		TextureFormat::Enum format = TextureFormat::Enum(_format);
-
-		switch (format)
-		{
-		case TextureFormat::BC1:
-		case TextureFormat::BC2:
-		case TextureFormat::BC3:
-		case TextureFormat::BC4:
-		case TextureFormat::BC5:
-			squish::CompressImage( (const uint8_t*)_src, _width, _height, _dst
-				, format == TextureFormat::BC2 ? squish::kDxt3
-				: format == TextureFormat::BC3 ? squish::kDxt5
-				: format == TextureFormat::BC4 ? squish::kBc4
-				: format == TextureFormat::BC5 ? squish::kBc5
-				:                                squish::kDxt1
-				);
-			return true;
-
-		case TextureFormat::BC6H:
-			nvtt::compressBC6H( (const uint8_t*)_src, _width, _height, 4, _dst);
-			return true;
-
-		case TextureFormat::BC7:
-			nvtt::compressBC7( (const uint8_t*)_src, _width, _height, 4, _dst);
-			return true;
-
-		case TextureFormat::ETC1:
-			etc1_encode_image( (const uint8_t*)_src, _width, _height, 4, _width*4, (uint8_t*)_dst);
-			return true;
-
-		case TextureFormat::ETC2:
-			{
-				const uint32_t blockWidth  = (_width +3)/4;
-				const uint32_t blockHeight = (_height+3)/4;
-				const uint32_t pitch = _width*4;
-				const uint8_t* src = (const uint8_t*)_src;
-				uint64_t* dst = (uint64_t*)_dst;
-				for (uint32_t yy = 0; yy < blockHeight; ++yy)
-				{
-					for (uint32_t xx = 0; xx < blockWidth; ++xx)
-					{
-						uint8_t block[4*4*4];
-						const uint8_t* ptr = &src[(yy*pitch+xx*4)*4];
-
-						for (uint32_t ii = 0; ii < 16; ++ii)
-						{ // BGRx
-							bx::memCopy(&block[ii*4], &ptr[(ii%4)*pitch + (ii&~3)], 4);
-							bx::xchg(block[ii*4+0], block[ii*4+2]);
-						}
-
-						*dst++ = ProcessRGB_ETC2(block);
-					}
-				}
-			}
-			return true;
-
-		case TextureFormat::PTC14:
-			{
-				using namespace Javelin;
-				RgbaBitmap bmp;
-				bmp.width  = _width;
-				bmp.height = _height;
-				bmp.data   = (uint8_t*)const_cast<void*>(_src);
-				PvrTcEncoder::EncodeRgb4Bpp(_dst, bmp);
-				bmp.data = NULL;
-			}
-			return true;
-
-		case TextureFormat::PTC14A:
-			{
-				using namespace Javelin;
-				RgbaBitmap bmp;
-				bmp.width  = _width;
-				bmp.height = _height;
-				bmp.data   = (uint8_t*)const_cast<void*>(_src);
-				PvrTcEncoder::EncodeRgba4Bpp(_dst, bmp);
-				bmp.data = NULL;
-			}
-			return true;
-
-		case TextureFormat::BGRA8:
-			imageSwizzleBgra8(_dst, _width, _height, _width*4, _src);
-			return true;
-
-		case TextureFormat::RGBA8:
-			bx::memCopy(_dst, _src, _width*_height*4);
-			return true;
-
-		default:
-			break;
-		}
-
-		return imageConvert(_dst, format, _src, TextureFormat::RGBA8, _width, _height);
-	}
-
-	bool imageEncodeFromRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint8_t _format)
-	{
-		TextureFormat::Enum format = TextureFormat::Enum(_format);
-
-		const uint8_t* src = (const uint8_t*)_src;
-
-		switch (format)
-		{
-		case TextureFormat::RGBA8:
-			{
-				uint8_t* dst = (uint8_t*)_dst;
-				for (uint32_t yy = 0; yy < _height; ++yy)
-				{
-					for (uint32_t xx = 0; xx < _width; ++xx)
-					{
-						const uint32_t offset = yy*_width + xx;
-						const float* input = (const float*)&src[offset * 16];
-						uint8_t* output    = &dst[offset * 4];
-						output[0] = uint8_t(input[0]*255.0f + 0.5f);
-						output[1] = uint8_t(input[1]*255.0f + 0.5f);
-						output[2] = uint8_t(input[2]*255.0f + 0.5f);
-						output[3] = uint8_t(input[3]*255.0f + 0.5f);
-					}
-				}
-			}
-			return true;
-
-		case TextureFormat::BC5:
-			{
-				uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, _width*_height*4);
-				for (uint32_t yy = 0; yy < _height; ++yy)
-				{
-					for (uint32_t xx = 0; xx < _width; ++xx)
-					{
-						const uint32_t offset = yy*_width + xx;
-						const float* input = (const float*)&src[offset * 16];
-						uint8_t* output    = &temp[offset * 4];
-						output[0] = uint8_t(input[0]*255.0f + 0.5f);
-						output[1] = uint8_t(input[1]*255.0f + 0.5f);
-						output[2] = uint8_t(input[2]*255.0f + 0.5f);
-						output[3] = uint8_t(input[3]*255.0f + 0.5f);
-					}
-				}
-
-				imageEncodeFromRgba8(_dst, temp, _width, _height, _format);
-				BX_FREE(_allocator, temp);
-			}
-			return true;
-
-		default:
-			break;
-		}
-
-		return imageConvert(_dst, format, _src, TextureFormat::RGBA32F, _width, _height);
-	}
-
-	void imageRgba32f11to01(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
-	{
-		const uint8_t* src = (const uint8_t*)_src;
-		uint8_t* dst = (uint8_t*)_dst;
-
-		for (uint32_t yy = 0; yy < _height; ++yy)
-		{
-			for (uint32_t xx = 0; xx < _width; ++xx)
-			{
-				const uint32_t offset = yy*_pitch + xx * 16;
-				const float* input = (const float*)&src[offset];
-				float* output = (float*)&dst[offset];
-				output[0] = input[0]*0.5f + 0.5f;
-				output[1] = input[1]*0.5f + 0.5f;
-				output[2] = input[2]*0.5f + 0.5f;
-				output[3] = input[3]*0.5f + 0.5f;
-			}
-		}
-	}
-
-	static void edtaa3(bx::AllocatorI* _allocator, double* _dst, uint32_t _width, uint32_t _height, double* _src)
-	{
-		const uint32_t numPixels = _width*_height;
-
-		short* xdist = (short *)BX_ALLOC(_allocator, numPixels*sizeof(short) );
-		short* ydist = (short *)BX_ALLOC(_allocator, numPixels*sizeof(short) );
-		double* gx   = (double*)BX_ALLOC(_allocator, numPixels*sizeof(double) );
-		double* gy   = (double*)BX_ALLOC(_allocator, numPixels*sizeof(double) );
-
-		::computegradient(_src, _width, _height, gx, gy);
-		::edtaa3(_src, gx, gy, _width, _height, xdist, ydist, _dst);
-
-		for (uint32_t ii = 0; ii < numPixels; ++ii)
-		{
-			if (_dst[ii] < 0.0)
-			{
-				_dst[ii] = 0.0;
-			}
-		}
-
-		BX_FREE(_allocator, xdist);
-		BX_FREE(_allocator, ydist);
-		BX_FREE(_allocator, gx);
-		BX_FREE(_allocator, gy);
-	}
-
-	inline double min(double _a, double _b)
-	{
-		return _a > _b ? _b : _a;
-	}
-
-	inline double max(double _a, double _b)
-	{
-		return _a > _b ? _a : _b;
-	}
-
-	inline double clamp(double _val, double _min, double _max)
-	{
-		return max(min(_val, _max), _min);
-	}
-
-	void imageMakeDist(bx::AllocatorI* _allocator, void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, float _edge, const void* _src)
-	{
-		const uint32_t numPixels = _width*_height;
-
-		double* imgIn   = (double*)BX_ALLOC(_allocator, numPixels*sizeof(double) );
-		double* outside = (double*)BX_ALLOC(_allocator, numPixels*sizeof(double) );
-		double* inside  = (double*)BX_ALLOC(_allocator, numPixels*sizeof(double) );
-
-		for (uint32_t yy = 0; yy < _height; ++yy)
-		{
-			const uint8_t* src = (const uint8_t*)_src + yy*_pitch;
-			double* dst = &imgIn[yy*_width];
-			for (uint32_t xx = 0; xx < _width; ++xx)
-			{
-				dst[xx] = double(src[xx])/255.0;
-			}
-		}
-
-		edtaa3(_allocator, outside, _width, _height, imgIn);
-
-		for (uint32_t ii = 0; ii < numPixels; ++ii)
-		{
-			imgIn[ii] = 1.0 - imgIn[ii];
-		}
-
-		edtaa3(_allocator, inside, _width, _height, imgIn);
-
-		BX_FREE(_allocator, imgIn);
-
-		uint8_t* dst = (uint8_t*)_dst;
-
-		double edgeOffset = _edge*0.5;
-		double invEdge = 1.0/_edge;
-
-		for (uint32_t ii = 0; ii < numPixels; ++ii)
-		{
-			double dist = clamp( ( (outside[ii] - inside[ii])+edgeOffset) * invEdge, 0.0, 1.0);
-			dst[ii] = 255-uint8_t(dist * 255.0);
-		}
-
-		BX_FREE(_allocator, inside);
-		BX_FREE(_allocator, outside);
-	}
-
-} // namespace bgfx
+extern "C" {
+#include <iqa.h>
+}
 
 void help(const char* _error = NULL)
 {
@@ -389,7 +117,7 @@ int main(int _argc, const char* _argv[])
 	bx::close(&reader);
 
 	{
-		using namespace bgfx;
+		using namespace bimg;
 
 		ImageContainer* input = imageParse(&allocator, inputData, inputSize);
 
@@ -398,11 +126,11 @@ int main(int _argc, const char* _argv[])
 			BX_FREE(&allocator, inputData);
 
 			const char* type = cmdLine.findOption('t');
-			bgfx::TextureFormat::Enum format = input->m_format;
+			bimg::TextureFormat::Enum format = input->m_format;
 
 			if (NULL != type)
 			{
-				format = bgfx::getFormat(type);
+				format = bimg::getFormat(type);
 
 				if (!isValid(format) )
 				{
@@ -417,7 +145,7 @@ int main(int _argc, const char* _argv[])
 			if (imageGetRawData(*input, 0, 0, input->m_data, input->m_size, mip) )
 			{
 				uint8_t numMips = mips
-					? imageGetNumMips(format, mip.m_width, mip.m_height)
+					? imageGetNumMips(format, uint16_t(mip.m_width), uint16_t(mip.m_height) )
 					: 1
 					;
 
@@ -425,7 +153,7 @@ int main(int _argc, const char* _argv[])
 
 				if (normalMap)
 				{
-					output = imageAlloc(&allocator, format, mip.m_width, mip.m_height, 0, 1, false, mips);
+					output = imageAlloc(&allocator, format, uint16_t(mip.m_width), uint16_t(mip.m_height), 0, 1, false, mips);
 
 					ImageMip dstMip;
 					imageGetRawData(*output, 0, 0, NULL, 0, dstMip);
@@ -445,8 +173,8 @@ int main(int _argc, const char* _argv[])
 
 					uint32_t size = imageGetSize(
 						  NULL
-						, dstMip.m_width
-						, dstMip.m_height
+						, uint16_t(dstMip.m_width)
+						, uint16_t(dstMip.m_height)
 						, 0
 						, false
 						, false
@@ -498,7 +226,7 @@ int main(int _argc, const char* _argv[])
 				}
 				else if (8 != getBlockInfo(input->m_format).rBits)
 				{
-					output = imageAlloc(&allocator, format, mip.m_width, mip.m_height, 0, 1, false, mips);
+					output = imageAlloc(&allocator, format, uint16_t(mip.m_width), uint16_t(mip.m_height), 0, 1, false, mips);
 
 					ImageMip dstMip;
 					imageGetRawData(*output, 0, 0, NULL, 0, dstMip);
@@ -518,8 +246,8 @@ int main(int _argc, const char* _argv[])
 
 					uint32_t size = imageGetSize(
 						  NULL
-						, dstMip.m_width
-						, dstMip.m_height
+						, uint16_t(dstMip.m_width)
+						, uint16_t(dstMip.m_height)
 						, 0
 						, false
 						, false
@@ -567,7 +295,7 @@ int main(int _argc, const char* _argv[])
 				}
 				else
 				{
-					output = imageAlloc(&allocator, format, mip.m_width, mip.m_height, 0, 1, false, mips);
+					output = imageAlloc(&allocator, format, uint16_t(mip.m_width), uint16_t(mip.m_height), 0, 1, false, mips);
 
 					ImageMip dstMip;
 					imageGetRawData(*output, 0, 0, NULL, 0, dstMip);
@@ -587,8 +315,8 @@ int main(int _argc, const char* _argv[])
 
 					uint32_t size = imageGetSize(
 						  NULL
-						, dstMip.m_width
-						, dstMip.m_height
+						, uint16_t(dstMip.m_width)
+						, uint16_t(dstMip.m_height)
 						, 0
 						, false
 						, false
diff --git a/tools/texturev/texturev.cpp b/tools/texturev/texturev.cpp
index 961c9b81b..182f56f5a 100644
--- a/tools/texturev/texturev.cpp
+++ b/tools/texturev/texturev.cpp
@@ -24,7 +24,7 @@
 #include <string>
 namespace stl = tinystl;
 
-#include "image_decode.h"
+#include <bimg/decode.h>
 
 #include <bgfx/embedded_shader.h>
 
@@ -863,7 +863,7 @@ int _main_(int _argc, char** _argv)
 						, view.m_info.width
 						, view.m_info.height
 						, view.m_info.cubeMap ? " CubeMap" : ""
-						, bgfx::getName(view.m_info.format)
+						, bimg::getName(bimg::TextureFormat::Enum(view.m_info.format) )
 						);
 				}
 				else