From ec3f5d2369e7a11ccf111ea403f3b56c313a2272 Mon Sep 17 00:00:00 2001
From: bkaradzic <branimirkaradzic@gmail.com>
Date: Fri, 8 Nov 2013 20:53:23 -0800
Subject: [PATCH] Added workaround when unpack row length is not supported on
 GLES2.

---
 include/bgfx.h      |  2 +-
 src/image.cpp       | 71 +++++++++++++++++++++++++++------------------
 src/image.h         |  3 ++
 src/renderer_gl.cpp | 40 +++++++++++++++----------
 4 files changed, 71 insertions(+), 45 deletions(-)

diff --git a/include/bgfx.h b/include/bgfx.h
index e539422ce..056c6ca80 100644
--- a/include/bgfx.h
+++ b/include/bgfx.h
@@ -823,7 +823,7 @@ namespace bgfx
 
 	/// Create texture from memory buffer.
 	///
-	/// @param _mem DDS texture data.
+	/// @param _mem DDS, KTX or PVR texture data.
 	/// @param _flags Default texture sampling mode is linear, and wrap mode
 	///   is repeat.
 	///
diff --git a/src/image.cpp b/src/image.cpp
index b539b2f56..8728c4fb6 100644
--- a/src/image.cpp
+++ b/src/image.cpp
@@ -66,7 +66,7 @@ namespace bgfx
 		}
 	}
 
-	void imageRgba8Downsample2x2Ref(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst)
+	void imageRgba8Downsample2x2Ref(uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, void* _dst)
 	{
 		const uint32_t dstwidth  = _width/2;
 		const uint32_t dstheight = _height/2;
@@ -80,27 +80,27 @@ namespace bgfx
 		uint8_t* dst = (uint8_t*)_dst;
 		const uint8_t* src = (const uint8_t*)_src;
 		
-		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
+		for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstheight; ++yy, src += ystep)
 		{
 			const uint8_t* rgba = src;
 			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
 			{
-				float rr = powf(rgba[       0], 2.2f);
-				float gg = powf(rgba[       1], 2.2f);
-				float bb = powf(rgba[       2], 2.2f);
-				float aa =      rgba[       3];
-				rr      += powf(rgba[       4], 2.2f);
-				gg      += powf(rgba[       5], 2.2f);
-				bb      += powf(rgba[       6], 2.2f);
-				aa      +=      rgba[       7];
-				rr      += powf(rgba[_pitch+0], 2.2f);
-				gg      += powf(rgba[_pitch+1], 2.2f);
-				bb      += powf(rgba[_pitch+2], 2.2f);
-				aa      +=      rgba[_pitch+3];
-				rr      += powf(rgba[_pitch+4], 2.2f);
-				gg      += powf(rgba[_pitch+5], 2.2f);
-				bb      += powf(rgba[_pitch+6], 2.2f);
-				aa      +=      rgba[_pitch+7];
+				float rr = powf(rgba[          0], 2.2f);
+				float gg = powf(rgba[          1], 2.2f);
+				float bb = powf(rgba[          2], 2.2f);
+				float aa =      rgba[          3];
+				rr      += powf(rgba[          4], 2.2f);
+				gg      += powf(rgba[          5], 2.2f);
+				bb      += powf(rgba[          6], 2.2f);
+				aa      +=      rgba[          7];
+				rr      += powf(rgba[_srcPitch+0], 2.2f);
+				gg      += powf(rgba[_srcPitch+1], 2.2f);
+				bb      += powf(rgba[_srcPitch+2], 2.2f);
+				aa      +=      rgba[_srcPitch+3];
+				rr      += powf(rgba[_srcPitch+4], 2.2f);
+				gg      += powf(rgba[_srcPitch+5], 2.2f);
+				bb      += powf(rgba[_srcPitch+6], 2.2f);
+				aa      +=      rgba[_srcPitch+7];
 
 				rr *= 0.25f;
 				gg *= 0.25f;
@@ -117,7 +117,7 @@ namespace bgfx
 		}
 	}
 
-	void imageRgba8Downsample2x2(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst)
+	void imageRgba8Downsample2x2(uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, void* _dst)
 	{
 		const uint32_t dstwidth  = _width/2;
 		const uint32_t dstheight = _height/2;
@@ -142,15 +142,15 @@ namespace bgfx
 		const float4_t linear = float4_ld(2.2f, 2.2f, 2.2f, 1.0f);
 		const float4_t quater = float4_splat(0.25f);
 
-		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
+		for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstheight; ++yy, src += ystep)
 		{
 			const uint8_t* rgba = src;
 			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
 			{
 				const float4_t abgr0  = float4_splat(rgba);
 				const float4_t abgr1  = float4_splat(rgba+4);
-				const float4_t abgr2  = float4_splat(rgba+_pitch);
-				const float4_t abgr3  = float4_splat(rgba+_pitch+4);
+				const float4_t abgr2  = float4_splat(rgba+_srcPitch);
+				const float4_t abgr3  = float4_splat(rgba+_srcPitch+4);
 
 				const float4_t abgr0m = float4_and(abgr0, umask);
 				const float4_t abgr1m = float4_and(abgr1, umask);
@@ -198,13 +198,13 @@ namespace bgfx
 		}
 	}
 
-	void imageSwizzleBgra8Ref(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst)
+	void imageSwizzleBgra8Ref(uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, void* _dst)
 	{
 		const uint8_t* src = (uint8_t*) _src;
-		const uint8_t* next = src + _pitch;
+		const uint8_t* next = src + _srcPitch;
 		uint8_t* dst = (uint8_t*)_dst;
 
-		for (uint32_t yy = 0; yy < _height; ++yy, src = next, next += _pitch)
+		for (uint32_t yy = 0; yy < _height; ++yy, src = next, next += _srcPitch)
 		{
 			for (uint32_t xx = 0; xx < _width; ++xx, src += 4, dst += 4)
 			{
@@ -220,7 +220,7 @@ namespace bgfx
 		}
 	}
 
-	void imageSwizzleBgra8(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst)
+	void imageSwizzleBgra8(uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, void* _dst)
 	{
 		// Test can we do four 4-byte pixels at the time.
 		if (0 != (_width&0x3)
@@ -232,7 +232,7 @@ namespace bgfx
 			BX_WARN(bx::isPtrAligned(_src, 16), "Source %p is not 16-byte aligned.", _src);
 			BX_WARN(bx::isPtrAligned(_dst, 16), "Destination %p is not 16-byte aligned.", _dst);
 			BX_WARN(_width < 4, "Image width must be multiple of 4 (width %d).", _width);
-			imageSwizzleBgra8Ref(_width, _height, _pitch, _src, _dst);
+			imageSwizzleBgra8Ref(_width, _height, _srcPitch, _src, _dst);
 			return;
 		}
 
@@ -241,12 +241,12 @@ namespace bgfx
 		const float4_t mf0f0 = float4_isplat(0xff00ff00);
 		const float4_t m0f0f = float4_isplat(0x00ff00ff);
 		const uint8_t* src = (uint8_t*) _src;
-		const uint8_t* next = src + _pitch;
+		const uint8_t* next = src + _srcPitch;
 		uint8_t* dst = (uint8_t*)_dst;
 
 		const uint32_t width = _width/4;
 
-		for (uint32_t yy = 0; yy < _height; ++yy, src = next, next += _pitch)
+		for (uint32_t yy = 0; yy < _height; ++yy, src = next, next += _srcPitch)
 		{
 			for (uint32_t xx = 0; xx < width; ++xx, src += 16, dst += 16)
 			{
@@ -262,6 +262,19 @@ namespace bgfx
 		}
 	}
 
+	void imageCopy(uint32_t _width, uint32_t _height, uint32_t _bpp, uint32_t _srcPitch, const void* _src, void* _dst)
+	{
+		// Repack rows that are _srcPitch bytes apart in _src into rows stored back to back in _dst.
+		const uint32_t pitch = _width*_bpp/8; // size of one packed row in bytes
+		const uint8_t* src = (const uint8_t*)_src;
+		uint8_t* dst = (uint8_t*)_dst;
+
+		for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += pitch) // dst must advance too, or every row lands on row 0
+		{
+			memcpy(dst, src, pitch);
+		}
+	}
+
 	void imageWriteTga(bx::WriterI* _writer, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale, bool _yflip)
 	{
 		uint8_t type = _grayscale ? 3 : 2;
diff --git a/src/image.h b/src/image.h
index 9c7be951f..90e7b03c2 100644
--- a/src/image.h
+++ b/src/image.h
@@ -54,6 +54,9 @@ namespace bgfx
 	///
 	void imageSwizzleBgra8(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst);
 
+	/// Copy _height rows of _width*_bpp/8 bytes each from _src, whose rows are _srcPitch bytes apart, to _dst.
+	void imageCopy(uint32_t _width, uint32_t _height, uint32_t _bpp, uint32_t _srcPitch, const void* _src, void* _dst);
+
 	///
 	void imageWriteTga(bx::WriterI* _writer, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale, bool _yflip);
 
diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp
index ca7e27086..727e4f47d 100644
--- a/src/renderer_gl.cpp
+++ b/src/renderer_gl.cpp
@@ -1612,33 +1612,38 @@ namespace bgfx
 		GL_CHECK(glBindTexture(m_target, m_id) );
 		GL_CHECK(glPixelStorei(GL_UNPACK_ALIGNMENT, 1) );
 
-		if (!!BGFX_CONFIG_RENDERER_OPENGL
-		||  s_extension[Extension::EXT_unpack_subimage].m_supported)
-		{
-			GL_CHECK(glPixelStorei(GL_UNPACK_ROW_LENGTH, srcpitch*8/bpp) );
-		}
-		else
-		{
-			BX_CHECK(false, "There is no fallback for GLES2 when GL_EXT_unpack_subimage extension is not available.");
-		}
-
 		GLenum target = GL_TEXTURE_CUBE_MAP == m_target ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : m_target;
 
-		const bool swizzle    = GL_RGBA == m_fmt && !s_renderCtx->m_textureSwizzleSupport;
-		const bool convert    = m_textureFormat != m_requestedFormat;
-		const bool compressed = TextureFormat::Unknown > m_textureFormat;
+		const bool unpackRowLength = !!BGFX_CONFIG_RENDERER_OPENGL || s_extension[Extension::EXT_unpack_subimage].m_supported;
+		const bool swizzle         = GL_RGBA == m_fmt && !s_renderCtx->m_textureSwizzleSupport;
+		const bool convert         = m_textureFormat != m_requestedFormat;
+		const bool compressed      = TextureFormat::Unknown > m_textureFormat;
 
 		const uint32_t width  = _rect.m_width;
 		const uint32_t height = _rect.m_height;
 
 		uint8_t* temp = NULL;
-		if (convert || swizzle)
+		if (convert
+		||  swizzle
+		||  !unpackRowLength)
 		{
 			temp = (uint8_t*)BX_ALLOC(g_allocator, rectpitch*height);
 		}
+		else if (unpackRowLength)
+		{
+			GL_CHECK(glPixelStorei(GL_UNPACK_ROW_LENGTH, srcpitch*8/bpp) );
+		}
 
 		if (compressed)
 		{
+			const uint8_t* data = _mem->data;
+
+			if (!unpackRowLength)
+			{
+				imageCopy(width, height, bpp, srcpitch, data, temp);
+				data = temp;
+			}
+
 			GL_CHECK(compressedTexSubImage(target+_side
 				, _mip
 				, _rect.m_x
@@ -1649,7 +1654,7 @@ namespace bgfx
 				, _depth
 				, m_fmt
 				, _mem->size
-				, _mem->data
+				, data
 				) );
 		}
 		else
@@ -1668,6 +1673,11 @@ namespace bgfx
 				imageSwizzleBgra8(width, height, srcpitch, data, temp);
 				data = temp;
 			}
+			else if (!unpackRowLength && !convert)
+			{
+				imageCopy(width, height, bpp, srcpitch, data, temp);
+				data = temp;
+			}
 
 			GL_CHECK(texSubImage(target+_side
 				, _mip