From 2d91c114c20f59216163a9e66b9f258e7e0e61be Mon Sep 17 00:00:00 2001 From: Dario Manesku Date: Tue, 12 Nov 2013 17:50:03 +0100 Subject: [PATCH 1/6] Removed duplicate edges from platform mesh. --- examples/14-shadowvolumes/platform.obj | 43 ++++++++++--------------- examples/runtime/meshes/platform.bin | Bin 1641 -> 1437 bytes 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/examples/14-shadowvolumes/platform.obj b/examples/14-shadowvolumes/platform.obj index a8e1ba053..050594362 100644 --- a/examples/14-shadowvolumes/platform.obj +++ b/examples/14-shadowvolumes/platform.obj @@ -1,20 +1,16 @@ # Blender v2.66 (sub 1) OBJ File: '' # www.blender.org -g Plane +g Plane.002 v -1.000000 -0.000000 1.000000 v -1.000000 0.000000 -1.000000 v 1.000000 0.000000 -1.000000 v 1.000000 -0.000000 1.000000 v 1.000000 0.081440 -1.000000 v -1.000000 0.081440 -1.000000 -v -1.000000 0.081440 -1.000000 -v 1.000000 0.081440 -1.000000 v 1.000000 0.081440 1.000000 v -1.000000 0.081440 1.000000 v -0.934893 0.081440 -0.934893 v 0.934893 0.081440 -0.934893 -v 1.000000 0.081440 1.000000 -v -1.000000 0.081440 1.000000 v -0.934893 0.081440 0.934893 v 0.934893 0.081440 0.934893 v 0.934893 0.140616 0.934893 @@ -26,19 +22,18 @@ vt 0.011681 0.988318 vt 0.988318 0.988318 vt 0.988318 0.011681 vt 1.000000 1.000000 -vt 0.000000 1.000000 vt 1.000000 0.000000 vt 0.000000 0.000000 -vt 0.032554 0.967446 -vt 0.967446 0.967446 +vt 0.000000 1.000000 vt 0.032554 0.032553 vt 0.967446 0.032553 vt 0.975715 0.024285 vt 0.024285 0.024285 vt 0.975715 0.975715 vt 0.024285 0.975715 +vt 0.967446 0.967446 +vt 0.032554 0.967446 vn 0.000000 -1.000000 -0.000000 -vn 0.000000 0.000000 0.000000 vn 1.000000 0.000000 0.000000 vn -1.000000 -0.000000 0.000000 vn 0.000000 0.000000 1.000000 @@ -46,20 +41,16 @@ vn 0.000000 -0.000000 -1.000000 vn 0.000000 1.000000 0.000000 s off f 1/1/1 2/2/1 3/3/1 4/4/1 -f 5/5/2 6/6/2 7/6/2 8/5/2 -f 4/4/3 3/3/3 5/5/3 9/7/3 -f 2/2/4 1/1/4 10/8/4 6/6/4 -f 1/1/5 4/4/5 9/7/5 10/8/5 -f 3/3/6 
2/2/6 6/6/6 5/5/6 -f 8/5/7 7/6/7 11/9/7 12/10/7 -f 9/7/2 5/5/2 8/5/2 13/7/2 -f 6/6/2 10/8/2 14/8/2 7/6/2 -f 10/8/2 9/7/2 13/7/2 14/8/2 -f 15/11/5 16/12/5 17/13/5 18/14/5 -f 13/7/7 8/5/7 12/10/7 16/12/7 -f 7/6/7 14/8/7 15/11/7 11/9/7 -f 14/8/7 13/7/7 16/12/7 15/11/7 -f 18/14/7 17/13/7 19/15/7 20/16/7 -f 12/10/6 11/9/6 20/16/6 19/15/6 -f 16/12/3 12/10/3 19/15/3 17/13/3 -f 11/9/4 15/11/4 18/14/4 20/16/4 +f 4/4/2 3/3/2 5/5/2 7/6/2 +f 2/2/3 1/1/3 8/7/3 6/8/3 +f 1/1/4 4/4/4 7/6/4 8/7/4 +f 3/3/5 2/2/5 6/8/5 5/5/5 +f 11/9/4 12/10/4 13/11/4 14/12/4 +f 14/12/6 13/11/6 15/13/6 16/14/6 +f 10/15/5 9/16/5 16/14/5 15/13/5 +f 12/10/2 10/15/2 15/13/2 13/11/2 +f 9/16/3 11/9/3 14/12/3 16/14/3 +f 5/5/6 6/8/6 9/16/6 10/15/6 +f 7/6/6 5/5/6 10/15/6 12/10/6 +f 6/8/6 8/7/6 11/9/6 9/16/6 +f 8/7/6 7/6/6 12/10/6 11/9/6 diff --git a/examples/runtime/meshes/platform.bin b/examples/runtime/meshes/platform.bin index 00190f0dd49943b4171623fa10daeba7124b276e..097a73721269ded5b4dcc5694ac349eef20b5503 100644 GIT binary patch delta 203 zcmXAjO%B0e6h-gXYDJhz>aVS$sJ6=3pfQki)(CS3M#L5*T><&V64uZi*o{qyTVC?c zJ9qT%yLPW$A0oOvxA{segmlz2f2pyMnX;3a8e7TI#sYw#33a`x7bZ$jo-Ak Date: Tue, 12 Nov 2013 22:18:14 +0100 Subject: [PATCH 2/6] Performance optimizations and SIMD support. 
--- examples/14-shadowvolumes/shadowvolumes.cpp | 270 +++++++++++++------- 1 file changed, 176 insertions(+), 94 deletions(-) diff --git a/examples/14-shadowvolumes/shadowvolumes.cpp b/examples/14-shadowvolumes/shadowvolumes.cpp index 48e6aff36..b04965a78 100644 --- a/examples/14-shadowvolumes/shadowvolumes.cpp +++ b/examples/14-shadowvolumes/shadowvolumes.cpp @@ -11,6 +11,7 @@ using namespace std::tr1; #include #include #include +#include #include "entry/entry.h" #include "fpumath.h" #include "imgui/imgui.h" @@ -23,6 +24,7 @@ using namespace std::tr1; #include #include +#define SV_USE_SIMD 1 #define MAX_INSTANCE_COUNT 25 #define MAX_LIGHTS_COUNT 5 @@ -695,31 +697,23 @@ typedef std::vector FaceArray; struct Edge { - struct Plane - { - float m_plane[4]; - bool m_reverseVertexOrder; - }; - Edge(const float* _v0, const float* _v1) : m_faceIndex(0) { memcpy(m_v0, _v0, 3*sizeof(float) ); memcpy(m_v1, _v1, 3*sizeof(float) ); - } - - Plane& nextFace() - { - BX_CHECK(m_faceIndex < FACE_NUM, "Error! 
2-manifold meshes must be used!"); - return m_faces[(m_faceIndex++)%FACE_NUM]; } - float m_v0[3], m_v1[3]; - static const uint8_t FACE_NUM = 2; - Plane m_faces[FACE_NUM]; + struct Plane + { + float m_plane[4]; + }; + + Plane m_face[2]; + bool m_faceReverseOrder[2]; uint8_t m_faceIndex; + float m_v0[3], m_v1[3]; }; -typedef std::vector EdgeArray; struct HalfEdge { @@ -844,24 +838,48 @@ struct Group m_vertices = NULL; m_numIndices = 0; m_indices = NULL; + m_numEdges = 0; + m_edges = NULL; m_prims.clear(); } + typedef struct { float f[6]; } f6_t; + + struct EdgeComparator + { + bool operator()(const f6_t& _a, const f6_t& _b) + { + if (_a.f[0] < _b.f[0]) return true; + if (_a.f[0] > _b.f[0]) return false; + if (_a.f[1] < _b.f[1]) return true; + if (_a.f[1] > _b.f[1]) return false; + if (_a.f[2] < _b.f[2]) return true; + if (_a.f[2] > _b.f[2]) return false; + if (_a.f[3] < _b.f[3]) return true; + if (_a.f[3] > _b.f[3]) return false; + if (_a.f[4] < _b.f[4]) return true; + if (_a.f[4] > _b.f[4]) return false; + if (_a.f[5] < _b.f[5]) return true; + /*if (_a.f[5] > _b.f[5]) return false;*/ + + return false; + } + }; + void fillStructures(uint16_t _stride) { m_faces.clear(); - m_edges.clear(); m_halfEdges.destroy(); //init halfedges m_halfEdges.init(m_indices, m_numIndices); //init faces and edges - m_faces.reserve(m_numIndices/3); //1 face = 3 indices - m_edges.reserve(m_numIndices); //1 triangle = 3 indices = 3 edges. + m_faces.reserve(m_numIndices/3); //1 face = 3 indices + m_edges = (Edge*)malloc(m_numIndices * sizeof(Edge)); //1 triangle = 3 indices = 3 edges. 
- typedef std::map, uint32_t> EdgeIndexMap; - EdgeIndexMap edgeIndexMap; + typedef std::map EdgeMap; + EdgeMap edgeMap; for (uint32_t ii = 0, size = m_numIndices/3; ii < size; ++ii) { @@ -883,13 +901,6 @@ struct Group memcpy(face.m_plane, plane, 4*sizeof(float) ); m_faces.push_back(face); - uint16_t triangleI[3][2] = - { - {i0, i1}, - {i1, i2}, - {i2, i0}, - }; - const float* triangleV[3][2] = { {v0, v1}, @@ -897,43 +908,38 @@ struct Group {v2, v0}, }; - typedef std::vector TriangleIndex; - TriangleIndex triangleIndex; - for (uint8_t jj = 0; jj < 3; ++jj) - { - EdgeIndexMap::iterator iter = edgeIndexMap.find(std::make_pair(triangleI[jj][1], triangleI[jj][0]) ); - if (edgeIndexMap.end() != iter) - { - const uint32_t index = iter->second; - Edge* edge = &m_edges[index]; + { + const float* v0 = triangleV[jj][0]; + const float* v1 = triangleV[jj][1]; + f6_t key; + f6_t keyInv; + memcpy(&key.f[0], v0, 3*sizeof(float) ); + memcpy(&key.f[3], v1, 3*sizeof(float) ); + memcpy(&keyInv.f[0], v1, 3*sizeof(float) ); + memcpy(&keyInv.f[3], v0, 3*sizeof(float) ); - Edge::Plane& face = edge->nextFace(); - memcpy(face.m_plane, plane, 4*sizeof(float) ); - face.m_reverseVertexOrder = true; + EdgeMap::iterator iter = edgeMap.find(keyInv); + if (iter != edgeMap.end()) + { + Edge& edge = iter->second; + memcpy(edge.m_face[edge.m_faceIndex].m_plane, plane, 4*sizeof(float) ); + edge.m_faceReverseOrder[edge.m_faceIndex] = true; } else { - triangleIndex.push_back(jj); + std::pair result = edgeMap.insert(std::make_pair(key, Edge(v0, v1)) ); + Edge& edge = result.first->second; + memcpy(edge.m_face[edge.m_faceIndex].m_plane, plane, 4*sizeof(float) ); + edge.m_faceReverseOrder[edge.m_faceIndex] = false; + edge.m_faceIndex++; } } + } - for (TriangleIndex::const_iterator iter = triangleIndex.begin(), end = triangleIndex.end(); iter != end; ++iter) - { - const uint8_t index = *iter; - const uint16_t i0 = triangleI[index][0]; - const uint16_t i1 = triangleI[index][1]; - const float* v0 = 
triangleV[index][0]; - const float* v1 = triangleV[index][1]; - - Edge edge(v0, v1); - Edge::Plane& face = edge.nextFace(); - memcpy(face.m_plane, plane, 4*sizeof(float) ); - face.m_reverseVertexOrder = false; - m_edges.push_back(edge); - - edgeIndexMap.insert(std::make_pair(std::make_pair(i0, i1), (uint32_t)m_edges.size()-1) ); - } + for (EdgeMap::const_iterator iter = edgeMap.begin(), end = edgeMap.end(); iter != end; ++iter) + { + memcpy(&m_edges[m_numEdges++], &iter->second, sizeof(Edge)); } } @@ -948,6 +954,8 @@ struct Group m_vertices = NULL; free(m_indices); m_indices = NULL; + free(m_edges); + m_edges = NULL; m_halfEdges.destroy(); } @@ -961,7 +969,8 @@ struct Group Aabb m_aabb; Obb m_obb; PrimitiveArray m_prims; - EdgeArray m_edges; + uint32_t m_numEdges; + Edge* m_edges; FaceArray m_faces; HalfEdges m_halfEdges; }; @@ -1343,7 +1352,8 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume { const uint8_t* vertices = _group.m_vertices; const FaceArray& faces = _group.m_faces; - const EdgeArray& edges = _group.m_edges; + const Edge* edges = _group.m_edges; + const uint32_t numEdges = _group.m_numEdges; HalfEdges& halfEdges = _group.m_halfEdges; struct VertexData @@ -1482,49 +1492,121 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume } else // ShadowVolumeAlgorithm::EdgeBased: { - for (EdgeArray::const_iterator iter = edges.begin(), end = edges.end(); iter != end; ++iter) + uint32_t ii = 0; + +#if SV_USE_SIMD + uint32_t numEdgesRounded = uint32_t(numEdges / 2) * 2; + + using namespace bx; + + const float4_t lx = float4_splat(_light[0]); + const float4_t ly = float4_splat(_light[1]); + const float4_t lz = float4_splat(_light[2]); + + for (; ii < numEdgesRounded; ii+=2) { - const Edge& edge = *iter; - const float* v0 = edge.m_v0; - const float* v1 = edge.m_v1; + const Edge& edge0 = edges[ii]; + const Edge& edge1 = edges[ii+1]; - int16_t k = 0; - for (uint8_t ii = 0; ii < edge.m_faceIndex; ++ii) + const float4_t reverse = 
float4_ild(edge0.m_faceReverseOrder[0] + , edge1.m_faceReverseOrder[0] + , edge0.m_faceReverseOrder[1] + , edge1.m_faceReverseOrder[1] + ); + + const float4_t v0 = float4_ldu(edge0.m_face[0].m_plane); + const float4_t v1 = float4_ldu(edge1.m_face[0].m_plane); + const float4_t v2 = float4_ldu(edge0.m_face[1].m_plane); + const float4_t v3 = float4_ldu(edge1.m_face[1].m_plane); + + const float4_t xxyy0 = float4_shuf_xAyB(v0, v2); + const float4_t zzww0 = float4_shuf_zCwD(v0, v2); + const float4_t xxyy1 = float4_shuf_xAyB(v1, v3); + const float4_t zzww1 = float4_shuf_zCwD(v1, v3); + + const float4_t vX = float4_shuf_xAyB(xxyy0, xxyy1); + const float4_t vY = float4_shuf_zCwD(xxyy0, xxyy1); + const float4_t vZ = float4_shuf_xAyB(zzww0, zzww1); + const float4_t vW = float4_shuf_zCwD(zzww0, zzww1); + + const float4_t r0 = float4_mul(vX, lx); + const float4_t r1 = float4_mul(vY, ly); + const float4_t r2 = float4_mul(vZ, lz); + + const float4_t dot = float4_add(r0, float4_add(r1, r2)); + const float4_t f = float4_add(dot, vW); + + const float4_t mask = float4_cmpgt(f, float4_zero() ); + const float4_t tmp0 = float4_and(mask, float4_splat(1.0f) ); + const float4_t tmp1 = float4_ftoi(tmp0); + const float4_t tmp2 = float4_xor(tmp1, reverse); + const float4_t tmp3 = float4_sll(tmp2, 1); + const float4_t tmp4 = float4_isub(tmp3, float4_isplat(1) ); + + BX_ALIGN_STRUCT_16(int32_t res[4]); + float4_st(&res, tmp4); + + for (uint16_t jj = 0; jj < 2; ++jj) { - const Edge::Plane& face = edge.m_faces[ii]; - - int16_t s = (int16_t)fsign(vec3Dot(face.m_plane, _light) + face.m_plane[3]); - if (face.m_reverseVertexOrder) + int16_t k = res[jj] + res[jj+2]; + if (k != 0) { - s = -s; + verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v0, 0.0f, float(k)); + verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v0, 1.0f, float(k)); + verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v1, 0.0f, float(k)); + verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v1, 1.0f, float(k)); + + k = 
_textureAsStencil ? 1 : k; + uint16_t winding = uint16_t(k > 0); + for (uint8_t ii = 0, end = abs(k); ii < end; ++ii) + { + indicesSide[sideI++] = indexSide; + indicesSide[sideI++] = indexSide + 2 - winding; + indicesSide[sideI++] = indexSide + 1 + winding; + + indicesSide[sideI++] = indexSide + 2; + indicesSide[sideI++] = indexSide + 3 - winding*2; + indicesSide[sideI++] = indexSide + 1 + winding*2; + } + + indexSide += 4; } - k += s; } + } +#endif - if (k == 0) + for (; ii < numEdges; ++ii) + { + const Edge& edge = edges[ii]; + const Edge::Plane& face0 = edge.m_face[0]; + const Edge::Plane& face1 = edge.m_face[1]; + + int16_t s0 = ( (vec3Dot(face0.m_plane, _light) + face0.m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[0]; + int16_t s1 = ( (vec3Dot(face1.m_plane, _light) + face1.m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[1]; + int16_t k = ( (s0 + s1) << 1) - 2; + + if (k != 0) { - continue; + verticesSide[vsideI++] = VertexData(edge.m_v0, 0.0f, k); + verticesSide[vsideI++] = VertexData(edge.m_v0, 1.0f, k); + verticesSide[vsideI++] = VertexData(edge.m_v1, 0.0f, k); + verticesSide[vsideI++] = VertexData(edge.m_v1, 1.0f, k); + + k = _textureAsStencil ? 1 : k; + uint16_t winding = uint16_t(k > 0); + for (uint8_t ii = 0, end = abs(k); ii < end; ++ii) + { + indicesSide[sideI++] = indexSide; + indicesSide[sideI++] = indexSide + 2 - winding; + indicesSide[sideI++] = indexSide + 1 + winding; + + indicesSide[sideI++] = indexSide + 2; + indicesSide[sideI++] = indexSide + 3 - winding*2; + indicesSide[sideI++] = indexSide + 1 + winding*2; + } + + indexSide += 4; } - - verticesSide[vsideI++] = VertexData(v0, 0.0f, k); - verticesSide[vsideI++] = VertexData(v0, 1.0f, k); - verticesSide[vsideI++] = VertexData(v1, 0.0f, k); - verticesSide[vsideI++] = VertexData(v1, 1.0f, k); - - k = _textureAsStencil ? 
1 : k; - uint16_t winding = uint16_t(k > 0); - for (uint8_t ii = 0, end = abs(k); ii < end; ++ii) - { - indicesSide[sideI++] = indexSide; - indicesSide[sideI++] = indexSide + 2 - winding; - indicesSide[sideI++] = indexSide + 1 + winding; - - indicesSide[sideI++] = indexSide + 2; - indicesSide[sideI++] = indexSide + 3 - winding*2; - indicesSide[sideI++] = indexSide + 1 + winding*2; - } - - indexSide += 4; } if (cap) @@ -1944,7 +2026,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) bool settings_updateLights = true; bool settings_updateScene = true; bool settings_mixedSvImpl = true; - bool settings_useStencilTexture = false; + bool settings_useStencilTexture = true; bool settings_drawShadowVolumes = false; float settings_numLights = 1.0f; float settings_instanceCount = 9.0f; From c0c62570ea221f1b1e457596f0f00447915f8861 Mon Sep 17 00:00:00 2001 From: Dario Manesku Date: Tue, 12 Nov 2013 22:20:50 +0100 Subject: [PATCH 3/6] Removed trailing whitespaces. --- examples/14-shadowvolumes/shadowvolumes.cpp | 86 ++++++++++----------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/examples/14-shadowvolumes/shadowvolumes.cpp b/examples/14-shadowvolumes/shadowvolumes.cpp index b04965a78..5e91331ab 100644 --- a/examples/14-shadowvolumes/shadowvolumes.cpp +++ b/examples/14-shadowvolumes/shadowvolumes.cpp @@ -28,7 +28,7 @@ using namespace std::tr1; #define MAX_INSTANCE_COUNT 25 #define MAX_LIGHTS_COUNT 5 -#define VIEWID_RANGE1_PASS0 1 +#define VIEWID_RANGE1_PASS0 1 #define VIEWID_RANGE1_RT_PASS1 2 #define VIEWID_RANGE15_PASS2 3 #define VIEWID_RANGE1_PASS3 20 @@ -374,7 +374,7 @@ struct Uniforms float m_alpha; float m_lightCount; }; - + struct SvParams { float m_useStencilTex; @@ -529,13 +529,13 @@ static RenderState s_renderStates[RenderState::Count] = | BGFX_STATE_MSAA , UINT32_MAX , BGFX_STENCIL_TEST_ALWAYS - | BGFX_STENCIL_FUNC_REF(1) + | BGFX_STENCIL_FUNC_REF(1) | BGFX_STENCIL_FUNC_RMASK(0xff) | BGFX_STENCIL_OP_FAIL_S_KEEP | BGFX_STENCIL_OP_FAIL_Z_KEEP | 
BGFX_STENCIL_OP_PASS_Z_DECR , BGFX_STENCIL_TEST_ALWAYS - | BGFX_STENCIL_FUNC_REF(1) + | BGFX_STENCIL_FUNC_REF(1) | BGFX_STENCIL_FUNC_RMASK(0xff) | BGFX_STENCIL_OP_FAIL_S_KEEP | BGFX_STENCIL_OP_FAIL_Z_KEEP @@ -546,13 +546,13 @@ static RenderState s_renderStates[RenderState::Count] = | BGFX_STATE_MSAA , UINT32_MAX , BGFX_STENCIL_TEST_ALWAYS - | BGFX_STENCIL_FUNC_REF(1) + | BGFX_STENCIL_FUNC_REF(1) | BGFX_STENCIL_FUNC_RMASK(0xff) | BGFX_STENCIL_OP_FAIL_S_KEEP | BGFX_STENCIL_OP_FAIL_Z_INCR | BGFX_STENCIL_OP_PASS_Z_KEEP , BGFX_STENCIL_TEST_ALWAYS - | BGFX_STENCIL_FUNC_REF(1) + | BGFX_STENCIL_FUNC_REF(1) | BGFX_STENCIL_FUNC_RMASK(0xff) | BGFX_STENCIL_OP_FAIL_S_KEEP | BGFX_STENCIL_OP_FAIL_Z_DECR @@ -1123,7 +1123,7 @@ struct Mesh struct Model { Model() - { + { m_program.idx = bgfx::invalidHandle; m_texture.idx = bgfx::invalidHandle; } @@ -1187,7 +1187,7 @@ struct Instance { Instance() : m_svExtrusionDistance(150.0f) - { + { m_color[0] = 1.0f; m_color[1] = 1.0f; m_color[2] = 1.0f; @@ -1376,8 +1376,8 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume bool cap = (ShadowVolumeImpl::DepthFail == _impl); - VertexData* verticesSide = (VertexData*) s_svAllocator.alloc (20000 * sizeof(VertexData) ); - uint16_t* indicesSide = (uint16_t*) s_svAllocator.alloc (20000 * 3*sizeof(uint16_t) ); + VertexData* verticesSide = (VertexData*) s_svAllocator.alloc(20000 * sizeof(VertexData) ); + uint16_t* indicesSide = (uint16_t*) s_svAllocator.alloc(20000 * 3*sizeof(uint16_t) ); uint16_t* indicesFrontCap = 0; uint16_t* indicesBackCap = 0; @@ -1405,7 +1405,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume if (f > 0.0f) { frontFacing = true; - uint16_t triangleEdges[3][2] = + uint16_t triangleEdges[3][2] = { { face.m_i[0], face.m_i[1] }, { face.m_i[1], face.m_i[2] }, @@ -1617,7 +1617,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume const Face& face = *iter; float f = vec3Dot(face.m_plane, _light) + face.m_plane[3]; - bool frontFacing = (f > 0.0f); + bool frontFacing = (f > 0.0f); 
for (uint8_t ii = 0, end = 1 + uint8_t(!_textureAsStencil); ii < end; ++ii) { @@ -1766,10 +1766,10 @@ void createNearClipVolume(float* __restrict _outPlanes24f float nearPlaneV[4] = { - 0.0f * lightSide, - 0.0f * lightSide, - 1.0f * lightSide, - _near * lightSide, + 0.0f * lightSide, + 0.0f * lightSide, + 1.0f * lightSide, + _near * lightSide, }; vec4MulMtx(volumePlanes[4], nearPlaneV, mtxViewTrans); @@ -1817,9 +1817,9 @@ bool clipTest(const float* _planes, uint8_t _planeNum, const Mesh& _mesh, const } } - if (isInside) + if (isInside) { - return true; + return true; } } @@ -1907,19 +1907,19 @@ int _main_(int /*_argc*/, char** /*_argv*/) bgfx::ProgramHandle programColorTexture = loadProgram("vs_shadowvolume_color_texture", "fs_shadowvolume_color_texture" ); bgfx::ProgramHandle programTexture = loadProgram("vs_shadowvolume_texture", "fs_shadowvolume_texture" ); - bgfx::ProgramHandle programBackBlank = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbackblank" ); - bgfx::ProgramHandle programSideBlank = loadProgram("vs_shadowvolume_svside", "fs_shadowvolume_svsideblank" ); - bgfx::ProgramHandle programFrontBlank = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontblank"); + bgfx::ProgramHandle programBackBlank = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbackblank" ); + bgfx::ProgramHandle programSideBlank = loadProgram("vs_shadowvolume_svside", "fs_shadowvolume_svsideblank" ); + bgfx::ProgramHandle programFrontBlank = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontblank"); - bgfx::ProgramHandle programBackColor = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbackcolor" ); - bgfx::ProgramHandle programSideColor = loadProgram("vs_shadowvolume_svside", "fs_shadowvolume_svsidecolor" ); - bgfx::ProgramHandle programFrontColor = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontcolor"); + bgfx::ProgramHandle programBackColor = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbackcolor" ); 
+ bgfx::ProgramHandle programSideColor = loadProgram("vs_shadowvolume_svside", "fs_shadowvolume_svsidecolor" ); + bgfx::ProgramHandle programFrontColor = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontcolor"); - bgfx::ProgramHandle programSideTex = loadProgram("vs_shadowvolume_svside", "fs_shadowvolume_svsidetex" ); - bgfx::ProgramHandle programBackTex1 = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbacktex1" ); - bgfx::ProgramHandle programBackTex2 = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbacktex2" ); - bgfx::ProgramHandle programFrontTex1 = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex1" ); - bgfx::ProgramHandle programFrontTex2 = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex2" ); + bgfx::ProgramHandle programSideTex = loadProgram("vs_shadowvolume_svside", "fs_shadowvolume_svsidetex" ); + bgfx::ProgramHandle programBackTex1 = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbacktex1" ); + bgfx::ProgramHandle programBackTex2 = loadProgram("vs_shadowvolume_svback", "fs_shadowvolume_svbacktex2" ); + bgfx::ProgramHandle programFrontTex1 = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex1" ); + bgfx::ProgramHandle programFrontTex2 = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex2" ); struct ShadowVolumeProgramType { @@ -1947,7 +1947,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) }; bgfx::ProgramHandle svProgs[ShadowVolumeProgramType::Count][ShadowVolumePart::Count] = - { + { { programBackBlank, programSideBlank, programFrontBlank } // Blank ,{ programBackColor, programSideColor, programFrontColor } // Color ,{ programBackTex1, programSideTex, programFrontTex1 } // Tex1 @@ -2015,7 +2015,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) int64_t profTime = 0; int64_t timeOffset = bx::getHPCounter(); - uint32_t numShadowVolumeVertices = 0; + uint32_t numShadowVolumeVertices = 0; uint32_t numShadowVolumeIndices = 0; uint32_t oldWidth = 0; 
@@ -2030,8 +2030,8 @@ int _main_(int /*_argc*/, char** /*_argv*/) bool settings_drawShadowVolumes = false; float settings_numLights = 1.0f; float settings_instanceCount = 9.0f; - ShadowVolumeImpl::Enum settings_shadowVolumeImpl = ShadowVolumeImpl::DepthFail; - ShadowVolumeAlgorithm::Enum settings_shadowVolumeAlgorithm = ShadowVolumeAlgorithm::FaceBased; + ShadowVolumeImpl::Enum settings_shadowVolumeImpl = ShadowVolumeImpl::DepthFail; + ShadowVolumeAlgorithm::Enum settings_shadowVolumeAlgorithm = ShadowVolumeAlgorithm::EdgeBased; int32_t scrollAreaRight = 0; const char* titles[2] = @@ -2193,8 +2193,8 @@ int _main_(int /*_argc*/, char** /*_argv*/) imguiLabel("CPU Time: %7.1f [ms]", double(profTime)*toMs); imguiLabel("Volume Vertices: %5.uk", numShadowVolumeVertices/1000); imguiLabel("Volume Indices: %6.uk", numShadowVolumeIndices/1000); - numShadowVolumeVertices = 0; - numShadowVolumeIndices = 0; + numShadowVolumeVertices = 0; + numShadowVolumeIndices = 0; imguiSeparatorLine(); settings_drawShadowVolumes = imguiCheck("Draw Shadow Volumes", settings_drawShadowVolumes) @@ -2286,7 +2286,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) bgfx::dbgTextPrintf(3, row++, 0x0f, "Stencil:"); bgfx::dbgTextPrintf(8, row++, 0x0f, "Stencil buffer - Faster, but capable only of +1 incr."); bgfx::dbgTextPrintf(8, row++, 0x0f, "Texture as stencil - Slower, but capable of +2 incr."); - } + } // Setup instances Instance shadowCasters[SceneCount][60]; @@ -2538,7 +2538,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) bgfx::setViewClear(VIEWID_RANGE1_RT_PASS1, BGFX_CLEAR_DEPTH_BIT, 0x00000000, 1.0f, 0); bgfx::setViewRenderTarget(VIEWID_RANGE1_RT_PASS1, s_stencilRt); - const RenderState& renderState = s_renderStates[RenderState::ShadowVolume_UsingStencilTexture_BuildDepth]; + const RenderState& renderState = s_renderStates[RenderState::ShadowVolume_UsingStencilTexture_BuildDepth]; for (uint8_t ii = 0; ii < shadowCastersCount[currentScene]; ++ii) { @@ -2554,7 +2554,7 @@ int _main_(int 
/*_argc*/, char** /*_argv*/) profTime = bx::getHPCounter(); /** - * For each light: + * For each light: * 1. Compute and draw shadow volume to stencil buffer * 2. Draw diffuse with stencil test */ @@ -2607,14 +2607,14 @@ int _main_(int /*_argc*/, char** /*_argv*/) const Instance& instance = shadowCasters[currentScene][jj]; Model* model = instance.m_model; - ShadowVolumeImpl::Enum shadowVolumeImpl = settings_shadowVolumeImpl; + ShadowVolumeImpl::Enum shadowVolumeImpl = settings_shadowVolumeImpl; if (settings_mixedSvImpl) - { + { // If instance is inside near clip volume, depth fail must be used, else depth pass is fine. bool isInsideVolume = clipTest(nearClipVolume, 6, model->m_mesh, instance.m_scale, instance.m_pos); shadowVolumeImpl = (isInsideVolume ? ShadowVolumeImpl::DepthFail : ShadowVolumeImpl::DepthPass); } - s_uniforms.m_svparams.m_dfail = float(ShadowVolumeImpl::DepthFail == shadowVolumeImpl); + s_uniforms.m_svparams.m_dfail = float(ShadowVolumeImpl::DepthFail == shadowVolumeImpl); // Compute virtual light position for shadow volume generation. float transformedLightPos[3]; @@ -2673,7 +2673,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) : RenderState::ShadowVolume_UsingStencilTexture_CraftStencil_DepthPass ; - programIndex = ShadowVolumeAlgorithm::FaceBased == settings_shadowVolumeAlgorithm + programIndex = ShadowVolumeAlgorithm::FaceBased == settings_shadowVolumeAlgorithm ? ShadowVolumeProgramType::Tex1 : ShadowVolumeProgramType::Tex2 ; @@ -2773,7 +2773,7 @@ int _main_(int /*_argc*/, char** /*_argv*/) } } - profTime = bx::getHPCounter() - profTime; + profTime = bx::getHPCounter() - profTime; // Lights. const float lightScale[3] = { 1.5f, 1.5f, 1.5f }; From d7647a4cb7000ef71a9a6d98926df880f90098f6 Mon Sep 17 00:00:00 2001 From: Dario Manesku Date: Wed, 13 Nov 2013 16:55:07 +0100 Subject: [PATCH 4/6] Small optimization. 
--- examples/14-shadowvolumes/shadowvolumes.cpp | 31 ++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/examples/14-shadowvolumes/shadowvolumes.cpp b/examples/14-shadowvolumes/shadowvolumes.cpp index 5e91331ab..2d370e76b 100644 --- a/examples/14-shadowvolumes/shadowvolumes.cpp +++ b/examples/14-shadowvolumes/shadowvolumes.cpp @@ -849,20 +849,25 @@ struct Group { bool operator()(const f6_t& _a, const f6_t& _b) { - if (_a.f[0] < _b.f[0]) return true; - if (_a.f[0] > _b.f[0]) return false; - if (_a.f[1] < _b.f[1]) return true; - if (_a.f[1] > _b.f[1]) return false; - if (_a.f[2] < _b.f[2]) return true; - if (_a.f[2] > _b.f[2]) return false; - if (_a.f[3] < _b.f[3]) return true; - if (_a.f[3] > _b.f[3]) return false; - if (_a.f[4] < _b.f[4]) return true; - if (_a.f[4] > _b.f[4]) return false; - if (_a.f[5] < _b.f[5]) return true; - /*if (_a.f[5] > _b.f[5]) return false;*/ + const uint8_t t0 = 0 + | ( (_a.f[0] < _b.f[0]) << 5) + | ( (_a.f[1] < _b.f[1]) << 4) + | ( (_a.f[2] < _b.f[2]) << 3) + | ( (_a.f[3] < _b.f[3]) << 2) + | ( (_a.f[4] < _b.f[4]) << 1) + | ( (_a.f[5] < _b.f[5]) << 0) + ; - return false; + const uint8_t t1 = 0 + | ( (_a.f[0] > _b.f[0]) << 5) + | ( (_a.f[1] > _b.f[1]) << 4) + | ( (_a.f[2] > _b.f[2]) << 3) + | ( (_a.f[3] > _b.f[3]) << 2) + | ( (_a.f[4] > _b.f[4]) << 1) + | ( (_a.f[5] > _b.f[5]) << 0) + ; + + return t0 > t1; } }; From 0381c60b6ddf77ae5f506562fe5105750913f4f5 Mon Sep 17 00:00:00 2001 From: Dario Manesku Date: Wed, 13 Nov 2013 17:53:44 +0100 Subject: [PATCH 5/6] Data structures separated for aligned memory access. 
--- examples/14-shadowvolumes/shadowvolumes.cpp | 98 +++++++++++++-------- 1 file changed, 61 insertions(+), 37 deletions(-) diff --git a/examples/14-shadowvolumes/shadowvolumes.cpp b/examples/14-shadowvolumes/shadowvolumes.cpp index 2d370e76b..b730a2e14 100644 --- a/examples/14-shadowvolumes/shadowvolumes.cpp +++ b/examples/14-shadowvolumes/shadowvolumes.cpp @@ -11,6 +11,7 @@ using namespace std::tr1; #include #include #include +#include #include #include "entry/entry.h" #include "fpumath.h" @@ -697,24 +698,16 @@ typedef std::vector FaceArray; struct Edge { - Edge(const float* _v0, const float* _v1) - : m_faceIndex(0) - { - memcpy(m_v0, _v0, 3*sizeof(float) ); - memcpy(m_v1, _v1, 3*sizeof(float) ); - } - - struct Plane - { - float m_plane[4]; - }; - - Plane m_face[2]; bool m_faceReverseOrder[2]; uint8_t m_faceIndex; float m_v0[3], m_v1[3]; }; +struct Plane +{ + float m_plane[4]; +}; + struct HalfEdge { #define INVALID_EDGE_INDEX UINT16_MAX @@ -840,6 +833,7 @@ struct Group m_indices = NULL; m_numEdges = 0; m_edges = NULL; + m_edgePlanesUnalignedPtr = NULL; m_prims.clear(); } @@ -882,8 +876,24 @@ struct Group //init faces and edges m_faces.reserve(m_numIndices/3); //1 face = 3 indices m_edges = (Edge*)malloc(m_numIndices * sizeof(Edge)); //1 triangle = 3 indices = 3 edges. 
+ m_edgePlanesUnalignedPtr = (Plane*)malloc(m_numIndices * sizeof(Plane) + 15); + m_edgePlanes = (Plane*)bx::alignPtr(m_edgePlanesUnalignedPtr, 0, 16); - typedef std::map EdgeMap; + struct EdgeAndPlane + { + EdgeAndPlane(const float* _v0, const float* _v1) + : m_faceIndex(0) + { + memcpy(m_v0, _v0, 3*sizeof(float) ); + memcpy(m_v1, _v1, 3*sizeof(float) ); + } + + bool m_faceReverseOrder[2]; + uint8_t m_faceIndex; + float m_v0[3], m_v1[3]; + Plane m_plane[2]; + }; + typedef std::map EdgeMap; EdgeMap edgeMap; for (uint32_t ii = 0, size = m_numIndices/3; ii < size; ++ii) @@ -927,24 +937,32 @@ struct Group EdgeMap::iterator iter = edgeMap.find(keyInv); if (iter != edgeMap.end()) { - Edge& edge = iter->second; - memcpy(edge.m_face[edge.m_faceIndex].m_plane, plane, 4*sizeof(float) ); - edge.m_faceReverseOrder[edge.m_faceIndex] = true; + EdgeAndPlane& ep = iter->second; + memcpy(ep.m_plane[ep.m_faceIndex].m_plane, plane, 4*sizeof(float) ); + ep.m_faceReverseOrder[ep.m_faceIndex] = true; } else { - std::pair result = edgeMap.insert(std::make_pair(key, Edge(v0, v1)) ); - Edge& edge = result.first->second; - memcpy(edge.m_face[edge.m_faceIndex].m_plane, plane, 4*sizeof(float) ); - edge.m_faceReverseOrder[edge.m_faceIndex] = false; - edge.m_faceIndex++; + std::pair result = edgeMap.insert(std::make_pair(key, EdgeAndPlane(v0, v1)) ); + EdgeAndPlane& ep = result.first->second; + memcpy(ep.m_plane[ep.m_faceIndex].m_plane, plane, 4*sizeof(float) ); + ep.m_faceReverseOrder[ep.m_faceIndex] = false; + ep.m_faceIndex++; } } } + uint32_t index = 0; for (EdgeMap::const_iterator iter = edgeMap.begin(), end = edgeMap.end(); iter != end; ++iter) { - memcpy(&m_edges[m_numEdges++], &iter->second, sizeof(Edge)); + Edge* edge = &m_edges[m_numEdges]; + Plane* plane = &m_edgePlanes[index]; + + memcpy(edge, iter->second.m_faceReverseOrder, sizeof(Edge)); + memcpy(plane, iter->second.m_plane, 2 * sizeof(Plane)); + + m_numEdges++; + index += 2; } } @@ -961,6 +979,8 @@ struct Group m_indices = 
NULL; free(m_edges); m_edges = NULL; + free(m_edgePlanesUnalignedPtr); + m_edgePlanesUnalignedPtr = NULL; m_halfEdges.destroy(); } @@ -976,6 +996,8 @@ struct Group PrimitiveArray m_prims; uint32_t m_numEdges; Edge* m_edges; + Plane* m_edgePlanesUnalignedPtr; + Plane* m_edgePlanes; FaceArray m_faces; HalfEdges m_halfEdges; }; @@ -1355,11 +1377,12 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume , bool _textureAsStencil = false ) { - const uint8_t* vertices = _group.m_vertices; - const FaceArray& faces = _group.m_faces; - const Edge* edges = _group.m_edges; - const uint32_t numEdges = _group.m_numEdges; - HalfEdges& halfEdges = _group.m_halfEdges; + const uint8_t* vertices = _group.m_vertices; + const FaceArray& faces = _group.m_faces; + const Edge* edges = _group.m_edges; + const Plane* edgePlanes = _group.m_edgePlanes; + const uint32_t numEdges = _group.m_numEdges; + HalfEdges& halfEdges = _group.m_halfEdges; struct VertexData { @@ -1500,7 +1523,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume uint32_t ii = 0; #if SV_USE_SIMD - uint32_t numEdgesRounded = uint32_t(numEdges / 2) * 2; + uint32_t numEdgesRounded = numEdges & (~0x1); using namespace bx; @@ -1512,6 +1535,8 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume { const Edge& edge0 = edges[ii]; const Edge& edge1 = edges[ii+1]; + const Plane* edgePlane0 = &edgePlanes[ii*2]; + const Plane* edgePlane1 = &edgePlanes[ii*2 + 2]; const float4_t reverse = float4_ild(edge0.m_faceReverseOrder[0] , edge1.m_faceReverseOrder[0] @@ -1519,10 +1544,10 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume , edge1.m_faceReverseOrder[1] ); - const float4_t v0 = float4_ldu(edge0.m_face[0].m_plane); - const float4_t v1 = float4_ldu(edge1.m_face[0].m_plane); - const float4_t v2 = float4_ldu(edge0.m_face[1].m_plane); - const float4_t v3 = float4_ldu(edge1.m_face[1].m_plane); + const float4_t v0 = float4_ld(edgePlane0[0].m_plane); + const float4_t v1 = float4_ld(edgePlane1[0].m_plane); + const float4_t v2 = 
float4_ld(edgePlane0[1].m_plane); + const float4_t v3 = float4_ld(edgePlane1[1].m_plane); const float4_t xxyy0 = float4_shuf_xAyB(v0, v2); const float4_t zzww0 = float4_shuf_zCwD(v0, v2); @@ -1583,11 +1608,10 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume for (; ii < numEdges; ++ii) { const Edge& edge = edges[ii]; - const Edge::Plane& face0 = edge.m_face[0]; - const Edge::Plane& face1 = edge.m_face[1]; + const Plane* edgePlane = &edgePlanes[ii*2]; - int16_t s0 = ( (vec3Dot(face0.m_plane, _light) + face0.m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[0]; - int16_t s1 = ( (vec3Dot(face1.m_plane, _light) + face1.m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[1]; + int16_t s0 = ( (vec3Dot(edgePlane[0].m_plane, _light) + edgePlane[0].m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[0]; + int16_t s1 = ( (vec3Dot(edgePlane[1].m_plane, _light) + edgePlane[1].m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[1]; int16_t k = ( (s0 + s1) << 1) - 2; if (k != 0) From e432a3739631a17f8734f5f80c4110bdbac71afb Mon Sep 17 00:00:00 2001 From: Dario Manesku Date: Wed, 13 Nov 2013 19:11:34 +0100 Subject: [PATCH 6/6] Small refactor. 
--- examples/14-shadowvolumes/shadowvolumes.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/14-shadowvolumes/shadowvolumes.cpp b/examples/14-shadowvolumes/shadowvolumes.cpp index b730a2e14..0ec841d87 100644 --- a/examples/14-shadowvolumes/shadowvolumes.cpp +++ b/examples/14-shadowvolumes/shadowvolumes.cpp @@ -1566,12 +1566,15 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume const float4_t dot = float4_add(r0, float4_add(r1, r2)); const float4_t f = float4_add(dot, vW); - const float4_t mask = float4_cmpgt(f, float4_zero() ); - const float4_t tmp0 = float4_and(mask, float4_splat(1.0f) ); + const float4_t zero = float4_zero(); + const float4_t mask = float4_cmpgt(f, zero); + const float4_t onef = float4_splat(1.0f); + const float4_t tmp0 = float4_and(mask, onef); const float4_t tmp1 = float4_ftoi(tmp0); const float4_t tmp2 = float4_xor(tmp1, reverse); const float4_t tmp3 = float4_sll(tmp2, 1); - const float4_t tmp4 = float4_isub(tmp3, float4_isplat(1) ); + const float4_t onei = float4_isplat(1); + const float4_t tmp4 = float4_isub(tmp3, onei); BX_ALIGN_STRUCT_16(int32_t res[4]); float4_st(&res, tmp4);