This commit is contained in:
Бранимир Караџић
2019-07-21 22:02:07 -07:00
parent 4059478495
commit 267269f01b
10 changed files with 194 additions and 226 deletions

View File

@@ -3,47 +3,41 @@
#include "uniforms.sh"
BUFFER_WR(u_SubdBufferOut, uint, 1);
BUFFER_RW(u_CulledSubdBuffer, uint, 2);
BUFFER_RW(indirectBuffer, uvec4, 3);
BUFFER_RW(atomicCounterBuffer, uint, 4);
BUFFER_WR(u_SubdBufferIn, uint, 8);
NUM_THREADS(1u, 1u, 1u)
void main()
{
	// Value handed to drawIndexedIndirect below (presumably the per-instance
	// index count -- confirm against bgfx_compute.sh). Falls back to 3 when
	// the shift amount (2 * u_gpu_subd - 1) is not positive.
	uint subd = 6 << (2 * u_gpu_subd - 1);
	if ((2 * u_gpu_subd - 1) <= 0)
	{
		subd = 3u;
	}

	// Seed indirect slot 0 (draw) and slot 1 (dispatch).
	drawIndexedIndirect(indirectBuffer, 0u, subd, 0u, 0u, 0u, 0u);
	dispatchIndirect(indirectBuffer, 1u, 2u / UPDATE_INDIRECT_VALUE_DIVIDE + 1u, 1u, 1u);

	// Every subdivision buffer starts with the same four words: 0, 1, 1, 1.
	// NOTE(review): looks like two (primID, key) pairs, (0, 1) and (1, 1),
	// i.e. the root key of two coarse triangles -- confirm.
	u_SubdBufferOut[0] = 0;
	u_SubdBufferOut[1] = 1;
	u_SubdBufferOut[2] = 1;
	u_SubdBufferOut[3] = 1;

	u_CulledSubdBuffer[0] = 0;
	u_CulledSubdBuffer[1] = 1;
	u_CulledSubdBuffer[2] = 1;
	u_CulledSubdBuffer[3] = 1;

	u_SubdBufferIn[0] = 0;
	u_SubdBufferIn[1] = 1;
	u_SubdBufferIn[2] = 1;
	u_SubdBufferIn[3] = 1;

	// Reset the counters once: [0] = 0, [1] = 0, [2] = 2.
	// The previous values returned through tmp are discarded.
	uint tmp;
	atomicFetchAndExchange(atomicCounterBuffer[0], 0, tmp);
	atomicFetchAndExchange(atomicCounterBuffer[1], 0, tmp);
	atomicFetchAndExchange(atomicCounterBuffer[2], 2, tmp);
}

View File

@@ -7,15 +7,10 @@
#include "fcull.sh"
BUFFER_RO(u_SubdBufferIn, uint, 8);
BUFFER_RW(u_CulledSubdBuffer, uint, 2);
BUFFER_RO(u_VertexBuffer, vec4, 6);
BUFFER_RO(u_IndexBuffer, uint, 7);
// -----------------------------------------------------------------------------
/**
* Compute LoD Shader
*
@@ -26,61 +21,61 @@ BUFFER_RO(u_IndexBuffer, uint, 7);
NUM_THREADS(COMPUTE_THREAD_COUNT, 1u, 1u)
void main()
{
	// get threadID (each key is associated to a thread)
	uint threadID = gl_GlobalInvocationID.x;

	// Threads at or beyond the count stored in counter slot 2 have no key.
	if (threadID >= u_AtomicCounterBuffer[2])
	{
		return;
	}

	// get coarse triangle associated to the key
	// (the input buffer stores two words per entry: primID, then key)
	uint primID = u_SubdBufferIn[threadID*2];

	vec4 v_in[3];
	v_in[0] = u_VertexBuffer[u_IndexBuffer[primID * 3    ]];
	v_in[1] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 1]];
	v_in[2] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 2]];

	// compute distance-based LOD
	uint key = u_SubdBufferIn[threadID*2+1];

	vec4 v[3];  // sub-triangle selected by key
	vec4 vp[3]; // sub-triangle selected by the parent key
	subd(key, v_in, v, vp);

	uint targetLod;
	uint parentLod;

	if (u_freeze == 0)
	{
		targetLod = uint(computeLod(v));
		parentLod = uint(computeLod(vp));
	}
	else
	{
		// Frozen: use the key's own level for both targets.
		targetLod = parentLod = findMSB(key);
	}

	// Must run exactly once per key: each call appends to the output buffer.
	updateSubdBuffer(primID, key, targetLod, parentLod);

	// Cull invisible nodes
	mat4 mvp = u_modelViewProj;
	vec4 bmin = min(min(v[0], v[1]), v[2]);
	vec4 bmax = max(max(v[0], v[1]), v[2]);

	// account for displacement in bound computations
	bmin.z = 0;
	bmax.z = u_DmapFactor;

	// update CulledSubdBuffer
	if (u_cull == 0
	||  frustumCullingTest(mvp, bmin.xyz, bmax.xyz) )
	{
		// write key: reserve two words, then store (primID, key)
		uint idx = 0;
		atomicFetchAndAdd(u_AtomicCounterBuffer[1], 2, idx);

		u_CulledSubdBuffer[idx]   = primID;
		u_CulledSubdBuffer[idx+1] = key;
	}
}

View File

@@ -10,11 +10,10 @@ void main()
uint counter = atomicCounterBuffer[1];
uint subd = 6 << (2 * u_gpu_subd - 1);
if((2 * u_gpu_subd - 1) <= 0) {
subd = 3u;
}
drawIndexedIndirect(indirectBuffer, 0, subd, counter / 2, 0u, 0u, 0u);
}

View File

@@ -9,16 +9,15 @@ void main()
{
	// Fetch the current values of counters 0 and 1 and reset both to zero.
	// counter2 is fetched but not used afterwards.
	uint counter;
	uint counter2;

	atomicFetchAndExchange(atomicCounterBuffer[0], 0u, counter);
	atomicFetchAndExchange(atomicCounterBuffer[1], 0u, counter2);

	// Dispatch X size derived from counter / 2; the + 1u keeps it at least one.
	uint cnt = (counter / 2u) / UPDATE_INDIRECT_VALUE_DIVIDE + 1u;

	// Publish counter / 2 in counter slot 2; the old value (tmp) is discarded.
	uint tmp;
	atomicFetchAndExchange(atomicCounterBuffer[2], (counter / 2), tmp);

	dispatchIndirect(indirectBuffer, 1u, cnt, 1u, 1u);
}

View File

@@ -1,20 +1,7 @@
//////////////////////////////////////////////////////////////////////////////
//
// Frustum Culling API
//
bool frustumCullingTest(mat4 mvp, vec3 bmin, vec3 bmax);
//
//
//// end header file /////////////////////////////////////////////////////
// *****************************************************************************
// Frustum Implementation
// Six plane equations, one vec4 each; point-plane distance is evaluated
// elsewhere as dot(vec4(point, 1), plane).
struct Frustum
{
	vec4 planes[6];
};
@@ -23,18 +10,21 @@ struct Frustum {
*
* Based on "Fast Extraction of Viewing Frustum Planes from the World-
* View-Projection Matrix", by Gil Gribb and Klaus Hartmann.
* This procedure computes the planes of the frustum and normalizes
* them.
*/
void loadFrustum(out Frustum f, mat4 mvp)
{
	// Plane (i*2 + j) combines mvp element (r, 3) with element (r, i) for
	// each component r: added when j == 0, subtracted when j == 1.
	for (int i = 0; i < 3; ++i)
	{
		for (int j = 0; j < 2; ++j)
		{
			f.planes[i*2+j].x = mtxGetElement(mvp, 0, 3) + (j == 0 ? mtxGetElement(mvp, 0, i) : -mtxGetElement(mvp, 0, i));
			f.planes[i*2+j].y = mtxGetElement(mvp, 1, 3) + (j == 0 ? mtxGetElement(mvp, 1, i) : -mtxGetElement(mvp, 1, i));
			f.planes[i*2+j].z = mtxGetElement(mvp, 2, 3) + (j == 0 ? mtxGetElement(mvp, 2, i) : -mtxGetElement(mvp, 2, i));
			f.planes[i*2+j].w = mtxGetElement(mvp, 3, 3) + (j == 0 ? mtxGetElement(mvp, 3, i) : -mtxGetElement(mvp, 3, i));

			// Normalize: divide by the length of the plane normal. The
			// previous code multiplied by the length, which scaled the
			// plane instead of normalizing it. The sign of any point-plane
			// distance is the same either way.
			f.planes[i*2+j] /= length(f.planes[i*2+j].xyz);
		}
	}
}
@@ -66,13 +56,11 @@ bool frustumCullingTest(mat4 mvp, vec3 bmin, vec3 bmax)
Frustum f;
loadFrustum(f, mvp);
for (int i = 0; i < 6 && a >= 0.0f; ++i) {
for (int i = 0; i < 6 && a >= 0.0f; ++i)
{
vec3 n = negativeVertex(bmin, bmax, f.planes[i].xyz);
a = dot(vec4(n, 1.0f), f.planes[i]);
}
return (a >= 0.0);
}

View File

@@ -4,10 +4,9 @@ $input v_texcoord0
void main()
{
	// Slope-map sample (red/green channels) scaled by the displacement factor.
	vec2 s = texture2D(u_SmapSampler, v_texcoord0).rg * u_DmapFactor;

	// Normal built from the negated slope with z = 1, then normalized.
	vec3 n = normalize(vec3(-s, 1));

	// Grey level: n.z clamped to [0, 1] and divided by ~pi.
	float d = clamp(n.z, 0.0, 1.0) / 3.14159;
	vec3 r = vec3(d, d, d);

	gl_FragColor = vec4(r, 1);
}

View File

@@ -4,8 +4,7 @@ $input v_texcoord0
void main()
{
	// Slope-map sample (red/green channels) scaled by the displacement factor.
	vec2 s = texture2D(u_SmapSampler, v_texcoord0).rg * u_DmapFactor;

	// Normal built from the negated slope with z = 1, then normalized.
	vec3 n = normalize(vec3(-s, 1));

	// Output the absolute value of the normal as the colour.
	gl_FragColor = vec4(abs(n), 1);
}

View File

@@ -1,71 +1,70 @@
// Returns the index (0..31) of the most significant set bit of x.
// When x is 0 no bit matches and the initial value uint(-1) is returned.
uint findMSB(uint x)
{
	uint i;
	uint mask;
	uint res = -1;

	// Scan from bit 31 downwards and stop at the first set bit.
	for (i = 0; i < 32; i++)
	{
		mask = 0x80000000 >> i;

		if ((x & mask) != 0)
		{
			res = 31 - i;
			break;
		}
	}

	return res;
}
// Returns the parent of a key: the key with its lowest bit dropped.
uint parentKey(in uint key)
{
	return (key >> 1u);
}
// Writes the two children of a key: the key shifted left by one with a
// 0 bit (children[0]) or a 1 bit (children[1]) appended.
void childrenKeys(in uint key, out uint children[2])
{
	children[0] = (key << 1u) | 0u;
	children[1] = (key << 1u) | 1u;
}
// True when the key is the root key, i.e. exactly 1.
bool isRootKey(in uint key)
{
	return (key == 1u);
}
// True when the key's highest set bit is bit 31, so shifting it left to
// form children would lose that leading bit.
bool isLeafKey(in uint key)
{
	return findMSB(key) == 31;
}
// True when the key's lowest bit is 0 (the children[0] form produced by
// childrenKeys).
bool isChildZeroKey(in uint key)
{
	return ((key & 1u) == 0u);
}
// barycentric interpolation (vec3 corners):
// v[0] + u.x * (v[1] - v[0]) + u.y * (v[2] - v[0])
vec3 berp(in vec3 v[3], in vec2 u)
{
	return v[0] + u.x * (v[1] - v[0]) + u.y * (v[2] - v[0]);
}
// barycentric interpolation (vec4 corners):
// v[0] + u.x * (v[1] - v[0]) + u.y * (v[2] - v[0])
vec4 berp(in vec4 v[3], in vec2 u)
{
	return v[0] + u.x * (v[1] - v[0]) + u.y * (v[2] - v[0]);
}
// get xform from bit value
// Builds a 3x3 matrix from a single key bit. With b = bit and c = 1 - b,
// the three vectors passed to mtxFromCols are (0, c, b), (0.5, b, 0)
// and (0.5, 0, c).
mat3 bitToXform(in uint bit)
{
	float b = float(bit);
	float c = 1.0f - b;

	vec3 c1 = vec3(0.0f, c   , b   );
	vec3 c2 = vec3(0.5f, b   , 0.0f);
	vec3 c3 = vec3(0.5f, 0.0f, c   );

	return mtxFromCols(c1, c2, c3);
}
@@ -73,55 +72,55 @@ mat3 bitToXform(in uint bit)
// get xform from key
// Starts from the identity and, while key > 1, right-multiplies by
// bitToXform of the lowest bit and then shifts the key right by one.
// The leading 1 bit is never consumed, so key <= 1 yields the identity.
mat3 keyToXform(in uint key)
{
	vec3 c1 = vec3(1.0f, 0.0f, 0.0f);
	vec3 c2 = vec3(0.0f, 1.0f, 0.0f);
	vec3 c3 = vec3(0.0f, 0.0f, 1.0f);

	mat3 xf = mtxFromCols(c1, c2, c3);

	while (key > 1u)
	{
		xf = mul(xf, bitToXform(key & 1u));
		key = key >> 1u;
	}

	return xf;
}
// get xform from key as well as xform from parent key
// Returns keyToXform(key); xfp receives keyToXform(parentKey(key)).
mat3 keyToXform(in uint key, out mat3 xfp)
{
	xfp = keyToXform(parentKey(key));
	return keyToXform(key);
}
// subdivision routine (vertex position only)
// Multiplies the key's transform with the matrix whose rows are the three
// input vertices; the rows of the product are written to v_out.
void subd(in uint key, in vec4 v_in[3], out vec4 v_out[3])
{
	mat3   xf = keyToXform(key);
	mat4x3 m  = mtxFromRows(v_in[0], v_in[1], v_in[2]);
	mat4x3 v  = mul(xf, m);

	v_out[0] = mtxGetRow(v, 0);
	v_out[1] = mtxGetRow(v, 1);
	v_out[2] = mtxGetRow(v, 2);
}
// subdivision routine (vertex position only)
// also computes parent position
void subd(in uint key, in vec4 v_in[3], out vec4 v_out[3], out vec4 v_out_p[3])
{
mat3 xfp; mat3 xf = keyToXform(key, xfp);
mat3 xfp; mat3 xf = keyToXform(key, xfp);
mat4x3 m = mtxFromRows(v_in[0], v_in[1], v_in[2]);
mat4x3 v = mul(xf, m);
mat4x3 vp = mul(xfp, m);
mat4x3 vp = mul(xfp, m);
v_out[0] = mtxGetRow(v, 0);
v_out[1] = mtxGetRow(v, 1);
v_out[2] = mtxGetRow(v, 2);
v_out_p[0] = mtxGetRow(vp, 0);
v_out_p[1] = mtxGetRow(vp, 1);
v_out_p[2] = mtxGetRow(vp, 2);

View File

@@ -1,101 +1,99 @@
#include "bgfx_compute.sh"
#include "matrices.sh"
#include "isubd.sh"
#include "uniforms.sh"
BUFFER_RW(u_AtomicCounterBuffer, uint, 4);
BUFFER_RW(u_SubdBufferOut, uint, 1);
SAMPLER2D(u_DmapSampler, 0); // displacement map
SAMPLER2D(u_SmapSampler, 1); // slope map
// displacement map
// Samples mip 0 of the displacement texture at pos * 0.5 + 0.5 and scales
// the red channel by u_DmapFactor.
float dmap(vec2 pos)
{
	return (texture2DLod(u_DmapSampler, pos * 0.5 + 0.5, 0).x) * u_DmapFactor;
}
// Maps a distance z to a level of detail: -2 * log2(clamp(z * lodFactor, 0, 1)).
float distanceToLod(float z, float lodFactor)
{
	// Note that we multiply the result by two because the triangle
	// edge lengths decrease by half every two subdivision steps.
	return -2.0 * log2(clamp(z * lodFactor, 0.0f, 1.0f));
}
// Level of detail for a single point c.
float computeLod(vec3 c)
{
	// displace -- applied exactly once. The height is sampled at the xy of
	// column 3 of u_invView (presumably the camera position -- confirm),
	// not at c.xy.
	c.z += dmap(mtxGetColumn(u_invView, 3).xy);

	// Distance is the length of c after the model-view transform.
	vec3 cxf = mul(u_modelView, vec4(c.x, c.y, c.z, 1)).xyz;
	float z = length(cxf);

	return distanceToLod(z, u_LodFactor);
}
// Level of detail for a triangle, evaluated at the midpoint of the edge
// between v[1] and v[2].
float computeLod(in vec4 v[3])
{
	vec3 c = (v[1].xyz + v[2].xyz) / 2.0;
	return computeLod(c);
}
// Level of detail for a triangle, evaluated at the midpoint of the edge
// between v[1] and v[2].
float computeLod(in vec3 v[3])
{
	vec3 c = (v[1].xyz + v[2].xyz) / 2.0;
	return computeLod(c);
}
// Appends one (primID, key) pair to u_SubdBufferOut. Counter slot 0 is
// advanced atomically by 2 and its previous value is the write position.
void writeKey(uint primID, uint key)
{
	uint idx = 0;
	atomicFetchAndAdd(u_AtomicCounterBuffer[0], 2, idx);

	u_SubdBufferOut[idx]   = primID;
	u_SubdBufferOut[idx+1] = key;
}
// Decides what replaces a key in the output buffer:
//   - subdivide: the key is coarser than targetLod, is not a leaf and is
//     visible -> both children are written;
//   - keep: the key is no finer than parentLod and is visible -> the key
//     is written unchanged;
//   - merge: otherwise the root key is kept as-is, a zero child writes its
//     parent, and any other key writes nothing.
void updateSubdBuffer(
	  uint primID
	, uint key
	, uint targetLod
	, uint parentLod
	, bool isVisible
	)
{
	// extract subdivision level associated to the key
	uint keyLod = findMSB(key);

	// update the key accordingly
	if (/* subdivide ? */ keyLod < targetLod && !isLeafKey(key) && isVisible)
	{
		uint children[2];
		childrenKeys(key, children);

		writeKey(primID, children[0]);
		writeKey(primID, children[1]);
	}
	else if (/* keep ? */ keyLod < (parentLod + 1) && isVisible)
	{
		writeKey(primID, key);
	}
	else /* merge ? */
	{
		if (/* is root ? */ isRootKey(key))
		{
			writeKey(primID, key);
		}
		else if (/* is zero child ? */ isChildZeroKey(key))
		{
			// Only the zero child emits the parent, so a merged pair
			// produces a single parent key.
			writeKey(primID, parentKey(key));
		}
	}
}
// Convenience overload: same as the five-argument form with isVisible = true.
// Forwards exactly once; a second call would append the keys twice.
void updateSubdBuffer(uint primID, uint key, uint targetLod, uint parentLod)
{
	updateSubdBuffer(primID, key, targetLod, parentLod, true);
}

View File

@@ -7,35 +7,33 @@ BUFFER_RO(u_CulledSubdBuffer, uint, 2);
BUFFER_RO(u_VertexBuffer, vec4, 3);
BUFFER_RO(u_IndexBuffer, uint, 4);
void main()
{
	// get threadID (each key is associated to a thread)
	int threadID = gl_InstanceID;

	// get coarse triangle associated to the key
	// (the culled buffer stores two words per entry: primID, then key)
	uint primID = u_CulledSubdBuffer[threadID*2];

	vec4 v_in[3];
	v_in[0] = u_VertexBuffer[u_IndexBuffer[primID * 3    ]];
	v_in[1] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 1]];
	v_in[2] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 2]];

	// compute sub-triangle associated to the key
	uint key = u_CulledSubdBuffer[threadID*2+1];

	vec4 v[3];
	subd(key, v_in, v);

	// compute vertex location
	vec4 finalVertex = berp(v, a_texcoord0);

	// The texture coordinate reads only xy, which the displacement below
	// does not modify.
	v_texcoord0 = finalVertex.xy * 0.5 + 0.5;

	// Displace along z exactly once.
	finalVertex.z += dmap(finalVertex.xy);

	gl_Position = mul(u_modelViewProj, finalVertex);
}