diff --git a/examples/48-drawindirect/cs_drawindirect.sc b/examples/48-drawindirect/cs_drawindirect.sc index 76b82f955..989bb3005 100644 --- a/examples/48-drawindirect/cs_drawindirect.sc +++ b/examples/48-drawindirect/cs_drawindirect.sc @@ -14,19 +14,26 @@ BUFFER_WR(instanceBufferOut, vec4, 2); uniform vec4 u_drawParams; -NUM_THREADS(1, 1, 1) +// Use 64*1*1 local threads +NUM_THREADS(64, 1, 1) void main() { + int tId = int(gl_GlobalInvocationID.x); int numDrawItems = int(u_drawParams.x); int sideSize = int(u_drawParams.y); float time = u_drawParams.z; - // Prepare draw mtx - + // Work out the amount of work we're going to do here int maxToDraw = min(sideSize*sideSize, numDrawItems); - for (int k = 0; k < maxToDraw; k++) { + int numToDrawPerThread = maxToDraw/64 + 1; + + int idxStart = tId*numToDrawPerThread; + int idxMax = min(maxToDraw, (tId+1)*numToDrawPerThread); + + // Prepare draw mtx + for (int k = idxStart; k < idxMax; k++) { int yy = k / sideSize; int xx = k % sideSize; @@ -58,7 +65,7 @@ void main() // Fill indirect buffer - for (int k = 0; k < maxToDraw; k++) { + for (int k = idxStart; k < idxMax; k++) { drawIndexedIndirect( // Target location params: indirectBuffer, // target buffer diff --git a/examples/48-drawindirect/drawindirect.cpp b/examples/48-drawindirect/drawindirect.cpp index 3065fed24..eefd5e172 100644 --- a/examples/48-drawindirect/drawindirect.cpp +++ b/examples/48-drawindirect/drawindirect.cpp @@ -344,26 +344,30 @@ public: // The model matrix for each instance is also set on compute // you could modify this to, eg, do frustrum culling on the GPU float ud[4] = { float(m_nDrawElements), float(m_sideSize), float(time), 0 }; + uint32_t numToDraw = (m_sideSize*m_sideSize); + bgfx::setUniform(u_drawParams, ud); bgfx::setBuffer(0, m_object_list_buffer, bgfx::Access::Read); bgfx::setBuffer(1, m_indirect_buffer_handle, bgfx::Access::Write); bgfx::setBuffer(2, m_instance_buffer, bgfx::Access::Write); - bgfx::dispatch(0, m_indirect_program); + // 
Dispatch the call. We are using 64 local threads on the GPU to process the object list + // So let's dispatch numToDraw/64 + 1 workgroups of 64 local threads (always at least ceil(numToDraw/64)) + bgfx::dispatch(0, m_indirect_program, uint32_t(numToDraw/64 + 1), 1, 1); // Submit our 1 draw call // Set vertex and index buffer. bgfx::setIndexBuffer(m_ibh); bgfx::setVertexBuffer(0, m_vbh); - bgfx::setInstanceDataBuffer(m_instance_buffer, 0, m_sideSize*m_sideSize); + bgfx::setInstanceDataBuffer(m_instance_buffer, 0, numToDraw); // Set render states. bgfx::setState(BGFX_STATE_DEFAULT); // Submit primitive for rendering to view 0. // note that this submission requires the draw count - bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(m_sideSize*m_sideSize)); + bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(numToDraw)); } else { diff --git a/examples/runtime/shaders/dx11/cs_drawindirect.bin b/examples/runtime/shaders/dx11/cs_drawindirect.bin index c7b570e39..5df326d70 100755 Binary files a/examples/runtime/shaders/dx11/cs_drawindirect.bin and b/examples/runtime/shaders/dx11/cs_drawindirect.bin differ diff --git a/examples/runtime/shaders/essl/cs_drawindirect.bin b/examples/runtime/shaders/essl/cs_drawindirect.bin index db53939da..42deaaa49 100755 Binary files a/examples/runtime/shaders/essl/cs_drawindirect.bin and b/examples/runtime/shaders/essl/cs_drawindirect.bin differ diff --git a/examples/runtime/shaders/glsl/cs_drawindirect.bin b/examples/runtime/shaders/glsl/cs_drawindirect.bin index 74133748d..1e6d99753 100755 Binary files a/examples/runtime/shaders/glsl/cs_drawindirect.bin and b/examples/runtime/shaders/glsl/cs_drawindirect.bin differ diff --git a/examples/runtime/shaders/metal/cs_drawindirect.bin b/examples/runtime/shaders/metal/cs_drawindirect.bin index 85a18267e..c871d30b2 100755 Binary files a/examples/runtime/shaders/metal/cs_drawindirect.bin and b/examples/runtime/shaders/metal/cs_drawindirect.bin differ diff --git 
a/examples/runtime/shaders/spirv/cs_drawindirect.bin b/examples/runtime/shaders/spirv/cs_drawindirect.bin index 35f871dd3..e72fcda3b 100755 Binary files a/examples/runtime/shaders/spirv/cs_drawindirect.bin and b/examples/runtime/shaders/spirv/cs_drawindirect.bin differ