diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp index 3901083ee..5ebc64819 100644 --- a/src/renderer_gl.cpp +++ b/src/renderer_gl.cpp @@ -2638,7 +2638,9 @@ namespace bgfx { namespace gl GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) ); ProgramGL& program = m_program[_blitter.m_program.idx]; + program.bindAttributesBegin(); program.bindAttributes(_blitter.m_decl, 0); + program.bindAttributesEnd(); GL_CHECK(glDrawElements(GL_TRIANGLES , _numIndices @@ -3391,7 +3393,9 @@ namespace bgfx { namespace gl ProgramGL& program = m_program[_clearQuad.m_program[numMrt-1].idx]; GL_CHECK(glUseProgram(program.m_id) ); + program.bindAttributesBegin(); program.bindAttributes(vertexDecl, 0); + program.bindAttributesEnd(); float mrtClear[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS][4]; @@ -3980,7 +3984,7 @@ namespace bgfx { namespace gl { BX_WARN(0 != _vsh.m_id, "Invalid vertex/compute shader."); GL_CHECK(glDeleteProgram(m_id) ); - m_used[0] = Attrib::Count; + m_usedCount = 0; m_id = 0; return; } @@ -4310,11 +4314,8 @@ namespace bgfx { namespace gl m_used[used++] = ii; } } - BX_CHECK(used < BX_COUNTOF(m_used), "Out of bounds %d > array size %d." - , used - , BX_COUNTOF(m_used) - ); - m_used[used] = Attrib::Count; + BX_CHECK(used < BX_COUNTOF(m_used), "Out of bounds %d > array size %d.", used, Attrib::Count); + m_usedCount = (uint8_t)used; used = 0; for (uint32_t ii = 0; ii < BX_COUNTOF(s_instanceDataName); ++ii) @@ -4333,9 +4334,9 @@ namespace bgfx { namespace gl m_instanceData[used] = 0xffff; } - void ProgramGL::bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex) const + void ProgramGL::bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex) { - for (uint32_t ii = 0; Attrib::Count != m_used[ii]; ++ii) + for (uint32_t ii = 0, iiEnd = m_usedCount; ii < iiEnd; ++ii) { Attrib::Enum attr = Attrib::Enum(m_used[ii]); GLint loc = m_attributes[attr]; @@ -4375,10 +4376,8 @@ namespace bgfx { namespace gl , (void*)(uintptr_t)baseVertex) ); } - } - else - { - GL_CHECK(glDisableVertexAttribArray(loc) ); + + m_unboundUsedAttrib[ii] = Attrib::Count; } } } @@ -6222,7 +6221,6 @@ namespace bgfx { namespace gl : GL_FILL ) ); - uint32_t baseVertex = 0; GLuint currentVao = 0; bool wasCompute = false; bool viewHasScissor = false; @@ -6945,27 +6943,54 @@ namespace bgfx { namespace gl && 0 == draw.m_stream[0].m_startVertex && 0 == draw.m_instanceDataOffset) { + bool diffStreamHandles = false; + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) + { + streamMask >>= ntz; + idx += ntz; + + if (currentState.m_stream[idx].m_handle.idx != draw.m_stream[idx].m_handle.idx) + { + diffStreamHandles = true; + break; + } + } + if (programChanged - || baseVertex != draw.m_stream[0].m_startVertex || currentState.m_streamMask != draw.m_streamMask - || currentState.m_stream[0].m_handle.idx != draw.m_stream[0].m_handle.idx || currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx || currentState.m_instanceDataOffset != draw.m_instanceDataOffset || currentState.m_instanceDataStride != draw.m_instanceDataStride - || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx) + || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx + || diffStreamHandles) { bx::HashMurmur2A murmur; murmur.begin(); - const Stream& stream = draw.m_stream[0]; - murmur.add(stream.m_handle.idx); - - if (isValid(stream.m_handle) ) + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) { - const VertexBufferGL& vb = m_vertexBuffers[stream.m_handle.idx]; - uint16_t decl = !isValid(vb.m_decl) ? stream.m_decl.idx : vb.m_decl.idx; - murmur.add(decl); + streamMask >>= ntz; + idx += ntz; + + const Stream& stream = draw.m_stream[idx]; + murmur.add(stream.m_handle.idx); + + if (isValid(stream.m_handle) ) + { + const VertexBufferGL& vb = m_vertexBuffers[stream.m_handle.idx]; + uint16_t decl = !isValid(vb.m_decl) ? stream.m_decl.idx : vb.m_decl.idx; + murmur.add(decl); + } + + currentState.m_stream[idx].m_handle = stream.m_handle; } + currentState.m_streamMask = draw.m_streamMask; murmur.add(draw.m_indexBuffer.idx); murmur.add(draw.m_instanceDataBuffer.idx); @@ -6974,11 +6999,7 @@ namespace bgfx { namespace gl murmur.add(programIdx); uint32_t hash = murmur.end(); - currentState.m_streamMask = draw.m_streamMask; - currentState.m_stream[0].m_handle = stream.m_handle; - baseVertex = stream.m_startVertex; - - currentState.m_indexBuffer = draw.m_indexBuffer; + currentState.m_indexBuffer = draw.m_indexBuffer; currentState.m_instanceDataOffset = draw.m_instanceDataOffset; currentState.m_instanceDataStride = draw.m_instanceDataStride; @@ -6996,14 +7017,26 @@ namespace bgfx { namespace gl program.add(hash); - if (isValid(stream.m_handle) ) + program.bindAttributesBegin(); + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) { - VertexBufferGL& vb = m_vertexBuffers[stream.m_handle.idx]; - vb.add(hash); - GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) ); + streamMask >>= ntz; + idx += ntz; - uint16_t decl = !isValid(vb.m_decl) ? stream.m_decl.idx : vb.m_decl.idx; - program.bindAttributes(m_vertexDecls[decl], stream.m_startVertex); + const Stream& stream = draw.m_stream[idx]; + + if (isValid(stream.m_handle) ) + { + VertexBufferGL& vb = m_vertexBuffers[stream.m_handle.idx]; + vb.add(hash); + + uint16_t decl = !isValid(vb.m_decl) ? stream.m_decl.idx : vb.m_decl.idx; + GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) ); + program.bindAttributes(m_vertexDecls[decl], stream.m_startVertex); + } if (isValid(draw.m_instanceDataBuffer) ) { @@ -7013,10 +7046,7 @@ namespace bgfx { namespace gl program.bindInstanceData(draw.m_instanceDataStride, draw.m_instanceDataOffset); } } - else - { - GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, 0) ); - } + program.bindAttributesEnd(); if (isValid(draw.m_indexBuffer) ) { @@ -7037,36 +7067,53 @@ namespace bgfx { namespace gl && 0 != currentVao) { GL_CHECK(glBindVertexArray(defaultVao) ); - currentState.m_streamMask = 0; - currentState.m_stream[0].m_handle.idx = invalidHandle; - currentState.m_indexBuffer.idx = invalidHandle; + currentState.m_streamMask = 0; + for (size_t ii = 0; ii < BGFX_CONFIG_MAX_VERTEX_STREAMS; ++ii) + { + currentState.m_stream[ii].m_handle.idx = invalidHandle; + } + currentState.m_indexBuffer.idx = invalidHandle; bindAttribs = true; currentVao = 0; } + bool diffStreamHandles = false; + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) + { + streamMask >>= ntz; + idx += ntz; + + if (currentState.m_stream[idx].m_handle.idx != draw.m_stream[idx].m_handle.idx) + { + diffStreamHandles = true; + break; + } + } + if (programChanged || currentState.m_streamMask != draw.m_streamMask - || currentState.m_stream[0].m_handle.idx != draw.m_stream[0].m_handle.idx || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx || currentState.m_instanceDataOffset != draw.m_instanceDataOffset - || currentState.m_instanceDataStride != draw.m_instanceDataStride) + || currentState.m_instanceDataStride != draw.m_instanceDataStride + || diffStreamHandles) { currentState.m_streamMask = draw.m_streamMask; - currentState.m_stream[0].m_handle = draw.m_stream[0].m_handle; currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx; currentState.m_instanceDataOffset = draw.m_instanceDataOffset; currentState.m_instanceDataStride = draw.m_instanceDataStride; - uint16_t handle = draw.m_stream[0].m_handle.idx; - if (invalidHandle != handle) + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) { - VertexBufferGL& vb = m_vertexBuffers[handle]; - GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) ); - bindAttribs = true; - } - else - { - GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, 0) ); + streamMask >>= ntz; + idx += ntz; + + currentState.m_stream[idx].m_handle = draw.m_stream[idx].m_handle; } } @@ -7088,13 +7135,41 @@ namespace bgfx { namespace gl if (0 != currentState.m_streamMask) { - if (baseVertex != draw.m_stream[0].m_startVertex - || bindAttribs) + bool diffStartVertex = false; + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) { - baseVertex = draw.m_stream[0].m_startVertex; - const VertexBufferGL& vb = m_vertexBuffers[draw.m_stream[0].m_handle.idx]; - uint16_t decl = !isValid(vb.m_decl) ? draw.m_stream[0].m_decl.idx : vb.m_decl.idx; - program.bindAttributes(m_vertexDecls[decl], draw.m_stream[0].m_startVertex); + streamMask >>= ntz; + idx += ntz; + + if (currentState.m_stream[idx].m_startVertex != draw.m_stream[idx].m_startVertex) + { + diffStartVertex = true; + break; + } + } + + if (bindAttribs || diffStartVertex) + { + program.bindAttributesBegin(); + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) + { + streamMask >>= ntz; + idx += ntz; + + const Stream& stream = draw.m_stream[idx]; + + const VertexBufferGL& vb = m_vertexBuffers[stream.m_handle.idx]; + uint16_t decl = !isValid(vb.m_decl) ? stream.m_decl.idx : vb.m_decl.idx; + GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) ); + program.bindAttributes(m_vertexDecls[decl], stream.m_startVertex); + } + program.bindAttributesEnd(); if (isValid(draw.m_instanceDataBuffer) ) { @@ -7110,10 +7185,22 @@ namespace bgfx { namespace gl uint32_t numVertices = draw.m_numVertices; if (UINT32_MAX == numVertices) { - const VertexBufferGL& vb = m_vertexBuffers[currentState.m_stream[0].m_handle.idx]; - uint16_t decl = !isValid(vb.m_decl) ? draw.m_stream[0].m_decl.idx : vb.m_decl.idx; - const VertexDecl& vertexDecl = m_vertexDecls[decl]; - numVertices = vb.m_size/vertexDecl.m_stride; + for (uint32_t idx = 0, streamMask = draw.m_streamMask, ntz = bx::uint32_cnttz(streamMask) + ; 0 != streamMask + ; streamMask >>= 1, idx += 1, ntz = bx::uint32_cnttz(streamMask) + ) + { + streamMask >>= ntz; + idx += ntz; + + const Stream& stream = currentState.m_stream[idx]; + + const VertexBufferGL& vb = m_vertexBuffers[stream.m_handle.idx]; + uint16_t decl = !isValid(vb.m_decl) ? stream.m_decl.idx : vb.m_decl.idx; + const VertexDecl& vertexDecl = m_vertexDecls[decl]; + + numVertices = bx::uint32_min(numVertices, vb.m_size/vertexDecl.m_stride); + } } uint32_t numIndices = 0; diff --git a/src/renderer_gl.h b/src/renderer_gl.h index 9d25ec249..0ed712be6 100644 --- a/src/renderer_gl.h +++ b/src/renderer_gl.h @@ -1343,8 +1343,24 @@ namespace bgfx { namespace gl void create(const ShaderGL& _vsh, const ShaderGL& _fsh); void destroy(); void init(); - void bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex = 0) const; void bindInstanceData(uint32_t _stride, uint32_t _baseVertex = 0) const; + void bindAttributesBegin() + { + memcpy(m_unboundUsedAttrib, m_used, sizeof(m_unboundUsedAttrib) ); + } + void bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex = 0); + void bindAttributesEnd() + { + for (uint32_t ii = 0, iiEnd = m_usedCount; ii < iiEnd; ++ii) + { + if (Attrib::Count != m_unboundUsedAttrib[ii]) + { + Attrib::Enum attr = Attrib::Enum(m_unboundUsedAttrib[ii]); + GLint loc = m_attributes[attr]; + GL_CHECK(glDisableVertexAttribArray(loc) ); + } + } + } void add(uint32_t _hash) { @@ -1353,8 +1369,10 @@ namespace bgfx { namespace gl GLuint m_id; - uint8_t m_used[Attrib::Count+1]; // dense - GLint m_attributes[Attrib::Count]; // sparse + uint8_t m_unboundUsedAttrib[Attrib::Count]; // For tracking unbound used attributes between begin()/end(). + uint8_t m_usedCount; + uint8_t m_used[Attrib::Count]; // Dense. + GLint m_attributes[Attrib::Count]; // Sparse. GLint m_instanceData[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT+1]; GLint m_sampler[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];