From 742dad2b905a90d746df1675bfd6570514d495d5 Mon Sep 17 00:00:00 2001 From: attilaz Date: Tue, 26 Jul 2016 18:29:07 +0200 Subject: [PATCH] non-blocking texture/vb/ib update when possible (#856) set vertex/fragment texture only when required per program --- src/renderer_mtl.h | 23 ++++++++- src/renderer_mtl.mm | 118 ++++++++++++++++++++++++++++++++------------ 2 files changed, 109 insertions(+), 32 deletions(-) diff --git a/src/renderer_mtl.h b/src/renderer_mtl.h index b0907c032..42145e0c0 100644 --- a/src/renderer_mtl.h +++ b/src/renderer_mtl.h @@ -70,6 +70,23 @@ namespace bgfx { namespace mtl [m_obj copyFromTexture:_sourceTexture sourceSlice:_sourceSlice sourceLevel:_sourceLevel sourceOrigin:_sourceOrigin sourceSize:_sourceSize toTexture:_destinationTexture destinationSlice:_destinationSlice destinationLevel:_destinationLevel destinationOrigin:_destinationOrigin]; } + + void copyFromBuffer(id _sourceBuffer, NSUInteger _sourceOffset, id _destinationBuffer, + NSUInteger _destinationOffset, NSUInteger _size) + { + [m_obj copyFromBuffer:_sourceBuffer sourceOffset:_sourceOffset toBuffer:_destinationBuffer + destinationOffset:_destinationOffset size:_size]; + } + + void copyFromBuffer(id _sourceBuffer, NSUInteger _sourceOffset, NSUInteger _sourceBytesPerRow, + NSUInteger _sourceBytesPerImage, MTLSize _sourceSize, id _destinationTexture, + NSUInteger _destinationSlice, NSUInteger _destinationLevel, MTLOrigin _destinationOrigin) + { + [m_obj copyFromBuffer:_sourceBuffer sourceOffset:_sourceOffset sourceBytesPerRow:_sourceBytesPerRow + sourceBytesPerImage:_sourceBytesPerImage sourceSize:_sourceSize toTexture:_destinationTexture + destinationSlice:_destinationSlice destinationLevel:_destinationLevel destinationOrigin:_destinationOrigin]; + } + void endEncoding() { [m_obj endEncoding]; @@ -702,6 +719,8 @@ namespace bgfx { namespace mtl , m_vshConstantBufferAlignmentMask(0) , m_fshConstantBufferSize(0) , m_fshConstantBufferAlignmentMask(0) + , m_usedVertexSamplerStages(0) + , 
m_usedFragmentSamplerStages(0) , m_numPredefined(0) , m_processedUniforms(false) { @@ -727,6 +746,8 @@ namespace bgfx { namespace mtl uint32_t m_vshConstantBufferAlignmentMask; uint32_t m_fshConstantBufferSize; uint32_t m_fshConstantBufferAlignmentMask; + uint32_t m_usedVertexSamplerStages; + uint32_t m_usedFragmentSamplerStages; PredefinedUniform m_predefined[PredefinedUniform::Count*2]; uint8_t m_numPredefined; bool m_processedUniforms; @@ -754,7 +775,7 @@ namespace bgfx { namespace mtl MTL_RELEASE(m_ptrStencil); } void update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem); - void commit(uint8_t _stage, uint32_t _flags = BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER); + void commit(uint8_t _stage, bool _vertex, bool _fragment, uint32_t _flags = BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER); Texture m_ptr; Texture m_ptrMSAA; diff --git a/src/renderer_mtl.mm b/src/renderer_mtl.mm index b3b956b51..eb3a92a1b 100644 --- a/src/renderer_mtl.mm +++ b/src/renderer_mtl.mm @@ -44,9 +44,8 @@ Known issues(driver problems??): Only on this device ( no problem on iPad Air 2 with iOS9.3.1) TODOs: - - remove sync points at texture/mesh update - - - textureMtl::commit set only vertex/fragment stage + - remove sync points at mesh update. clearquad: 13-stencil, 26-occlusion, 30-picking + - framebufferMtl and TextureMtl resolve - FrameBufferMtl::postReset recreate framebuffer??? 
@@ -1022,9 +1021,6 @@ namespace bgfx { namespace mtl rce.setFragmentBuffer(m_uniformBuffer, m_uniformBufferFragmentOffset, 0); } - VertexBufferMtl& vb = m_vertexBuffers[_blitter.m_vb->handle.idx]; - rce.setVertexBuffer(vb.getBuffer(), 0, 1); - float proj[16]; bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f); @@ -1032,7 +1028,7 @@ namespace bgfx { namespace mtl uint8_t flags = predefined.m_type; setShaderUniform(flags, predefined.m_loc, proj, 4); - m_textures[_blitter.m_texture.idx].commit(0); + m_textures[_blitter.m_texture.idx].commit(0, false, true); } void blitRender(TextVideoMemBlitter& _blitter, uint32_t _numIndices) BX_OVERRIDE @@ -1043,6 +1039,9 @@ namespace bgfx { namespace mtl m_indexBuffers [_blitter.m_ib->handle.idx].update(0, _numIndices*2, _blitter.m_ib->data, true); m_vertexBuffers[_blitter.m_vb->handle.idx].update(0, numVertices*_blitter.m_decl.m_stride, _blitter.m_vb->data, true); + VertexBufferMtl& vb = m_vertexBuffers[_blitter.m_vb->handle.idx]; + m_renderCommandEncoder.setVertexBuffer(vb.getBuffer(), 0, 1); + m_renderCommandEncoder.drawIndexedPrimitives(MTLPrimitiveTypeTriangle, _numIndices, MTLIndexTypeUInt16, m_indexBuffers[_blitter.m_ib->handle.idx].getBuffer(), 0, 1); } } @@ -2280,6 +2279,9 @@ namespace bgfx { namespace mtl } else if (arg.type == MTLArgumentTypeTexture) { + if ( shaderType == 0 ) m_usedVertexSamplerStages |= 1<m_renderCommandEncoder ) + { // NOTE: cannot blit while rendercommander is active. have to sync. slow. remove these. 
+ // ClearQuad triggers this now s_renderMtl->sync(); - memcpy( (uint8_t*)getBuffer().contents() + _offset, _data, _size); + memcpy( (uint8_t*)getBuffer().contents() + _offset, _data, _size); + } + else + { + BlitCommandEncoder bce = s_renderMtl->getBlitCommandEncoder(); + + Buffer temp = s_renderMtl->m_device.newBufferWithBytes(_data, _size, 0); + bce.copyFromBuffer(temp, 0, getBuffer(), _offset, _size); + release(temp); + } } void VertexBufferMtl::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags) @@ -2549,14 +2565,6 @@ namespace bgfx { namespace mtl void TextureMtl::update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem) { - s_renderMtl->sync(); - - MTLRegion region = - { - { _rect.m_x, _rect.m_y, _z }, - { _rect.m_width, _rect.m_height, _depth }, - }; - const uint32_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) ); const uint32_t rectpitch = _rect.m_width*bpp/8; const uint32_t srcpitch = UINT16_MAX == _pitch ? 
rectpitch : _pitch; @@ -2579,7 +2587,38 @@ namespace bgfx { namespace mtl data = temp; } - m_ptr.replaceRegion(region, _mip, _side, data, srcpitch, srcpitch * _rect.m_height); + if ( NULL != s_renderMtl->m_renderCommandEncoder ) + { + s_renderMtl->sync(); + + MTLRegion region = + { + { _rect.m_x, _rect.m_y, _z }, + { _rect.m_width, _rect.m_height, _depth }, + }; + + m_ptr.replaceRegion(region, _mip, _side, data, srcpitch, srcpitch * _rect.m_height); + } + else + { + BlitCommandEncoder bce = s_renderMtl->getBlitCommandEncoder(); + + const uint32_t dstpitch = bx::strideAlign(rectpitch, 64); + + Buffer tempBuffer = s_renderMtl->m_device.newBufferWithLength(dstpitch*_rect.m_height, 0); + + const uint8_t* src = (uint8_t*)data; + uint8_t* dst = (uint8_t*)tempBuffer.contents(); + + for (uint32_t yy = 0; yy < _rect.m_height; ++yy, src += srcpitch, dst += dstpitch) + { + memcpy(dst, src, rectpitch); + } + + bce.copyFromBuffer(tempBuffer, 0, dstpitch, dstpitch * _rect.m_height, MTLSizeMake(_rect.m_width, _rect.m_height, _depth), + m_ptr, _side, _mip, MTLOriginMake(_rect.m_x, _rect.m_y, _z)); + release(tempBuffer); + } if (NULL != temp) { @@ -2587,18 +2626,23 @@ namespace bgfx { namespace mtl } } - void TextureMtl::commit(uint8_t _stage, uint32_t _flags) + void TextureMtl::commit(uint8_t _stage, bool _vertex, bool _fragment, uint32_t _flags) { - //TODO: vertex or fragment stage? - s_renderMtl->m_renderCommandEncoder.setVertexTexture(m_ptr, _stage); - s_renderMtl->m_renderCommandEncoder.setVertexSamplerState(0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & _flags) - ? s_renderMtl->getSamplerState(_flags) - : m_sampler, _stage); + if (_vertex) + { + s_renderMtl->m_renderCommandEncoder.setVertexTexture(m_ptr, _stage); + s_renderMtl->m_renderCommandEncoder.setVertexSamplerState(0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & _flags) + ? 
s_renderMtl->getSamplerState(_flags) + : m_sampler, _stage); + } - s_renderMtl->m_renderCommandEncoder.setFragmentTexture(m_ptr, _stage); - s_renderMtl->m_renderCommandEncoder.setFragmentSamplerState(0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & _flags) - ? s_renderMtl->getSamplerState(_flags) - : m_sampler, _stage); + if (_fragment) + { + s_renderMtl->m_renderCommandEncoder.setFragmentTexture(m_ptr, _stage); + s_renderMtl->m_renderCommandEncoder.setFragmentSamplerState(0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & _flags) + ? s_renderMtl->getSamplerState(_flags) + : m_sampler, _stage); + } } void FrameBufferMtl::create(uint8_t _num, const Attachment* _attachment) @@ -2784,7 +2828,7 @@ namespace bgfx { namespace mtl updateResolution(_render->m_resolution); - if ( m_saveScreenshot ) + if ( m_saveScreenshot || NULL != m_capture ) { if ( m_screenshotTarget ) { @@ -3373,6 +3417,16 @@ namespace bgfx { namespace mtl } { + uint32_t usedVertexSamplerStages = 0; + uint32_t usedFragmentSamplerStages = 0; + + if (invalidHandle != programIdx) + { + ProgramMtl& program = m_program[programIdx]; + usedVertexSamplerStages = program.m_usedVertexSamplerStages; + usedFragmentSamplerStages = program.m_usedFragmentSamplerStages; + } + for (uint8_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage) { const Binding& sampler = draw.m_bind[stage]; @@ -3384,7 +3438,9 @@ namespace bgfx { namespace mtl if (invalidHandle != sampler.m_idx) { TextureMtl& texture = m_textures[sampler.m_idx]; - texture.commit(stage, sampler.m_un.m_draw.m_textureFlags); + texture.commit(stage, (usedVertexSamplerStages&(1<