From 1e7b94e82a4fdff55919df92b7570678df1b40a2 Mon Sep 17 00:00:00 2001 From: bkaradzic Date: Sat, 7 Jul 2012 23:22:52 -0700 Subject: [PATCH] Adding geometry instancing support. --- include/bgfx.h | 16 ++++ src/bgfx.cpp | 10 +++ src/bgfx_p.h | 41 +++++++++ src/config.h | 4 + src/glimports.h | 4 + src/renderer_d3d9.cpp | 199 ++++++++++++++++++++++++++++++------------ src/renderer_d3d9.h | 83 +++++++++++++----- src/renderer_gl.cpp | 109 ++++++++++++++++------- src/renderer_gl.h | 6 +- 9 files changed, 361 insertions(+), 111 deletions(-) diff --git a/include/bgfx.h b/include/bgfx.h index 8a2548ae1..097770c43 100644 --- a/include/bgfx.h +++ b/include/bgfx.h @@ -253,6 +253,16 @@ namespace bgfx VertexDeclHandle decl; }; + struct InstanceDataBuffer + { + uint8_t* data; + uint32_t size; + uint32_t offset; + uint16_t stride; + uint16_t num; + VertexBufferHandle handle; + }; + struct ConstantType { enum Enum @@ -369,6 +379,9 @@ namespace bgfx /// const TransientVertexBuffer* allocTransientVertexBuffer(uint16_t _num, const VertexDecl& _decl); + /// + const InstanceDataBuffer* allocInstanceDataBuffer(uint16_t _num, uint16_t _stride); + /// VertexShaderHandle createVertexShader(const Memory* _mem); @@ -471,6 +484,9 @@ namespace bgfx /// void setVertexBuffer(const TransientVertexBuffer* _vb); + /// + void setInstanceDataBuffer(const InstanceDataBuffer* _idb); + /// void setMaterial(MaterialHandle _handle); diff --git a/src/bgfx.cpp b/src/bgfx.cpp index 21b20a086..9a2cf0559 100644 --- a/src/bgfx.cpp +++ b/src/bgfx.cpp @@ -845,6 +845,11 @@ namespace bgfx return s_ctx.allocTransientVertexBuffer(_num, _decl); } + const InstanceDataBuffer* allocInstanceDataBuffer(uint16_t _num, uint16_t _stride) + { + return s_ctx.allocInstanceDataBuffer(_num, _stride); + } + VertexShaderHandle createVertexShader(const Memory* _mem) { return s_ctx.createVertexShader(_mem); @@ -1021,6 +1026,11 @@ namespace bgfx s_ctx.m_submit->setVertexBuffer(_vb); } + void setInstanceDataBuffer(const InstanceDataBuffer* _idb) + { + s_ctx.m_submit->setInstanceDataBuffer(_idb); + } + void setMaterial(MaterialHandle _handle) { s_ctx.m_submit->setMaterial(_handle); diff --git a/src/bgfx_p.h b/src/bgfx_p.h index 8e7450954..70ee8ecd3 100644 --- a/src/bgfx_p.h +++ b/src/bgfx_p.h @@ -862,10 +862,14 @@ namespace bgfx m_numIndices = 0; m_startVertex = 0; m_numVertices = UINT32_C(0xffffffff); + m_instanceDataOffset = 0; + m_instanceDataStride = 0; + m_numInstances = 1; m_num = 1; m_vertexBuffer.idx = bgfx::invalidHandle; m_vertexDecl.idx = bgfx::invalidHandle; m_indexBuffer.idx = bgfx::invalidHandle; + m_instanceDataBuffer.idx = bgfx::invalidHandle; for (uint32_t ii = 0; ii < BGFX_STATE_TEX_COUNT; ++ii) { @@ -882,11 +886,15 @@ namespace bgfx uint32_t m_numIndices; uint32_t m_startVertex; uint32_t m_numVertices; + uint32_t m_instanceDataOffset; + uint16_t m_instanceDataStride; + uint16_t m_numInstances; uint16_t m_num; VertexBufferHandle m_vertexBuffer; VertexDeclHandle m_vertexDecl; IndexBufferHandle m_indexBuffer; + VertexBufferHandle m_instanceDataBuffer; Sampler m_sampler[BGFX_STATE_TEX_COUNT]; }; @@ -1079,6 +1087,18 @@ namespace bgfx g_free(const_cast(_vb) ); } + void setInstanceDataBuffer(const InstanceDataBuffer* _idb) + { +#if BGFX_CONFIG_RENDERER_OPENGLES +#else + m_state.m_instanceDataOffset = _idb->offset; + m_state.m_instanceDataStride = _idb->stride; + m_state.m_numInstances = _idb->num; + m_state.m_instanceDataBuffer = _idb->handle; + g_free(const_cast(_idb) ); +#endif // BGFX_CONFIG_RENDERER_OPENGLES + } + void setMaterial(MaterialHandle _handle) { BX_CHECK(invalidHandle != _handle.idx, "Can't set material with invalid handle."); @@ -1857,6 +1877,27 @@ namespace bgfx return vb; } + const InstanceDataBuffer* allocInstanceDataBuffer(uint16_t _num, uint16_t _stride) + { +#if BGFX_CONFIG_RENDERER_OPENGLES + return NULL; +#else + uint16_t stride = BX_ALIGN_16(_stride); + uint32_t offset = m_submit->allocTransientVertexBuffer(_num, stride); + + TransientVertexBuffer& dvb = *m_submit->m_transientVb; + InstanceDataBuffer* idb = (InstanceDataBuffer*)g_realloc(NULL, sizeof(InstanceDataBuffer) ); + idb->data = &dvb.data[offset]; + idb->size = _num * stride; + idb->offset = offset; + idb->stride = stride; + idb->num = _num; + idb->handle = dvb.handle; + + return idb; +#endif // BGFX_CONFIG_RENDERER_OPENGLES + } + VertexShaderHandle createVertexShader(const Memory* _mem) { VertexShaderHandle handle = { m_vertexShaderHandle.alloc() }; diff --git a/src/config.h b/src/config.h index 5993dfe98..433f80596 100644 --- a/src/config.h +++ b/src/config.h @@ -128,4 +128,8 @@ # define BGFX_CONFIG_USE_TINYSTL 0 #endif // BGFX_CONFIG_USE_TINYSTL +#ifndef BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT +# define BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT 5 +#endif // BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT + #endif // __CONFIG_H__ diff --git a/src/glimports.h b/src/glimports.h index d90542696..a01a33d65 100644 --- a/src/glimports.h +++ b/src/glimports.h @@ -117,6 +117,10 @@ GL_IMPORT(true, PFNGLGETQUERYOBJECTI64VEXTPROC, glGetQueryObjecti64vEX GL_IMPORT(true, PFNGLGETQUERYOBJECTUI64VEXTPROC, glGetQueryObjectui64vEXT); GL_IMPORT(true, PFNGLSAMPLECOVERAGEARBPROC, glSampleCoverageARB); + +GL_IMPORT(true, PFNGLDRAWARRAYSINSTANCEDARBPROC, glDrawArraysInstanced); +GL_IMPORT(true, PFNGLDRAWELEMENTSINSTANCEDARBPROC, glDrawElementsInstanced); +GL_IMPORT(true, PFNGLVERTEXATTRIBDIVISORARBPROC, glVertexAttribDivisor); #if BGFX_CONFIG_DEBUG_GREMEDY GL_IMPORT(true, PFNGLSTRINGMARKERGREMEDYPROC, glStringMarkerGREMEDY); diff --git a/src/renderer_d3d9.cpp b/src/renderer_d3d9.cpp index 3f84c61f2..1f97ee2ef 100644 --- a/src/renderer_d3d9.cpp +++ b/src/renderer_d3d9.cpp @@ -132,16 +132,27 @@ namespace bgfx { D3DFMT_A16B16G16R16, 8 }, }; + static ExtendedFormat s_extendedFormats[ExtendedFormat::Count] = + { + { D3DFMT_ATI1, 0, D3DRTYPE_TEXTURE, false }, + { D3DFMT_ATI2, 0, D3DRTYPE_TEXTURE, false }, + { D3DFMT_DF16, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false }, + { D3DFMT_DF24, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false }, + { D3DFMT_INST, 0, D3DRTYPE_SURFACE, false }, + { D3DFMT_INTZ, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false }, + { D3DFMT_NULL, D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, false }, + { D3DFMT_RESZ, D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, false }, + { D3DFMT_RAWZ, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false }, + }; + struct RendererContext { RendererContext() : m_flags(BGFX_RESET_NONE) , m_initialized(false) - , m_fmtNULL(false) - , m_fmtDF16(false) - , m_fmtDF24(false) - , m_fmtINTZ(false) - , m_fmtRAWZ(false) + , m_amd(false) + , m_nvidia(false) + , m_instancing(false) , m_rtMsaa(false) { m_rt.idx = invalidHandle; @@ -222,6 +233,11 @@ namespace bgfx #endif // BGFX_CONFIG_DEBUG_PERFHUD } + D3DADAPTER_IDENTIFIER9 identifier; + DX_CHECK(m_d3d9->GetAdapterIdentifier(m_adapter, 0, &identifier) ); + m_amd = identifier.VendorId == 0x1002; + m_nvidia = identifier.VendorId == 0x10de; + uint32_t behaviorFlags[] = { D3DCREATE_HARDWARE_VERTEXPROCESSING|D3DCREATE_PUREDEVICE, @@ -277,11 +293,24 @@ namespace bgfx BX_TRACE("Max fragment shader 2.0 instr. slots: %d", m_caps.PS20Caps.NumInstructionSlots); BX_TRACE("Max fragment shader 3.0 instr. slots: %d", m_caps.MaxPixelShader30InstructionSlots); - m_fmtNULL = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_NULL) ); - m_fmtDF16 = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_DF16) ); - m_fmtDF24 = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_DF24) ); - m_fmtINTZ = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_INTZ) ); - m_fmtRAWZ = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_RAWZ) ); + BX_TRACE("Extended formats:"); + for (uint32_t ii = 0; ii < ExtendedFormat::Count; ++ii) + { + ExtendedFormat& fmt = s_extendedFormats[ii]; + fmt.m_supported = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, fmt.m_usage, fmt.m_type, fmt.m_fmt) ); + const char* fourcc = (const char*)&fmt.m_fmt; + BX_TRACE("\t%2d: %c%c%c%c %s", ii, fourcc[0], fourcc[1], fourcc[2], fourcc[3], fmt.m_supported ? "supported" : ""); + } + + m_instancing = false + || s_extendedFormats[ExtendedFormat::Inst].m_supported + || (m_caps.VertexShaderVersion >= D3DVS_VERSION(3, 0) ) + ; + + if (m_instancing) + { + m_device->SetRenderState(D3DRS_POINTSIZE, D3DFMT_INST); + } uint32_t index = 1; for (const D3DFORMAT* fmt = &s_checkColorFormats[index]; *fmt != D3DFMT_UNKNOWN; ++fmt, ++index) @@ -674,6 +703,7 @@ namespace bgfx IDirect3DSurface9* m_backBufferColor; IDirect3DSurface9* m_backBufferDepthStencil; + IDirect3DVertexDeclaration9* m_instanceDataDecls[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT]; HMODULE m_d3d9dll; uint32_t m_adapter; @@ -682,11 +712,9 @@ namespace bgfx uint32_t m_flags; bool m_initialized; - bool m_fmtNULL; - bool m_fmtDF16; - bool m_fmtDF24; - bool m_fmtINTZ; - bool m_fmtRAWZ; + bool m_amd; + bool m_nvidia; + bool m_instancing; D3DFORMAT m_fmtDepth; @@ -811,30 +839,26 @@ namespace bgfx static const D3DVERTEXELEMENT9 s_attrib[Attrib::Count+1] = { - {0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0}, - {0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL, 0}, - {0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0}, - {0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 1}, - {0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDINDICES, 0}, - {0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDWEIGHT, 0}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 2}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 3}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 4}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 5}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 6}, - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 7}, + { 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 }, + { 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL, 0 }, + { 0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 }, + { 0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 1 }, + { 0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDINDICES, 0 }, + { 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDWEIGHT, 0 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 2 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 3 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 4 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 5 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 6 }, + { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 7 }, D3DDECL_END() }; - void VertexDeclaration::create(const VertexDecl& _decl) + static D3DVERTEXELEMENT9* fillVertexDecl(D3DVERTEXELEMENT9* _out, uint32_t _count, const VertexDecl& _decl) { - memcpy(&m_decl, &_decl, sizeof(VertexDecl) ); - dump(m_decl); - - D3DVERTEXELEMENT9 vertexElements[Attrib::Count+1]; - D3DVERTEXELEMENT9* elem = vertexElements; + D3DVERTEXELEMENT9* elem = _out; for (uint32_t attr = 0; attr < Attrib::Count; ++attr) { @@ -903,7 +927,7 @@ namespace bgfx case 2: declType = D3DDECLTYPE_FLOAT2; break; - + default: case 3: declType = D3DDECLTYPE_FLOAT3; @@ -925,19 +949,46 @@ namespace bgfx elem->Offset = _decl.m_offset[attr]; ++elem; - BX_TRACE("\tattr %d, num %d, type %d, norm %d, offset %d" - , attr - , num - , type - , normalized - , _decl.m_offset[attr] - ); +// BX_TRACE("\tattr %d, num %d, type %d, norm %d, offset %d" +// , attr +// , num +// , type +// , normalized +// , _decl.m_offset[attr] +// ); } } + return elem; + } + + static IDirect3DVertexDeclaration9* createVertexDecl(const VertexDecl& _decl, uint8_t _numInstanceData) + { + D3DVERTEXELEMENT9 vertexElements[Attrib::Count+1+BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT]; + D3DVERTEXELEMENT9* elem = fillVertexDecl(vertexElements, Attrib::Count, _decl); + + const D3DVERTEXELEMENT9 inst = { 1, 0, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 }; + + for (uint32_t ii = 0; ii < _numInstanceData; ++ii) + { + memcpy(elem, &inst, sizeof(D3DVERTEXELEMENT9) ); + elem->UsageIndex = 8-_numInstanceData+ii; + elem->Offset = ii*16; + ++elem; + } + memcpy(elem, &s_attrib[Attrib::Count], sizeof(D3DVERTEXELEMENT9) ); - DX_CHECK(s_renderCtx.m_device->CreateVertexDeclaration(vertexElements, &m_ptr) ); + IDirect3DVertexDeclaration9* ptr; + DX_CHECK(s_renderCtx.m_device->CreateVertexDeclaration(vertexElements, &ptr) ); + return ptr; + } + + void VertexDeclaration::create(const VertexDecl& _decl) + { + memcpy(&m_decl, &_decl, sizeof(VertexDecl) ); + dump(m_decl); + m_ptr = createVertexDecl(_decl, 0); } void Shader::create(bool _fragment, const Memory* _mem) @@ -1816,8 +1867,10 @@ namespace bgfx D3DPRIMITIVETYPE primType = D3DPT_TRIANGLELIST; uint32_t primNumVerts = 3; - uint32_t statsNumPrims = 0; + uint32_t statsNumPrimsSubmitted = 0; uint32_t statsNumIndices = 0; + uint32_t statsNumInstances = 0; + uint32_t statsNumPrimsRendered = 0; int64_t elapsed = -bx::getHPCounter(); @@ -2182,16 +2235,34 @@ namespace bgfx uint16_t handle = state.m_vertexBuffer.idx; if (bgfx::invalidHandle != handle) { - VertexBuffer& vb = s_renderCtx.m_vertexBuffers[handle]; + const VertexBuffer& vb = s_renderCtx.m_vertexBuffers[handle]; uint16_t decl = vb.m_decl.idx == bgfx::invalidHandle ? state.m_vertexDecl.idx : vb.m_decl.idx; - VertexDeclaration& vertexDecl = s_renderCtx.m_vertexDecls[decl]; + const VertexDeclaration& vertexDecl = s_renderCtx.m_vertexDecls[decl]; DX_CHECK(s_renderCtx.m_device->SetStreamSource(0, vb.m_ptr, 0, vertexDecl.m_decl.m_stride) ); - DX_CHECK(s_renderCtx.m_device->SetVertexDeclaration(vertexDecl.m_ptr) ); + + if (invalidHandle != state.m_instanceDataBuffer.idx) + { + const VertexBuffer& inst = s_renderCtx.m_vertexBuffers[state.m_instanceDataBuffer.idx]; + DX_CHECK(s_renderCtx.m_device->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA|state.m_numInstances) ); + DX_CHECK(s_renderCtx.m_device->SetStreamSourceFreq(1, D3DSTREAMSOURCE_INSTANCEDATA|1) ); + DX_CHECK(s_renderCtx.m_device->SetStreamSource(1, inst.m_ptr, state.m_instanceDataOffset, state.m_instanceDataStride) ); + + IDirect3DVertexDeclaration9* ptr = createVertexDecl(vertexDecl.m_decl, state.m_instanceDataStride/16); + DX_CHECK(s_renderCtx.m_device->SetVertexDeclaration(ptr) ); + DX_RELEASE(ptr, 0); + } + else + { + DX_CHECK(s_renderCtx.m_device->SetStreamSourceFreq(0, 1) ); + DX_CHECK(s_renderCtx.m_device->SetStreamSource(1, NULL, 0, 0) ); + DX_CHECK(s_renderCtx.m_device->SetVertexDeclaration(vertexDecl.m_ptr) ); + } } else { DX_CHECK(s_renderCtx.m_device->SetStreamSource(0, NULL, 0, 0) ); + DX_CHECK(s_renderCtx.m_device->SetStreamSource(1, NULL, 0, 0) ); } } @@ -2223,48 +2294,58 @@ namespace bgfx } uint32_t numIndices = 0; - uint32_t numPrims = 0; + uint32_t numPrimsSubmitted = 0; + uint32_t numInstances = 0; + uint32_t numPrimsRendered = 0; if (bgfx::invalidHandle != state.m_indexBuffer.idx) { if (BGFX_DRAW_WHOLE_INDEX_BUFFER == state.m_startIndex) { numIndices = s_renderCtx.m_indexBuffers[state.m_indexBuffer.idx].m_size/2; - numPrims = numIndices/primNumVerts; + numPrimsSubmitted = numIndices/primNumVerts; + numInstances = state.m_numInstances; + numPrimsRendered = numPrimsSubmitted*state.m_numInstances; DX_CHECK(s_renderCtx.m_device->DrawIndexedPrimitive(primType , state.m_startVertex , 0 , numVertices , 0 - , numPrims + , numPrimsSubmitted ) ); } else if (primNumVerts <= state.m_numIndices) { numIndices = state.m_numIndices; - numPrims = numIndices/primNumVerts; + numPrimsSubmitted = numIndices/primNumVerts; + numInstances = state.m_numInstances; + numPrimsRendered = numPrimsSubmitted*state.m_numInstances; DX_CHECK(s_renderCtx.m_device->DrawIndexedPrimitive(primType , state.m_startVertex , 0 , numVertices , state.m_startIndex - , numPrims + , numPrimsSubmitted ) ); } } else { - numPrims = numVertices/primNumVerts; + numPrimsSubmitted = numVertices/primNumVerts; + numInstances = state.m_numInstances; + numPrimsRendered = numPrimsSubmitted*state.m_numInstances; DX_CHECK(s_renderCtx.m_device->DrawPrimitive(primType , state.m_startVertex - , numPrims + , numPrimsSubmitted ) ); } - statsNumPrims += numPrims; + statsNumPrimsSubmitted += numPrimsSubmitted; statsNumIndices += numIndices; + statsNumInstances += numInstances; + statsNumPrimsRendered += numPrimsRendered; } } @@ -2300,7 +2381,11 @@ namespace bgfx , m_render->m_num , elapsedCpuMs ); - tvm.printf(10, pos++, 0x8e, " Prims: %7d", statsNumPrims); + tvm.printf(10, pos++, 0x8e, " Prims: %7d (#inst: %5d), submitted: %7d" + , statsNumPrimsRendered + , statsNumInstances + , statsNumPrimsSubmitted + ); tvm.printf(10, pos++, 0x8e, " Indices: %7d", statsNumIndices); tvm.printf(10, pos++, 0x8e, " DVB size: %7d", m_render->m_vboffset); tvm.printf(10, pos++, 0x8e, " DIB size: %7d", m_render->m_iboffset); diff --git a/src/renderer_d3d9.h b/src/renderer_d3d9.h index 3d9fbddaa..c4ea0d44a 100644 --- a/src/renderer_d3d9.h +++ b/src/renderer_d3d9.h @@ -14,26 +14,6 @@ # endif // !BGFX_CONFIG_RENDERER_DIRECT3D_EX # include -# ifndef D3DFMT_NULL -# define D3DFMT_NULL ( (D3DFORMAT)MAKEFOURCC('N','U','L','L') ) -# endif // D3DFMT_NULL - -# ifndef D3DFMT_DF16 -# define D3DFMT_DF16 ( (D3DFORMAT)MAKEFOURCC('D','F','1','6') ) -# endif // D3DFMT_DF16 - -# ifndef D3DFMT_DF24 -# define D3DFMT_DF24 ( (D3DFORMAT)MAKEFOURCC('D','F','2','4') ) -# endif // D3DFMT_DF24 - -# ifndef D3DFMT_INTZ -# define D3DFMT_INTZ ( (D3DFORMAT)MAKEFOURCC('I','N','T','Z') ) -# endif // D3DFMT_INTZ - -# ifndef D3DFMT_RAWZ -# define D3DFMT_RAWZ ( (D3DFORMAT)MAKEFOURCC('R','A','W','Z') ) -# endif // D3DFMT_RAWZ - # if BGFX_CONFIG_RENDERER_DIRECT3D_EX typedef HRESULT (WINAPI *Direct3DCreate9ExFunc)(UINT SDKVersion, IDirect3D9Ex**); # else @@ -113,6 +93,69 @@ namespace bgfx } while (0) #endif // BGFX_CONFIG_DEBUG +# ifndef D3DFMT_ATI1 +# define D3DFMT_ATI1 ( (D3DFORMAT)MAKEFOURCC('A','T','I','1') ) +# endif // D3DFMT_ATI1 + +# ifndef D3DFMT_ATI2 +# define D3DFMT_ATI2 ( (D3DFORMAT)MAKEFOURCC('A','T','I','2') ) +# endif // D3DFMT_ATI2 + +# ifndef D3DFMT_ATOC +# define D3DFMT_ATOC ( (D3DFORMAT)MAKEFOURCC('A','T','O','C') ) +# endif // D3DFMT_ATOC + +# ifndef D3DFMT_DF16 +# define D3DFMT_DF16 ( (D3DFORMAT)MAKEFOURCC('D','F','1','6') ) +# endif // D3DFMT_DF16 + +# ifndef D3DFMT_DF24 +# define D3DFMT_DF24 ( (D3DFORMAT)MAKEFOURCC('D','F','2','4') ) +# endif // D3DFMT_DF24 + +# ifndef D3DFMT_INST +# define D3DFMT_INST ( (D3DFORMAT)MAKEFOURCC('I','N','S','T') ) +# endif // D3DFMT_INST + +# ifndef D3DFMT_INTZ +# define D3DFMT_INTZ ( (D3DFORMAT)MAKEFOURCC('I','N','T','Z') ) +# endif // D3DFMT_INTZ + +# ifndef D3DFMT_NULL +# define D3DFMT_NULL ( (D3DFORMAT)MAKEFOURCC('N','U','L','L') ) +# endif // D3DFMT_NULL + +# ifndef D3DFMT_RESZ +# define D3DFMT_RESZ ( (D3DFORMAT)MAKEFOURCC('R','E','S','Z') ) +# endif // D3DFMT_RESZ + +# ifndef D3DFMT_RAWZ +# define D3DFMT_RAWZ ( (D3DFORMAT)MAKEFOURCC('R','A','W','Z') ) +# endif // D3DFMT_RAWZ + + struct ExtendedFormat + { + enum Enum + { + Ati1, + Ati2, + Df16, + Df24, + Inst, + Intz, + Null, + Resz, + Rawz, + + Count, + }; + + D3DFORMAT m_fmt; + DWORD m_usage; + D3DRESOURCETYPE m_type; + bool m_supported; + }; + struct Msaa { D3DMULTISAMPLE_TYPE m_type; diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp index e50d4e61d..7015ce408 100644 --- a/src/renderer_gl.cpp +++ b/src/renderer_gl.cpp @@ -532,6 +532,7 @@ namespace bgfx ARB_multisample, CHROMIUM_framebuffer_multisample, ANGLE_translated_shader_source, + ARB_instanced_arrays, ANGLE_instanced_arrays, OES_texture_float, OES_texture_float_linear, @@ -568,6 +569,7 @@ namespace bgfx { "GL_ARB_multisample", false, true }, { "GL_CHROMIUM_framebuffer_multisample", false, true }, { "GL_ANGLE_translated_shader_source", false, true }, + { "GL_ARB_instanced_arrays", false, true }, { "GL_ANGLE_instanced_arrays", false, true }, { "GL_OES_texture_float", false, true }, { "GL_OES_texture_float_linear", false, true }, @@ -608,6 +610,15 @@ namespace bgfx "a_texcoord7", }; + static const char* s_instanceDataName[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT] = + { + "i_data0", + "i_data1", + "i_data2", + "i_data3", + "i_data4", + }; + static const GLenum s_attribType[AttribType::Count] = { GL_UNSIGNED_BYTE, @@ -978,7 +989,7 @@ namespace bgfx for (uint32_t ii = 0; ii < Attrib::Count; ++ii) { GLuint loc = glGetAttribLocation(m_id, s_attribName[ii]); - if ( GLuint(-1) != loc ) + if (GLuint(-1) != loc ) { BX_TRACE("attr %s: %d", s_attribName[ii], loc); m_attributes[ii] = loc; @@ -986,9 +997,21 @@ namespace bgfx } } m_used[used] = Attrib::Count; + + used = 0; + for (uint32_t ii = 0; ii < countof(s_instanceDataName); ++ii) + { + GLuint loc = glGetAttribLocation(m_id, s_instanceDataName[ii]); + if (GLuint(-1) != loc ) + { + BX_TRACE("instance data %s: %d", s_instanceDataName[ii], loc); + m_instanceData[used++] = loc; + } + } + m_instanceData[used] = 0xffff; } - void Material::bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex) + void Material::bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex) const { uint32_t enabled = 0; for (uint32_t ii = 0; Attrib::Count != m_used[ii]; ++ii) @@ -1007,6 +1030,8 @@ namespace bgfx GL_CHECK(glEnableVertexAttribArray(loc) ); enabled |= 1<m_debug&BGFX_DEBUG_IFH) ) { @@ -2336,49 +2357,69 @@ namespace bgfx baseVertex = state.m_startVertex; VertexBuffer& vb = s_renderCtx.m_vertexBuffers[state.m_vertexBuffer.idx]; uint16_t decl = vb.m_decl.idx == bgfx::invalidHandle ? state.m_vertexDecl.idx : vb.m_decl.idx; - s_renderCtx.m_materials[materialIdx].bindAttributes(s_renderCtx.m_vertexDecls[decl], state.m_startVertex); + const Material& material = s_renderCtx.m_materials[materialIdx]; + material.bindAttributes(s_renderCtx.m_vertexDecls[decl], state.m_startVertex); + + if (invalidHandle != state.m_instanceDataBuffer.idx) + { + GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, s_renderCtx.m_vertexBuffers[state.m_instanceDataBuffer.idx].m_id) ); + material.bindInstanceData(state.m_instanceDataStride, state.m_instanceDataOffset); + } } uint32_t numIndices = 0; - uint32_t numPrims = 0; + uint32_t numPrimsSubmitted = 0; + uint32_t numInstances = 0; + uint32_t numPrimsRendered = 0; if (bgfx::invalidHandle != state.m_indexBuffer.idx) { if (BGFX_DRAW_WHOLE_INDEX_BUFFER == state.m_startIndex) { numIndices = s_renderCtx.m_indexBuffers[state.m_indexBuffer.idx].m_size/2; - numPrims = numIndices/primNumVerts; + numPrimsSubmitted = numIndices/primNumVerts; + numInstances = state.m_numInstances; + numPrimsRendered = numPrimsSubmitted*state.m_numInstances; - GL_CHECK(glDrawElements(primType + GL_CHECK(glDrawElementsInstanced(primType , s_renderCtx.m_indexBuffers[state.m_indexBuffer.idx].m_size/2 , GL_UNSIGNED_SHORT , (void*)0 + , state.m_numInstances ) ); } else if (primNumVerts <= state.m_numIndices) { numIndices = state.m_numIndices; - numPrims = numIndices/primNumVerts; + numPrimsSubmitted = numIndices/primNumVerts; + numInstances = state.m_numInstances; + numPrimsRendered = numPrimsSubmitted*state.m_numInstances; - GL_CHECK(glDrawElements(primType + GL_CHECK(glDrawElementsInstanced(primType , numIndices , GL_UNSIGNED_SHORT , (void*)(uintptr_t)(state.m_startIndex*2) + , state.m_numInstances ) ); } } else { - numPrims = state.m_numVertices/primNumVerts; + numPrimsSubmitted = state.m_numVertices/primNumVerts; + numInstances = state.m_numInstances; + numPrimsRendered = numPrimsSubmitted*state.m_numInstances; - GL_CHECK(glDrawArrays(primType + GL_CHECK(glDrawArraysInstanced(primType , 0 , state.m_numVertices + , state.m_numInstances ) ); } - statsNumPrims += numPrims; + statsNumPrimsSubmitted += numPrimsSubmitted; statsNumIndices += numIndices; + statsNumInstances += numInstances; + statsNumPrimsRendered += numPrimsRendered; } } } @@ -2420,7 +2461,11 @@ namespace bgfx , elapsedCpuMs > elapsedGpuMs ? '>' : '<' , elapsedGpuMs ); - tvm.printf(10, pos++, 0x8e, " Prims: %7d", statsNumPrims); + tvm.printf(10, pos++, 0x8e, " Prims: %7d (#inst: %5d), submitted: %7d" + , statsNumPrimsRendered + , statsNumInstances + , statsNumPrimsSubmitted + ); tvm.printf(10, pos++, 0x8e, " Indices: %7d", statsNumIndices); tvm.printf(10, pos++, 0x8e, " DVB size: %7d", m_render->m_vboffset); tvm.printf(10, pos++, 0x8e, " DIB size: %7d", m_render->m_iboffset); diff --git a/src/renderer_gl.h b/src/renderer_gl.h index 44afef13a..3a65ed77b 100644 --- a/src/renderer_gl.h +++ b/src/renderer_gl.h @@ -140,7 +140,7 @@ namespace bgfx BX_CHECK(0 == err, #_call "; glError 0x%x %d", err, err); \ } while (0) -#if 0 // BGFX_CONFIG_DEBUG +#if BGFX_CONFIG_DEBUG # define GL_CHECK(_call) _GL_CHECK(_call) #else # define GL_CHECK(_call) _call @@ -334,12 +334,14 @@ namespace bgfx void create(const Shader& _vsh, const Shader& _fsh); void destroy(); void init(); - void bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex = 0); + void bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex = 0) const; + void bindInstanceData(uint32_t _stride, uint32_t _baseVertex = 0) const; GLuint m_id; uint8_t m_used[Attrib::Count+1]; // dense uint16_t m_attributes[Attrib::Count]; // sparse + uint16_t m_instanceData[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT]; uint32_t m_enabled; GLuint m_sampler[BGFX_CONFIG_MAX_TEXTURES];