diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp
index 5c6d20205..eed393ee1 100644
--- a/src/renderer_d3d11.cpp
+++ b/src/renderer_d3d11.cpp
@@ -1468,6 +1468,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		{
 			ovrPreReset();
 
+			m_gpuTimer.destroy();
+
 			if (NULL == g_platformData.backBufferDS)
 			{
 				DX_RELEASE(m_backBufferDepthStencil, 0);
@@ -1510,6 +1512,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 				DX_RELEASE(color, 0);
 			}
 
+			m_gpuTimer.create();
+
 			ovrPostReset();
 
 			// If OVR doesn't create separate depth stencil view, create default one.
@@ -2608,9 +2612,10 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		uint16_t m_numWindows;
 		FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
 
-		ID3D11Device* m_device;
-		ID3D11DeviceContext* m_deviceCtx;
-		ID3D11InfoQueue* m_infoQueue;
+		ID3D11Device*        m_device;
+		ID3D11DeviceContext* m_deviceCtx;
+		ID3D11InfoQueue*     m_infoQueue;
+		TimerQueryD3D11      m_gpuTimer;
 
 		ID3D11RenderTargetView* m_backBufferColor;
 		ID3D11DepthStencilView* m_backBufferDepthStencil;
@@ -3471,6 +3476,101 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		}
 	}
 
+	// Creates the timestamp queries of every frame slot and resets the readings.
+	void TimerQueryD3D11::create()
+	{
+		ID3D11Device* device = s_renderD3D11->m_device;
+
+		D3D11_QUERY_DESC query;
+		query.MiscFlags = 0;
+		for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+		{
+			Frame& frame = m_frame[ii];
+
+			query.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
+			DX_CHECK(device->CreateQuery(&query, &frame.m_disjoint) );
+
+			query.Query = D3D11_QUERY_TIMESTAMP;
+			DX_CHECK(device->CreateQuery(&query, &frame.m_start) );
+			DX_CHECK(device->CreateQuery(&query, &frame.m_end) );
+		}
+
+		m_elapsed = 0;
+		m_frequency = 1;
+	}
+
+	// Releases the queries of every frame slot.
+	void TimerQueryD3D11::destroy()
+	{
+		for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+		{
+			Frame& frame = m_frame[ii];
+			DX_RELEASE(frame.m_disjoint, 0);
+			DX_RELEASE(frame.m_start, 0);
+			DX_RELEASE(frame.m_end, 0);
+		}
+	}
+
+	// Reserves a frame slot and starts timing in it. While no slot can be
+	// reserved, get() is called to read back previously committed slots.
+	void TimerQueryD3D11::begin()
+	{
+		ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
+
+		while (0 == m_control.reserve(1) )
+		{
+			get();
+		}
+
+		Frame& frame = m_frame[m_control.m_current];
+		deviceCtx->Begin(frame.m_disjoint);
+		deviceCtx->End(frame.m_start);
+	}
+
+	// Ends timing in the slot used by begin() and commits it for read-back.
+	void TimerQueryD3D11::end()
+	{
+		ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
+		Frame& frame = m_frame[m_control.m_current];
+		deviceCtx->End(frame.m_end);
+		deviceCtx->End(frame.m_disjoint);
+		m_control.commit(1);
+	}
+
+	// Polls the slot at the ring's read position. Returns true and consumes the
+	// slot once its results can be read, false while they are not available yet.
+	bool TimerQueryD3D11::get()
+	{
+		ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
+		Frame& frame = m_frame[m_control.m_read];
+
+		// m_disjoint is the last query ended by end(), so poll it for readiness
+		// instead of m_end; this also keeps `disjoint` from being read uninitialized.
+		D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
+		HRESULT hr = deviceCtx->GetData(frame.m_disjoint, &disjoint, sizeof(disjoint), 0);
+		if (S_OK == hr)
+		{
+			m_control.consume(1);
+
+			uint64_t start = 0;
+			DX_CHECK(deviceCtx->GetData(frame.m_start, &start, sizeof(start), 0) );
+
+			uint64_t end = 0;
+			DX_CHECK(deviceCtx->GetData(frame.m_end, &end, sizeof(end), 0) );
+
+			// Timestamps of a disjoint bracket are unreliable; keep the previous reading.
+			if (!disjoint.Disjoint)
+			{
+				m_frequency = disjoint.Frequency;
+				m_elapsed = end - start;
+			}
+
+			return true;
+		}
+
+		return false;
+	}
+
 	void RendererContextD3D11::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
 	{
 		PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), L"rendererSubmit");
@@ -3482,6 +3582,11 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		int64_t elapsed = -bx::getHPCounter();
 		int64_t captureElapsed = 0;
 
+		if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
+		{
+			m_gpuTimer.begin();
+		}
+
 		if (0 < _render->m_iboffset)
 		{
 			TransientIndexBuffer* ib = _render->m_transientIb;
@@ -4256,6 +4361,22 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		{
 			PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
 
+			static uint32_t maxGpuLatency = 0;
+			static double maxGpuElapsed = 0.0;
+			double elapsedGpuMs = 0.0;
+
+			m_gpuTimer.end();
+
+			while (m_gpuTimer.get() )
+			{
+				double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
+				elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
+				maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
+			}
+			// available() is 0 once every slot has been read back; don't let the unsigned subtraction wrap.
+			const uint32_t pending = m_gpuTimer.m_control.available();
+			maxGpuLatency = bx::uint32_max(maxGpuLatency, 0 != pending ? pending-1 : 0);
+
 			TextVideoMem& tvm = m_textVideoMem;
 
 			static int64_t next = now;
@@ -4314,12 +4435,18 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					);
 
 				double elapsedCpuMs = double(elapsed)*toMs;
-				tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
+				tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
 					, _render->m_num
 					, statsKeyType[0]
 					, statsKeyType[1]
 					, elapsedCpuMs
+					, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
+					, maxGpuElapsed
+					, maxGpuLatency
 					);
+				maxGpuLatency = 0;
+				maxGpuElapsed = 0.0;
+
 				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
 				{
 					tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d, indirect %7d"
diff --git a/src/renderer_d3d11.h b/src/renderer_d3d11.h
index a39087fe7..ee6bf5c09 100644
--- a/src/renderer_d3d11.h
+++ b/src/renderer_d3d11.h
@@ -297,6 +297,34 @@ namespace bgfx { namespace d3d11
 		TextureHandle m_th[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
 	};
 
+	// GPU timer built on a ring of BX_COUNTOF(m_frame) timestamp query sets.
+	struct TimerQueryD3D11
+	{
+		TimerQueryD3D11()
+			: m_control(BX_COUNTOF(m_frame) )
+		{
+		}
+
+		void create();
+		void destroy();
+		void begin();
+		void end();
+		bool get();
+
+		struct Frame
+		{
+			ID3D11Query* m_disjoint; // brackets m_start/m_end, reports frequency and disjoint flag
+			ID3D11Query* m_start;    // timestamp issued by begin()
+			ID3D11Query* m_end;      // timestamp issued by end()
+		};
+
+		uint64_t m_elapsed;   // end - start of the last slot read by get(), in timestamp ticks
+		uint64_t m_frequency; // ticks per second reported for that slot
+
+		Frame m_frame[4];
+		bx::RingBufferControl m_control;
+	};
+
 } /* namespace d3d11 */ } // namespace bgfx
 
 #endif // BGFX_RENDERER_D3D11_H_HEADER_GUARD
diff --git a/src/renderer_d3d9.cpp b/src/renderer_d3d9.cpp
index 08b577773..ca0d48e23 100644
--- a/src/renderer_d3d9.cpp
+++ b/src/renderer_d3d9.cpp
@@ -1239,6 +1239,8 @@ namespace bgfx { namespace d3d9
 
 			capturePreReset();
 
+			m_gpuTimer.destroy();
+
 			for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
 			{
 				m_indexBuffers[ii].preReset();
@@ -1266,6 +1268,8 @@ namespace bgfx { namespace d3d9
 			DX_CHECK(m_swapChain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &m_backBufferColor) );
 			DX_CHECK(m_device->GetDepthStencilSurface(&m_backBufferDepthStencil) );
 
+			m_gpuTimer.create();
+
 			capturePostReset();
 
 			for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
@@ -1719,8 +1723,9 @@ namespace bgfx { namespace d3d9
 		IDirect3DDevice9Ex* m_deviceEx;
 #endif // BGFX_CONFIG_RENDERER_DIRECT3D9EX
 
-		IDirect3D9* m_d3d9;
+		IDirect3D9*       m_d3d9;
 		IDirect3DDevice9* m_device;
+		TimerQueryD3D9    m_gpuTimer;
 		D3DPOOL m_pool;
 
 		IDirect3DSwapChain9* m_swapChain;
@@ -2892,6 +2897,97 @@ namespace bgfx { namespace d3d9
 			) );
 	}
 
+	// Creates the timestamp queries of every frame slot and resets the readings.
+	void TimerQueryD3D9::create()
+	{
+		IDirect3DDevice9* device = s_renderD3D9->m_device;
+
+		for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+		{
+			Frame& frame = m_frame[ii];
+			DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMPDISJOINT, &frame.m_disjoint) );
+			DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &frame.m_start) );
+			DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &frame.m_end) );
+			DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, &frame.m_freq) );
+		}
+
+		m_elapsed = 0;
+		m_frequency = 1;
+	}
+
+	// Releases the queries of every frame slot.
+	void TimerQueryD3D9::destroy()
+	{
+		for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+		{
+			Frame& frame = m_frame[ii];
+			DX_RELEASE(frame.m_disjoint, 0);
+			DX_RELEASE(frame.m_start, 0);
+			DX_RELEASE(frame.m_end, 0);
+			DX_RELEASE(frame.m_freq, 0);
+		}
+	}
+
+	// Reserves a frame slot and starts timing in it. While no slot can be
+	// reserved, get() is called to read back previously committed slots.
+	void TimerQueryD3D9::begin()
+	{
+		while (0 == m_control.reserve(1) )
+		{
+			get();
+		}
+
+		Frame& frame = m_frame[m_control.m_current];
+		frame.m_disjoint->Issue(D3DISSUE_BEGIN);
+		frame.m_start->Issue(D3DISSUE_END);
+	}
+
+	// Ends timing in the slot used by begin() and commits it for read-back.
+	void TimerQueryD3D9::end()
+	{
+		Frame& frame = m_frame[m_control.m_current];
+		frame.m_end->Issue(D3DISSUE_END);
+		// Close the disjoint bracket opened by begin(); m_freq stays last because
+		// get() polls it to detect that the whole slot can be read.
+		frame.m_disjoint->Issue(D3DISSUE_END);
+		frame.m_freq->Issue(D3DISSUE_END);
+		m_control.commit(1);
+	}
+
+	// Polls the slot at the ring's read position. Returns true and consumes the
+	// slot once its results can be read, false while they are not available yet.
+	bool TimerQueryD3D9::get()
+	{
+		Frame& frame = m_frame[m_control.m_read];
+
+		uint64_t freq;
+		HRESULT hr = frame.m_freq->GetData(&freq, sizeof(freq), 0);
+		if (S_OK == hr)
+		{
+			m_control.consume(1);
+
+			uint64_t start = 0;
+			DX_CHECK(frame.m_start->GetData(&start, sizeof(start), 0) );
+
+			uint64_t end = 0;
+			DX_CHECK(frame.m_end->GetData(&end, sizeof(end), 0) );
+
+			BOOL disjoint = FALSE;
+			DX_CHECK(frame.m_disjoint->GetData(&disjoint, sizeof(disjoint), 0) );
+
+			// Timestamps of a disjoint bracket are unreliable; keep the previous reading.
+			if (!disjoint)
+			{
+				m_frequency = freq;
+				m_elapsed = end - start;
+			}
+
+			return true;
+		}
+
+		return false;
+	}
+
 	void RendererContextD3D9::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
 	{
 		IDirect3DDevice9* device = m_device;
@@ -2905,6 +3001,11 @@ namespace bgfx { namespace d3d9
 
 		device->BeginScene();
 
+		if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
+		{
+			m_gpuTimer.begin();
+		}
+
 		if (0 < _render->m_iboffset)
 		{
 			TransientIndexBuffer* ib = _render->m_transientIb;
@@ -3453,6 +3554,22 @@ namespace bgfx { namespace d3d9
 		{
 			PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
 
+			static uint32_t maxGpuLatency = 0;
+			static double maxGpuElapsed = 0.0;
+			double elapsedGpuMs = 0.0;
+
+			m_gpuTimer.end();
+
+			while (m_gpuTimer.get() )
+			{
+				double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
+				elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
+				maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
+			}
+			// available() is 0 once every slot has been read back; don't let the unsigned subtraction wrap.
+			const uint32_t pending = m_gpuTimer.m_control.available();
+			maxGpuLatency = bx::uint32_max(maxGpuLatency, 0 != pending ? pending-1 : 0);
+
 			TextVideoMem& tvm = m_textVideoMem;
 
 			static int64_t next = now;
@@ -3490,12 +3607,18 @@ namespace bgfx { namespace d3d9
 					);
 
 				double elapsedCpuMs = double(elapsed)*toMs;
-				tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
+				tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
 					, _render->m_num
 					, statsKeyType[0]
 					, statsKeyType[1]
 					, elapsedCpuMs
+					, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
+					, maxGpuElapsed
+					, maxGpuLatency
 					);
+				maxGpuLatency = 0;
+				maxGpuElapsed = 0.0;
+
 				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
 				{
 					tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d"
diff --git a/src/renderer_d3d9.h b/src/renderer_d3d9.h
index 429721fab..5d509b257 100644
--- a/src/renderer_d3d9.h
+++ b/src/renderer_d3d9.h
@@ -389,6 +389,35 @@ namespace bgfx { namespace d3d9
 		bool m_needResolve;
 	};
 
+	// GPU timer built on a ring of BX_COUNTOF(m_frame) timestamp query sets.
+	struct TimerQueryD3D9
+	{
+		TimerQueryD3D9()
+			: m_control(BX_COUNTOF(m_frame) )
+		{
+		}
+
+		void create();
+		void destroy();
+		void begin();
+		void end();
+		bool get();
+
+		struct Frame
+		{
+			IDirect3DQuery9* m_disjoint; // brackets m_start/m_end, reports the disjoint flag
+			IDirect3DQuery9* m_start;    // timestamp issued by begin()
+			IDirect3DQuery9* m_end;      // timestamp issued by end()
+			IDirect3DQuery9* m_freq;     // timestamp frequency, issued last and polled by get()
+		};
+
+		uint64_t m_elapsed;   // end - start of the last slot read by get(), in timestamp ticks
+		uint64_t m_frequency; // ticks per second reported for that slot
+
+		Frame m_frame[4];
+		bx::RingBufferControl m_control;
+	};
+
 } /* namespace d3d9 */ } // namespace bgfx
 
 #endif // BGFX_RENDERER_D3D9_H_HEADER_GUARD
diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp
index c790ecb44..716e805fc 100644
--- a/src/renderer_gl.cpp
+++ b/src/renderer_gl.cpp
@@ -5661,6 +5661,7 @@ namespace bgfx { namespace gl
 					, elapsedCpuMs > elapsedGpuMs ? '>' : '<'
 					, elapsedGpuMs
 					);
+
 				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primInfo); ++ii)
 				{
 					tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d"