From c930137c54dca54df95e3b4ad3e233d7871773b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?= Date: Sun, 13 Aug 2017 21:15:39 -0700 Subject: [PATCH] Internal profiler WIP. --- examples/common/example-glue.cpp | 72 ++++++++++ include/bgfx/bgfx.h | 44 ++++--- include/bgfx/c99/bgfx.h | 13 ++ src/renderer_d3d11.cpp | 220 +++++++++++++++++++------------ src/renderer_d3d11.h | 35 +++-- 5 files changed, 275 insertions(+), 109 deletions(-) diff --git a/examples/common/example-glue.cpp b/examples/common/example-glue.cpp index 16497e38c..12ae96c9c 100644 --- a/examples/common/example-glue.cpp +++ b/examples/common/example-glue.cpp @@ -8,6 +8,7 @@ #include "entry/cmd.h" #include #include +#include void showExampleDialog(entry::AppI* _app, const char* _errorText) { @@ -133,5 +134,76 @@ void showExampleDialog(entry::AppI* _app, const char* _errorText) , stats->maxGpuLatency ); + if (0 != stats->numViews) + { + if (ImGui::CollapsingHeader(ICON_FA_CLOCK_O " Profiler") ) + { + if (ImGui::BeginChild("##view_profiler", ImVec2(0.0f, 0.0f) ) ) + { + ImGui::PushFont(ImGui::Font::Mono); + + ImVec4 cpuColor(0.5f, 1.0f, 0.5f, 1.0f); + ImVec4 gpuColor(0.5f, 0.5f, 1.0f, 1.0f); + + const float itemHeight = ImGui::GetTextLineHeightWithSpacing(); + + if (ImGui::ListBoxHeader("##empty", ImVec2(ImGui::GetWindowWidth(), stats->numViews*itemHeight) ) ) + { + ImGuiListClipper clipper(stats->numViews, itemHeight); + + const double toCpuMs = 1000.0/stats->cpuTimerFreq; + const double toGpuMs = 1000.0/stats->gpuTimerFreq; + const float scale = 3.0f; + + while (clipper.Step() ) + { + for (int32_t pos = clipper.DisplayStart; pos < clipper.DisplayEnd; ++pos) + { + const bgfx::ViewStats& viewStats = stats->viewStats[pos]; + + ImGui::Text("%3d %3d %s", pos, viewStats.view, viewStats.name); + ImGui::SameLine(64.0f); + + ImGui::PushStyleColor(ImGuiCol_Button, cpuColor); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, cpuColor); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, cpuColor); + + const float maxWidth = 30.0f*scale; + const float cpuWidth = bx::fclamp(float(viewStats.cpuTimeElapsed*toCpuMs)*scale, 1.0f, maxWidth); + const float gpuWidth = bx::fclamp(float(viewStats.gpuTimeElapsed*toGpuMs)*scale, 1.0f, maxWidth); + + ImGui::Button("", ImVec2(cpuWidth, itemHeight) ); + if (ImGui::IsItemHovered() ) + { + ImGui::SetTooltip("CPU: %f [ms]", viewStats.cpuTimeElapsed*toCpuMs); + } + ImGui::PopStyleColor(3); + ImGui::SameLine(); + + ImGui::InvisibleButton("", ImVec2(maxWidth-cpuWidth, itemHeight) ); + ImGui::SameLine(); + + ImGui::PushStyleColor(ImGuiCol_Button, gpuColor); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, gpuColor); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, gpuColor); + ImGui::Button("", ImVec2(gpuWidth, itemHeight) ); + if (ImGui::IsItemHovered() ) + { + ImGui::SetTooltip("GPU: %f [ms]", viewStats.gpuTimeElapsed*toGpuMs); + } + ImGui::PopStyleColor(3); + } + } + + ImGui::ListBoxFooter(); + } + + ImGui::PopFont(); + + ImGui::EndChild(); + } + } + } + ImGui::End(); } diff --git a/include/bgfx/bgfx.h b/include/bgfx/bgfx.h index 7d8926053..b3fd8909d 100644 --- a/include/bgfx/bgfx.h +++ b/include/bgfx/bgfx.h @@ -723,32 +723,44 @@ namespace bgfx uint8_t flags; //!< Status flags. }; + /// + struct ViewStats + { + char name[256]; //!< + uint8_t view; //!< + uint64_t cpuTimeElapsed; //!< + uint64_t gpuTimeElapsed; //!< + }; + /// Renderer statistics data. /// /// @attention C99 equivalent is `bgfx_stats_t`. /// struct Stats { - uint64_t cpuTimeBegin; //!< CPU frame begin time. - uint64_t cpuTimeEnd; //!< CPU frame end time. - uint64_t cpuTimerFreq; //!< CPU timer frequency. + uint64_t cpuTimeBegin; //!< CPU frame begin time. + uint64_t cpuTimeEnd; //!< CPU frame end time. + uint64_t cpuTimerFreq; //!< CPU timer frequency. - uint64_t gpuTimeBegin; //!< GPU frame begin time. - uint64_t gpuTimeEnd; //!< GPU frame end time. - uint64_t gpuTimerFreq; //!< GPU timer frequency. + uint64_t gpuTimeBegin; //!< GPU frame begin time. + uint64_t gpuTimeEnd; //!< GPU frame end time. + uint64_t gpuTimerFreq; //!< GPU timer frequency. - int64_t waitRender; //!< Time spent waiting for render backend thread to finish issuing - //! draw commands to underlying graphics API. - int64_t waitSubmit; //!< Time spent waiting for submit thread to advance to next frame. + int64_t waitRender; //!< Time spent waiting for render backend thread to finish issuing + //! draw commands to underlying graphics API. + int64_t waitSubmit; //!< Time spent waiting for submit thread to advance to next frame. - uint32_t numDraw; //!< Number of draw calls submitted. - uint32_t numCompute; //!< Number of compute calls submitted. - uint32_t maxGpuLatency; //!< GPU driver latency. + uint32_t numDraw; //!< Number of draw calls submitted. + uint32_t numCompute; //!< Number of compute calls submitted. + uint32_t maxGpuLatency; //!< GPU driver latency. - uint16_t width; //!< Backbuffer width in pixels. - uint16_t height; //!< Backbuffer height in pixels. - uint16_t textWidth; //!< Debug text width in characters. - uint16_t textHeight; //!< Debug text height in characters. + uint16_t width; //!< Backbuffer width in pixels. + uint16_t height; //!< Backbuffer height in pixels. + uint16_t textWidth; //!< Debug text width in characters. + uint16_t textHeight; //!< Debug text height in characters. + + uint16_t numViews; //!< + ViewStats viewStats[256]; //!< }; /// Vertex declaration. diff --git a/include/bgfx/c99/bgfx.h b/include/bgfx/c99/bgfx.h index 7907d21df..4e0fa4472 100644 --- a/include/bgfx/c99/bgfx.h +++ b/include/bgfx/c99/bgfx.h @@ -335,6 +335,16 @@ typedef struct bgfx_hmd } bgfx_hmd_t; +/**/ +typedef struct bgfx_view_stats +{ + char name[256]; + uint8_t view; + uint64_t cpuTimeElapsed; + uint64_t gpuTimeElapsed; + +} bgfx_view_stats_t; + /**/ typedef struct bgfx_stats { @@ -358,6 +368,9 @@ typedef struct bgfx_stats uint16_t textWidth; uint16_t textHeight; + uint16_t numViews; + bgfx_view_stats_t viewStats[256]; + } bgfx_stats_t; /**/ diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp index 9900c62ec..b86938336 100644 --- a/src/renderer_d3d11.cpp +++ b/src/renderer_d3d11.cpp @@ -15,20 +15,6 @@ # endif // BX_PLATFORM_WINRT #endif // !BX_PLATFORM_WINDOWS -#if BGFX_CONFIG_PROFILER_REMOTERY -# define BGFX_GPU_PROFILER_BIND(_device, _context) rmt_BindD3D11(_device, _context) -# define BGFX_GPU_PROFILER_UNBIND() rmt_UnbindD3D11() -# define BGFX_GPU_PROFILER_BEGIN(_group, _name, _color) rmt_BeginD3D11Sample(_group##_##_name) -# define BGFX_GPU_PROFILER_BEGIN_DYNAMIC(_namestr) rmt_BeginD3D11SampleDynamic(_namestr) -# define BGFX_GPU_PROFILER_END() rmt_EndD3D11Sample() -#else -# define BGFX_GPU_PROFILER_BIND(_device, _context) BX_NOOP() -# define BGFX_GPU_PROFILER_UNBIND() BX_NOOP() -# define BGFX_GPU_PROFILER_BEGIN(_group, _name, _color) BX_NOOP() -# define BGFX_GPU_PROFILER_BEGIN_DYNAMIC(_namestr) BX_NOOP() -# define BGFX_GPU_PROFILER_END() BX_NOOP() -#endif - #if BGFX_CONFIG_USE_OVR # include "hmd_ovr.h" #endif // BGFX_CONFIG_USE_OVR @@ -1680,8 +1666,6 @@ BX_PRAGMA_DIAGNOSTIC_POP(); postReset(); } - BGFX_GPU_PROFILER_BIND(m_device, m_deviceCtx); - g_internalData.context = m_device; return true; @@ -1734,8 +1718,6 @@ BX_PRAGMA_DIAGNOSTIC_POP(); void shutdown() { - BGFX_GPU_PROFILER_UNBIND(); - preReset(); m_ovr.shutdown(); @@ -2726,7 +2708,7 @@ BX_PRAGMA_DIAGNOSTIC_POP(); && (BGFX_CLEAR_DEPTH|BGFX_CLEAR_STENCIL) & _clear.m_flags) { DWORD flags = 0; - flags |= (_clear.m_flags & BGFX_CLEAR_DEPTH) ? D3D11_CLEAR_DEPTH : 0; + flags |= (_clear.m_flags & BGFX_CLEAR_DEPTH) ? D3D11_CLEAR_DEPTH : 0; flags |= (_clear.m_flags & BGFX_CLEAR_STENCIL) ? D3D11_CLEAR_STENCIL : 0; m_deviceCtx->ClearDepthStencilView(m_currentDepthStencil, flags, _clear.m_depth, _clear.m_stencil); } @@ -5200,68 +5182,94 @@ BX_PRAGMA_DIAGNOSTIC_POP(); { ID3D11Device* device = s_renderD3D11->m_device; - D3D11_QUERY_DESC query; - query.MiscFlags = 0; - for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii) + D3D11_QUERY_DESC qd; + qd.MiscFlags = 0; + for (uint32_t ii = 0; ii < BX_COUNTOF(m_query); ++ii) { - Frame& frame = m_frame[ii]; + Query& query = m_query[ii]; + query.m_ready = false; - query.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; - DX_CHECK(device->CreateQuery(&query, &frame.m_disjoint) ); + qd.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + DX_CHECK(device->CreateQuery(&qd, &query.m_disjoint) ); - query.Query = D3D11_QUERY_TIMESTAMP; - DX_CHECK(device->CreateQuery(&query, &frame.m_begin) ); - DX_CHECK(device->CreateQuery(&query, &frame.m_end) ); + qd.Query = D3D11_QUERY_TIMESTAMP; + DX_CHECK(device->CreateQuery(&qd, &query.m_begin) ); + DX_CHECK(device->CreateQuery(&qd, &query.m_end) ); + } + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_result); ++ii) + { + Result& result = m_result[ii]; + result.reset(); } - m_elapsed = 0; - m_frequency = 1; m_control.reset(); } void TimerQueryD3D11::preReset() { - for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii) + for (uint32_t ii = 0; ii < BX_COUNTOF(m_query); ++ii) { - Frame& frame = m_frame[ii]; - DX_RELEASE(frame.m_disjoint, 0); - DX_RELEASE(frame.m_begin, 0); - DX_RELEASE(frame.m_end, 0); + Query& query = m_query[ii]; + DX_RELEASE(query.m_disjoint, 0); + DX_RELEASE(query.m_begin, 0); + DX_RELEASE(query.m_end, 0); } } - void TimerQueryD3D11::begin() + uint32_t TimerQueryD3D11::begin(uint32_t _resultIdx) { ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx; while (0 == m_control.reserve(1) ) { - get(); + update(); } - Frame& frame = m_frame[m_control.m_current]; - deviceCtx->Begin(frame.m_disjoint); - deviceCtx->End(frame.m_begin); + Result& result = m_result[_resultIdx]; + ++result.m_pending; + + const uint32_t idx = m_control.m_current; + Query& query = m_query[idx]; + query.m_resultIdx = _resultIdx; + query.m_ready = false; + + deviceCtx->Begin(query.m_disjoint); + deviceCtx->End(query.m_begin); + + m_control.commit(1); + + return idx; } - void TimerQueryD3D11::end() + void TimerQueryD3D11::end(uint32_t _idx) { ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx; - Frame& frame = m_frame[m_control.m_current]; - deviceCtx->End(frame.m_end); - deviceCtx->End(frame.m_disjoint); - m_control.commit(1); + Query& query = m_query[_idx]; + query.m_ready = true; + + deviceCtx->End(query.m_end); + deviceCtx->End(query.m_disjoint); + + while (update() ) + { + } } - bool TimerQueryD3D11::get() + bool TimerQueryD3D11::update() { if (0 != m_control.available() ) { - ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx; - Frame& frame = m_frame[m_control.m_read]; + Query& query = m_query[m_control.m_read]; + + if (!query.m_ready) + { + return false; + } uint64_t timeEnd; - HRESULT hr = deviceCtx->GetData(frame.m_end, &timeEnd, sizeof(timeEnd), D3D11_ASYNC_GETDATA_DONOTFLUSH); + ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx; + HRESULT hr = deviceCtx->GetData(query.m_end, &timeEnd, sizeof(timeEnd), D3D11_ASYNC_GETDATA_DONOTFLUSH); if (S_OK == hr || isLost(hr) ) { @@ -5274,15 +5282,17 @@ BX_PRAGMA_DIAGNOSTIC_POP(); }; D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; - deviceCtx->GetData(frame.m_disjoint, &disjoint, sizeof(disjoint), 0); + DX_CHECK(deviceCtx->GetData(query.m_disjoint, &disjoint, sizeof(disjoint), 0) ); uint64_t timeBegin; - deviceCtx->GetData(frame.m_begin, &timeBegin, sizeof(timeBegin), 0); + DX_CHECK(deviceCtx->GetData(query.m_begin, &timeBegin, sizeof(timeBegin), 0) ); - m_frequency = disjoint.Frequency; - m_begin = timeBegin; - m_end = timeEnd; - m_elapsed = timeEnd - timeBegin; + Result& result = m_result[query.m_resultIdx]; + --result.m_pending; + + result.m_frequency = disjoint.Frequency; + result.m_begin = timeBegin; + result.m_end = timeEnd; return true; } @@ -5291,6 +5301,58 @@ BX_PRAGMA_DIAGNOSTIC_POP(); return false; } + struct Profiler + { + Profiler(Frame* _frame, TimerQueryD3D11& _gpuTimer, bool _enabled = false) + : m_frame(_frame) + , m_gpuTimer(_gpuTimer) + , m_numViews(0) + , m_enabled(_enabled) + { + } + + ~Profiler() + { + m_frame->m_perfStats.numViews = m_numViews; + } + + void begin(uint16_t _view) + { + if (m_enabled) + { + ViewStats& viewStats = m_frame->m_perfStats.viewStats[m_numViews]; + viewStats.cpuTimeElapsed = -bx::getHPCounter(); + + m_queryIdx = m_gpuTimer.begin(_view); + + viewStats.view = uint8_t(_view); + bx::strCopy(viewStats.name, BGFX_CONFIG_MAX_VIEW_NAME, &s_viewName[_view][BGFX_CONFIG_MAX_VIEW_NAME_RESERVED]); + } + } + + void end() + { + if (m_enabled) + { + m_gpuTimer.end(m_queryIdx); + + ViewStats& viewStats = m_frame->m_perfStats.viewStats[m_numViews]; + const TimerQueryD3D11::Result& result = m_gpuTimer.m_result[viewStats.view]; + + viewStats.cpuTimeElapsed += bx::getHPCounter(); + viewStats.gpuTimeElapsed = result.m_end - result.m_begin; + + ++m_numViews; + } + } + + Frame* m_frame; + TimerQueryD3D11& m_gpuTimer; + uint32_t m_queryIdx; + uint16_t m_numViews; + bool m_enabled; + }; + void OcclusionQueryD3D11::postReset() { ID3D11Device* device = s_renderD3D11->m_device; @@ -5466,16 +5528,17 @@ BX_PRAGMA_DIAGNOSTIC_POP(); } PIX_BEGINEVENT(D3DCOLOR_FRAME, L"rendererSubmit"); - BGFX_GPU_PROFILER_BEGIN_DYNAMIC("rendererSubmit"); ID3D11DeviceContext* deviceCtx = m_deviceCtx; int64_t elapsed = -bx::getHPCounter(); int64_t captureElapsed = 0; + uint32_t frameQueryIdx = UINT32_MAX; + if (m_timerQuerySupport) { - m_gpuTimer.begin(); + frameQueryIdx = m_gpuTimer.begin(BGFX_CONFIG_MAX_VIEWS); } if (0 < _render->m_iboffset) @@ -5534,6 +5597,8 @@ BX_PRAGMA_DIAGNOSTIC_POP(); uint32_t statsNumIndices = 0; uint32_t statsKeyType[2] = {}; + Profiler profiler(_render, m_gpuTimer); + m_occlusionQuery.resolve(_render); if (0 == (_render->m_debug&BGFX_DEBUG_IFH) ) @@ -5606,11 +5671,10 @@ BX_PRAGMA_DIAGNOSTIC_POP(); PIX_ENDEVENT(); if (item > 1) { - BGFX_GPU_PROFILER_END(); - BGFX_PROFILER_END(); + profiler.end(); } - BGFX_PROFILER_BEGIN_DYNAMIC(s_viewName[view]); - BGFX_GPU_PROFILER_BEGIN_DYNAMIC(s_viewName[view]); + + profiler.begin(view); viewState.m_rect = _render->m_rect[view]; if (viewRestart) @@ -6351,13 +6415,11 @@ BX_PRAGMA_DIAGNOSTIC_POP(); capture(); captureElapsed += bx::getHPCounter(); - BGFX_GPU_PROFILER_END(); - BGFX_PROFILER_END(); + profiler.end(); } } PIX_ENDEVENT(); - BGFX_GPU_PROFILER_END(); int64_t now = bx::getHPCounter(); elapsed += now; @@ -6379,28 +6441,26 @@ BX_PRAGMA_DIAGNOSTIC_POP(); static double maxGpuElapsed = 0.0f; double elapsedGpuMs = 0.0; - if (m_timerQuerySupport) + if (UINT32_MAX != frameQueryIdx) { - m_gpuTimer.end(); + m_gpuTimer.end(frameQueryIdx); - do - { - double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency); - elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs; - maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed; - } - while (m_gpuTimer.get() ); + const TimerQueryD3D11::Result& result = m_gpuTimer.m_result[BGFX_CONFIG_MAX_VIEWS]; + double toGpuMs = 1000.0 / double(result.m_frequency); + elapsedGpuMs = (result.m_end - result.m_begin) * toGpuMs; + maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed; - maxGpuLatency = bx::uint32_imax(maxGpuLatency, m_gpuTimer.m_control.available()-1); + maxGpuLatency = bx::uint32_imax(maxGpuLatency, result.m_pending-1); } const int64_t timerFreq = bx::getHPFrequency(); perfStats.cpuTimeEnd = now; perfStats.cpuTimerFreq = timerFreq; - perfStats.gpuTimeBegin = m_gpuTimer.m_begin; - perfStats.gpuTimeEnd = m_gpuTimer.m_end; - perfStats.gpuTimerFreq = m_gpuTimer.m_frequency; + const TimerQueryD3D11::Result& result = m_gpuTimer.m_result[BGFX_CONFIG_MAX_VIEWS]; + perfStats.gpuTimeBegin = result.m_begin; + perfStats.gpuTimeEnd = result.m_end; + perfStats.gpuTimerFreq = result.m_frequency; perfStats.numDraw = statsKeyType[0]; perfStats.numCompute = statsKeyType[1]; perfStats.maxGpuLatency = maxGpuLatency; @@ -6551,12 +6611,6 @@ BX_PRAGMA_DIAGNOSTIC_POP(); } } /* namespace d3d11 */ } // namespace bgfx -#undef BGFX_GPU_PROFILER_BIND -#undef BGFX_GPU_PROFILER_UNBIND -#undef BGFX_GPU_PROFILER_BEGIN -#undef BGFX_GPU_PROFILER_BEGIN_DYNAMIC -#undef BGFX_GPU_PROFILER_END - #else namespace bgfx { namespace d3d11 diff --git a/src/renderer_d3d11.h b/src/renderer_d3d11.h index 852a69e11..1af417983 100644 --- a/src/renderer_d3d11.h +++ b/src/renderer_d3d11.h @@ -300,29 +300,44 @@ namespace bgfx { namespace d3d11 struct TimerQueryD3D11 { TimerQueryD3D11() - : m_control(BX_COUNTOF(m_frame) ) + : m_control(BX_COUNTOF(m_query) ) { } void postReset(); void preReset(); - void begin(); - void end(); - bool get(); + uint32_t begin(uint32_t _resultIdx); + void end(uint32_t _idx); + bool update(); - struct Frame + struct Query { ID3D11Query* m_disjoint; ID3D11Query* m_begin; ID3D11Query* m_end; + uint32_t m_resultIdx; + bool m_ready; }; - uint64_t m_begin; - uint64_t m_end; - uint64_t m_elapsed; - uint64_t m_frequency; + struct Result + { + void reset() + { + m_begin = 0; + m_end = 0; + m_frequency = 1; + m_pending = 0; + } - Frame m_frame[4]; + uint64_t m_begin; + uint64_t m_end; + uint64_t m_frequency; + uint32_t m_pending; + }; + + Result m_result[BGFX_CONFIG_MAX_VIEWS+1]; + + Query m_query[BGFX_CONFIG_MAX_VIEWS*4]; bx::RingBufferControl m_control; };