diff --git a/src/bgfx.cpp b/src/bgfx.cpp index eefc60855..0751d77e7 100644 --- a/src/bgfx.cpp +++ b/src/bgfx.cpp @@ -1831,7 +1831,7 @@ namespace bgfx m_init = _init; m_init.resolution.reset &= ~BGFX_RESET_INTERNAL_FORCE; m_init.resolution.numBackBuffers = bx::clamp(_init.resolution.numBackBuffers, 2, BGFX_CONFIG_MAX_BACK_BUFFERS); - m_init.resolution.maxFrameLatency = bx::min(_init.resolution.maxFrameLatency, 3); + m_init.resolution.maxFrameLatency = bx::min(_init.resolution.maxFrameLatency, BGFX_CONFIG_MAX_FRAME_LATENCY); dump(m_init.resolution); if (g_platformData.ndt == NULL diff --git a/src/config.h b/src/config.h index 2340dda4a..49354a0d5 100644 --- a/src/config.h +++ b/src/config.h @@ -361,6 +361,10 @@ BX_STATIC_ASSERT(bx::isPowerOf2(BGFX_CONFIG_MAX_VIEWS), "BGFX_CONFIG_MAX_VIEWS m # define BGFX_CONFIG_MAX_BACK_BUFFERS 4 #endif // BGFX_CONFIG_MAX_BACK_BUFFERS +#ifndef BGFX_CONFIG_MAX_FRAME_LATENCY +# define BGFX_CONFIG_MAX_FRAME_LATENCY 3 +#endif // BGFX_CONFIG_MAX_FRAME_LATENCY + #ifndef BGFX_CONFIG_PREFER_DISCRETE_GPU // On laptops with integrated and discrete GPU, prefer selection of discrete GPU. // nVidia and AMD, on Windows only. diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp index b369affdd..0c2f1260a 100644 --- a/src/renderer_d3d11.cpp +++ b/src/renderer_d3d11.cpp @@ -1035,7 +1035,7 @@ namespace bgfx { namespace d3d11 m_scd.alphaMode = DXGI_ALPHA_MODE_IGNORE; m_scd.flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; - m_scd.maxFrameLatency = bx::min(_init.resolution.maxFrameLatency, 3); + m_scd.maxFrameLatency = bx::min(_init.resolution.maxFrameLatency, BGFX_CONFIG_MAX_FRAME_LATENCY); m_scd.nwh = g_platformData.nwh; m_scd.ndt = g_platformData.ndt; m_scd.windowed = true; diff --git a/src/renderer_d3d12.cpp b/src/renderer_d3d12.cpp index fb8695770..1af376684 100644 --- a/src/renderer_d3d12.cpp +++ b/src/renderer_d3d12.cpp @@ -954,7 +954,7 @@ namespace bgfx { namespace d3d12 m_scd.alphaMode = DXGI_ALPHA_MODE_IGNORE; m_scd.flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; - m_scd.maxFrameLatency = bx::min(_init.resolution.maxFrameLatency, 3); + m_scd.maxFrameLatency = bx::min(_init.resolution.maxFrameLatency, BGFX_CONFIG_MAX_FRAME_LATENCY); m_scd.nwh = g_platformData.nwh; m_scd.ndt = g_platformData.ndt; m_scd.windowed = true; diff --git a/src/renderer_mtl.h b/src/renderer_mtl.h index 97655df6a..cb445b84a 100644 --- a/src/renderer_mtl.h +++ b/src/renderer_mtl.h @@ -67,8 +67,6 @@ namespace bgfx { namespace mtl // objects with creation functions starting with 'new' has a refcount 1 after creation, object must be destroyed with release. // commandBuffer, commandEncoders are autoreleased objects. Needs AutoreleasePool! -#define MTL_MAX_FRAMES_IN_FLIGHT (3) - #define MTL_CLASS(name) \ class name \ { \ @@ -1115,7 +1113,7 @@ namespace bgfx { namespace mtl int m_releaseWriteIndex; int m_releaseReadIndex; typedef stl::vector ResourceArray; - ResourceArray m_release[MTL_MAX_FRAMES_IN_FLIGHT]; + ResourceArray m_release[BGFX_CONFIG_MAX_FRAME_LATENCY]; }; struct TimerQueryMtl diff --git a/src/renderer_mtl.mm b/src/renderer_mtl.mm index 25560fc40..dcd17c36a 100644 --- a/src/renderer_mtl.mm +++ b/src/renderer_mtl.mm @@ -423,7 +423,7 @@ namespace bgfx { namespace mtl m_textureDescriptor = newTextureDescriptor(); m_samplerDescriptor = newSamplerDescriptor(); - for (uint8_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii) + for (uint8_t ii = 0; ii < BGFX_CONFIG_MAX_FRAME_LATENCY; ++ii) { m_uniformBuffers[ii] = m_device.newBufferWithLength(UNIFORM_BUFFER_SIZE, 0); } @@ -711,7 +711,7 @@ namespace bgfx { namespace mtl m_mainFrameBuffer.destroy(); - for (uint8_t i=0; i < MTL_MAX_FRAMES_IN_FLIGHT; ++i) + for (uint8_t i=0; i < BGFX_CONFIG_MAX_FRAME_LATENCY; ++i) { MTL_RELEASE(m_uniformBuffers[i]); } @@ -2344,7 +2344,7 @@ namespace bgfx { namespace mtl bool m_hasStoreActionStoreAndMultisampleResolve; Buffer m_uniformBuffer; - Buffer m_uniformBuffers[MTL_MAX_FRAMES_IN_FLIGHT]; + Buffer m_uniformBuffers[BGFX_CONFIG_MAX_FRAME_LATENCY]; uint32_t m_uniformBufferVertexOffset; uint32_t m_uniformBufferFragmentOffset; @@ -3403,7 +3403,7 @@ namespace bgfx { namespace mtl void CommandQueueMtl::init(Device _device) { m_commandQueue = _device.newCommandQueue(); - m_framesSemaphore.post(MTL_MAX_FRAMES_IN_FLIGHT); + m_framesSemaphore.post(BGFX_CONFIG_MAX_FRAME_LATENCY); } void CommandQueueMtl::shutdown() @@ -3435,7 +3435,7 @@ namespace bgfx { namespace mtl { if (_endFrame) { - m_releaseWriteIndex = (m_releaseWriteIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT; + m_releaseWriteIndex = (m_releaseWriteIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY; m_activeCommandBuffer.addCompletedHandler(commandBufferFinishedCallback, this); } @@ -3480,7 +3480,7 @@ namespace bgfx { namespace mtl void CommandQueueMtl::consume() { m_framesSemaphore.wait(); - m_releaseReadIndex = (m_releaseReadIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT; + m_releaseReadIndex = (m_releaseReadIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY; ResourceArray& ra = m_release[m_releaseReadIndex]; @@ -3755,7 +3755,7 @@ namespace bgfx { namespace mtl } m_uniformBuffer = m_uniformBuffers[m_bufferIndex]; - m_bufferIndex = (m_bufferIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT; + m_bufferIndex = (m_bufferIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY; m_uniformBufferVertexOffset = 0; m_uniformBufferFragmentOffset = 0; diff --git a/src/renderer_vk.cpp b/src/renderer_vk.cpp index 4d07cc626..522593be1 100644 --- a/src/renderer_vk.cpp +++ b/src/renderer_vk.cpp @@ -482,7 +482,7 @@ VK_IMPORT_DEVICE const char* getName(VkPhysicalDeviceType _type) { - return s_deviceTypeName[bx::min(_type, BX_COUNTOF(s_deviceTypeName) )]; + return s_deviceTypeName[bx::min(_type, BX_COUNTOF(s_deviceTypeName)-1 )]; } static const char* s_allocScopeName[] = @@ -860,40 +860,38 @@ VK_IMPORT_DEVICE switch (_oldLayout) { case VK_IMAGE_LAYOUT_UNDEFINED: -// srcAccessMask |= VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; break; case VK_IMAGE_LAYOUT_GENERAL: + srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; break; case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break; case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: break; case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - srcAccessMask |= VK_ACCESS_SHADER_READ_BIT; break; case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - srcAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; break; case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; case VK_IMAGE_LAYOUT_PREINITIALIZED: - srcAccessMask |= VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; break; case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT; break; default: @@ -902,40 +900,36 @@ VK_IMPORT_DEVICE switch (_newLayout) { - case VK_IMAGE_LAYOUT_UNDEFINED: - break; - case VK_IMAGE_LAYOUT_GENERAL: + dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; break; case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break; case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; break; case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; break; case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; + dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break; case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; - break; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: + dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT; + dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; break; default: @@ -2103,11 +2097,13 @@ VK_IMPORT_DEVICE compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; } + uint8_t swapBufferCount = bx::clamp(_init.resolution.numBackBuffers, 2, BGFX_CONFIG_MAX_BACK_BUFFERS); m_sci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; m_sci.pNext = NULL; m_sci.flags = 0; m_sci.surface = m_surface; m_sci.minImageCount = surfaceCapabilities.minImageCount; + m_sci.minImageCount = bx::clamp(swapBufferCount, surfaceCapabilities.minImageCount, surfaceCapabilities.maxImageCount); m_sci.imageFormat = m_backBufferColorFormat.format; m_sci.imageColorSpace = m_backBufferColorFormat.colorSpace; m_sci.imageExtent.width = width; @@ -2693,6 +2689,23 @@ VK_IMPORT_DEVICE , 1 ); + // Make changes to image visible to host read + VkMemoryBarrier memBarrier{ VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; + vkCmdPipelineBarrier( + copyCmd + , VK_PIPELINE_STAGE_TRANSFER_BIT + , VK_PIPELINE_STAGE_HOST_BIT + , 0 + , 1 + , &memBarrier + , 0 + , NULL + , 0 + , NULL + ); + setImageMemoryBarrier( copyCmd , srcImage @@ -2908,6 +2921,23 @@ VK_IMPORT_DEVICE , 1 ); + // Make changes to image visible to host read + VkMemoryBarrier memBarrier{ VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; + vkCmdPipelineBarrier( + copyCmd + , VK_PIPELINE_STAGE_TRANSFER_BIT + , VK_PIPELINE_STAGE_HOST_BIT + , 0 + , 1 + , &memBarrier + , 0 + , NULL + , 0 + , NULL + ); + // Transition back the swap chain image after the blit is done setImageMemoryBarrier( copyCmd @@ -4528,7 +4558,7 @@ VK_IMPORT_DEVICE uint64_t kick(VkSemaphore _wait = VK_NULL_HANDLE, VkSemaphore _signal = VK_NULL_HANDLE) { VkPipelineStageFlags stageFlags = 0 - | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT + | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT ; VkSubmitInfo si; @@ -6659,11 +6689,20 @@ VK_DESTROY beginRenderPass = false; } - VK_CHECK(vkEndCommandBuffer(m_commandBuffer) ); - - kick(renderWait); - renderWait = VK_NULL_HANDLE; - finishAll(); + const VkPipelineStageFlags srcStage = wasCompute + ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT + : VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT + ; + const VkPipelineStageFlags dstStage = isCompute + ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT + : VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT + ; + VkMemoryBarrier memBarrier; + memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + memBarrier.pNext = NULL; + memBarrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + memBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + vkCmdPipelineBarrier(m_commandBuffer, srcStage, dstStage, 0, 1, &memBarrier, 0, NULL, 0, NULL); view = key.m_view; currentPipeline = VK_NULL_HANDLE; @@ -6671,8 +6710,7 @@ VK_DESTROY currentProgram = BGFX_INVALID_HANDLE; hasPredefined = false; BX_UNUSED(currentSamplerStateIdx); - - VK_CHECK(vkBeginCommandBuffer(m_commandBuffer, &cbbi) ); + fbh = _render->m_view[view].m_fbh; setFrameBuffer(fbh); diff --git a/src/renderer_webgpu.cpp b/src/renderer_webgpu.cpp index fac8b1e56..f310d48d5 100644 --- a/src/renderer_webgpu.cpp +++ b/src/renderer_webgpu.cpp @@ -569,7 +569,7 @@ namespace bgfx { namespace webgpu m_cmd.init(m_queue); //BGFX_FATAL(NULL != m_cmd.m_commandQueue, Fatal::UnableToInitialize, "Unable to create Metal device."); - for (uint8_t ii = 0; ii < WEBGPU_MAX_FRAMES_IN_FLIGHT; ++ii) + for (uint8_t ii = 0; ii < BGFX_CONFIG_MAX_FRAME_LATENCY; ++ii) { BX_TRACE("Create scratch buffer %d", ii); m_scratchBuffers[ii].create(BGFX_CONFIG_MAX_DRAW_CALLS * 128); @@ -2372,9 +2372,9 @@ namespace bgfx { namespace webgpu CommandQueueWgpu m_cmd; StagingBufferWgpu m_uniformBuffers[WEBGPU_NUM_UNIFORM_BUFFERS]; - ScratchBufferWgpu m_scratchBuffers[WEBGPU_MAX_FRAMES_IN_FLIGHT]; + ScratchBufferWgpu m_scratchBuffers[BGFX_CONFIG_MAX_FRAME_LATENCY]; - BindStateCacheWgpu m_bindStateCache[WEBGPU_MAX_FRAMES_IN_FLIGHT]; + BindStateCacheWgpu m_bindStateCache[BGFX_CONFIG_MAX_FRAME_LATENCY]; uint8_t m_frameIndex; @@ -3800,7 +3800,7 @@ namespace bgfx { namespace webgpu { m_queue = _queue; #if BGFX_CONFIG_MULTITHREADED - //m_framesSemaphore.post(WEBGPU_MAX_FRAMES_IN_FLIGHT); + //m_framesSemaphore.post(BGFX_CONFIG_MAX_FRAME_LATENCY); #endif } @@ -3838,7 +3838,7 @@ namespace bgfx { namespace webgpu { if (_endFrame) { - m_releaseWriteIndex = (m_releaseWriteIndex + 1) % WEBGPU_MAX_FRAMES_IN_FLIGHT; + m_releaseWriteIndex = (m_releaseWriteIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY; //m_encoder.addCompletedHandler(commandBufferFinishedCallback, this); } @@ -3898,7 +3898,7 @@ namespace bgfx { namespace webgpu //m_framesSemaphore.wait(); #endif - m_releaseReadIndex = (m_releaseReadIndex + 1) % WEBGPU_MAX_FRAMES_IN_FLIGHT; + m_releaseReadIndex = (m_releaseReadIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY; for (wgpu::Buffer& buffer : m_release[m_releaseReadIndex]) { @@ -4044,7 +4044,7 @@ namespace bgfx { namespace webgpu updateResolution(_render->m_resolution); - m_frameIndex = 0; // (m_frameIndex + 1) % WEBGPU_MAX_FRAMES_IN_FLIGHT; + m_frameIndex = 0; // (m_frameIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY; ScratchBufferWgpu& scratchBuffer = m_scratchBuffers[m_frameIndex]; scratchBuffer.begin(); diff --git a/src/renderer_webgpu.h b/src/renderer_webgpu.h index 14f2358e9..0790031b2 100644 --- a/src/renderer_webgpu.h +++ b/src/renderer_webgpu.h @@ -32,7 +32,6 @@ BGFX_PROFILER_END(); \ BX_MACRO_BLOCK_END -#define WEBGPU_MAX_FRAMES_IN_FLIGHT 3 #define WEBGPU_NUM_UNIFORM_BUFFERS 8 namespace bgfx { namespace webgpu @@ -501,7 +500,7 @@ namespace bgfx { namespace webgpu int m_releaseReadIndex = 0; typedef stl::vector ResourceArray; - ResourceArray m_release[WEBGPU_MAX_FRAMES_IN_FLIGHT]; + ResourceArray m_release[BGFX_CONFIG_MAX_FRAME_LATENCY]; }; struct TimerQueryWgpu