From 2c38e090d2a8e7a223cf863b050395479a95cf4e Mon Sep 17 00:00:00 2001 From: Julian Xhokaxhiu Date: Sun, 4 Oct 2020 23:21:12 +0200 Subject: [PATCH] Various Vulkan/Direct3D 12 enhancements (#2246) * [VK] Add RT MSAA support * [VK] Add GenerateMips support * [VK] Add Screenshot feature * [D3D12] Add MSAA RT support * [VK] Fix blit operation on MSAA RT textures * [D3D12] Fix blit operation on MSAA RT textures --- src/renderer_d3d12.cpp | 90 +++++++- src/renderer_d3d12.h | 4 +- src/renderer_vk.cpp | 457 ++++++++++++++++++++++++++++++++++++++--- src/renderer_vk.h | 20 ++ 4 files changed, 534 insertions(+), 37 deletions(-) diff --git a/src/renderer_d3d12.cpp b/src/renderer_d3d12.cpp index e30200913..b587fefc8 100644 --- a/src/renderer_d3d12.cpp +++ b/src/renderer_d3d12.cpp @@ -2412,7 +2412,9 @@ namespace bgfx { namespace d3d12 if (isValid(m_fbh) && m_fbh.idx != _fbh.idx) { - const FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx]; + FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx]; + + if (m_rtMsaa) frameBuffer.resolve(); if (NULL == frameBuffer.m_swapChain) { @@ -3527,7 +3529,7 @@ namespace bgfx { namespace d3d12 } } - device->CreateShaderResourceView(_texture.m_ptr + device->CreateShaderResourceView(NULL != _texture.m_singleMsaa ? _texture.m_singleMsaa : _texture.m_ptr , srvd , m_cpuHandle ); @@ -4684,6 +4686,15 @@ namespace bgfx { namespace d3d12 const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK); const bool blit = 0 != (m_flags&BGFX_TEXTURE_BLIT_DST); + const uint32_t msaaQuality = bx::uint32_satsub((m_flags & BGFX_TEXTURE_RT_MSAA_MASK) >> BGFX_TEXTURE_RT_MSAA_SHIFT, 1); + const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality]; + + const bool needResolve = true + && 1 < msaa.Count + && 0 == (m_flags & BGFX_TEXTURE_MSAA_SAMPLE) + && !writeOnly + ; + BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s RT[%c], BO[%c], CW[%c]%s." , this - s_renderD3D12->m_textures , getName( (TextureFormat::Enum)m_textureFormat) @@ -4768,9 +4779,6 @@ namespace bgfx { namespace d3d12 } } - const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1); - const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality]; - bx::memSet(&m_srvd, 0, sizeof(m_srvd) ); m_srvd.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; m_srvd.Format = (m_flags & BGFX_TEXTURE_SRGB) ? s_textureFormat[m_textureFormat].m_fmtSrgb : s_textureFormat[m_textureFormat].m_fmtSrv; @@ -4983,6 +4991,23 @@ namespace bgfx { namespace d3d12 } } } + + if (needResolve) + { + D3D12_RESOURCE_DESC rd = resourceDesc; + + rd.Alignment = 0; + rd.SampleDesc = s_msaa[0]; + rd.Flags &= ~(D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + + m_singleMsaa = createCommittedResource(device, HeapProperty::Texture, &rd, NULL); + + setResourceBarrier(commandList + , m_singleMsaa + , D3D12_RESOURCE_STATE_COMMON + , D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + ); + } } return m_directAccessPtr; @@ -5004,6 +5029,12 @@ namespace bgfx { namespace d3d12 s_renderD3D12->m_cmd.release(m_ptr); m_ptr = NULL; m_state = D3D12_RESOURCE_STATE_COMMON; + + if (NULL != m_singleMsaa) + { + s_renderD3D12->m_cmd.release(m_singleMsaa); + m_singleMsaa = NULL; + } } } } @@ -5101,9 +5132,36 @@ namespace bgfx { namespace d3d12 s_renderD3D12->m_cmd.release(staging); } - void TextureD3D12::resolve(uint8_t _resolve) const + void TextureD3D12::resolve(ID3D12GraphicsCommandList* _commandList, uint8_t _resolve) { BX_UNUSED(_resolve); + + bool needResolve = NULL != m_singleMsaa; + if (needResolve) + { + D3D12_RESOURCE_STATES state = setState(_commandList, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + + setResourceBarrier(_commandList + , m_singleMsaa + , D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + , D3D12_RESOURCE_STATE_RESOLVE_DEST + ); + + _commandList->ResolveSubresource(m_singleMsaa + , 0 + , m_ptr + , 0 + , s_textureFormat[m_textureFormat].m_fmt + ); + + setResourceBarrier(_commandList + , m_singleMsaa + , D3D12_RESOURCE_STATE_RESOLVE_DEST + , D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + ); + + setState(_commandList, state); + } } D3D12_RESOURCE_STATES TextureD3D12::setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state) @@ -5418,8 +5476,8 @@ namespace bgfx { namespace d3d12 if (isValid(at.handle) ) { - const TextureD3D12& texture = s_renderD3D12->m_textures[at.handle.idx]; - texture.resolve(at.resolve); + TextureD3D12& texture = s_renderD3D12->m_textures[at.handle.idx]; + texture.resolve(s_renderD3D12->m_commandList, at.resolve); } } } @@ -5750,6 +5808,13 @@ namespace bgfx { namespace d3d12 currentSrc = blit.m_src; + if (NULL != src.m_singleMsaa) + setResourceBarrier(m_commandList + , src.m_singleMsaa + , D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + , D3D12_RESOURCE_STATE_COPY_SOURCE + ); + state = src.setState(m_commandList, D3D12_RESOURCE_STATE_COPY_SOURCE); } @@ -5807,7 +5872,7 @@ namespace bgfx { namespace d3d12 dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dstLocation.SubresourceIndex = dstZ*dst.m_numMips+blit.m_dstMip; D3D12_TEXTURE_COPY_LOCATION srcLocation; - srcLocation.pResource = src.m_ptr; + srcLocation.pResource = NULL != src.m_singleMsaa ? src.m_singleMsaa : src.m_ptr; srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; srcLocation.SubresourceIndex = srcZ*src.m_numMips+blit.m_srcMip; @@ -5820,6 +5885,13 @@ namespace bgfx { namespace d3d12 , depthStencil ? NULL : &box ); } + + if (NULL != src.m_singleMsaa) + setResourceBarrier(m_commandList + , src.m_singleMsaa + , D3D12_RESOURCE_STATE_COPY_SOURCE + , D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + ); } if (isValid(currentSrc) diff --git a/src/renderer_d3d12.h b/src/renderer_d3d12.h index f9b2ff553..f01fa9dbe 100644 --- a/src/renderer_d3d12.h +++ b/src/renderer_d3d12.h @@ -321,6 +321,7 @@ namespace bgfx { namespace d3d12 TextureD3D12() : m_ptr(NULL) + , m_singleMsaa(NULL) , m_directAccessPtr(NULL) , m_state(D3D12_RESOURCE_STATE_COMMON) , m_numMips(0) @@ -333,12 +334,13 @@ namespace bgfx { namespace d3d12 void destroy(); void overrideInternal(uintptr_t _ptr); void update(ID3D12GraphicsCommandList* _commandList, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem); - void resolve(uint8_t _resolve) const; + void resolve(ID3D12GraphicsCommandList* _commandList, uint8_t _resolve); D3D12_RESOURCE_STATES setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state); D3D12_SHADER_RESOURCE_VIEW_DESC m_srvd; D3D12_UNORDERED_ACCESS_VIEW_DESC m_uavd; ID3D12Resource* m_ptr; + ID3D12Resource* m_singleMsaa; void* m_directAccessPtr; D3D12_RESOURCE_STATES m_state; uint64_t m_flags; diff --git a/src/renderer_vk.cpp b/src/renderer_vk.cpp index 931bd78b0..2b84b01f5 100644 --- a/src/renderer_vk.cpp +++ b/src/renderer_vk.cpp @@ -55,14 +55,14 @@ namespace bgfx { namespace vk 16, }; -// static DXGI_SAMPLE_DESC s_msaa[] = -// { -// { 1, 0 }, -// { 2, 0 }, -// { 4, 0 }, -// { 8, 0 }, -// { 16, 0 }, -// }; + static MsaaSamplerVK s_msaa[] = + { + { 1, VK_SAMPLE_COUNT_1_BIT }, + { 2, VK_SAMPLE_COUNT_2_BIT }, + { 4, VK_SAMPLE_COUNT_4_BIT }, + { 8, VK_SAMPLE_COUNT_8_BIT }, + { 16, VK_SAMPLE_COUNT_16_BIT }, + }; static const VkBlendFactor s_blendFactor[][2] = { @@ -970,6 +970,7 @@ VK_IMPORT_DEVICE , m_maxAnisotropy(1) , m_depthClamp(false) , m_wireframe(false) + , m_rtMsaa(false) { } @@ -1580,6 +1581,16 @@ VK_IMPORT_INSTANCE vkGetPhysicalDeviceFeatures(m_physicalDevice, &m_deviceFeatures); m_deviceFeatures.robustBufferAccess = VK_FALSE; + { + for (uint16_t ii = 0, last = 0; ii < BX_COUNTOF(s_msaa); ii++) + { + if ((m_deviceProperties.limits.framebufferColorSampleCounts >= s_msaa[ii].Count) && (m_deviceProperties.limits.framebufferDepthSampleCounts >= s_msaa[ii].Count)) + last = ii; + else + s_msaa[ii] = s_msaa[last]; + } + } + { struct ImageTest { @@ -2051,7 +2062,7 @@ VK_IMPORT_DEVICE m_sci.imageExtent.width = width; m_sci.imageExtent.height = height; m_sci.imageArrayLayers = 1; - m_sci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + m_sci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; m_sci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; m_sci.queueFamilyIndexCount = 0; m_sci.pQueueFamilyIndices = NULL; @@ -2624,8 +2635,198 @@ VK_IMPORT_DEVICE m_uniforms[_handle.idx] = NULL; } - void requestScreenShot(FrameBufferHandle /*_handle*/, const char* /*_filePath*/) override + void requestScreenShot(FrameBufferHandle _fbh, const char* _filePath) override { + bool supportsBlit = true; + + // Check blit support for source and destination + VkFormatProperties formatProps; + + // Check if the device supports blitting from optimal images (the swapchain images are in optimal format) + vkGetPhysicalDeviceFormatProperties(m_physicalDevice, m_sci.imageFormat, &formatProps); + if (!(formatProps.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT)) { + BX_TRACE("Device does not support blitting from optimal tiled images, using copy instead of blit!\n"); + supportsBlit = false; + } + + // Check if the device supports blitting to linear images + vkGetPhysicalDeviceFormatProperties(m_physicalDevice, VK_FORMAT_R8G8B8A8_UNORM, &formatProps); + if (!(formatProps.linearTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) { + BX_TRACE("Device does not support blitting to linear tiled images, using copy instead of blit!\n"); + supportsBlit = false; + } + + // Source for the copy is the last rendered swapchain image + VkImage srcImage = m_backBufferColorImage[m_backBufferColorIdx]; + uint32_t width = m_sci.imageExtent.width, height = m_sci.imageExtent.height; + + if (isValid(_fbh)) + { + TextureVK& texture = m_textures[m_frameBuffers[_fbh.idx].m_attachment[0].handle.idx]; + srcImage = VK_NULL_HANDLE != texture.m_singleMsaaImage ? texture.m_singleMsaaImage : texture.m_textureImage; + } + + // Create the linear tiled destination image to copy to and to read the memory from + VkImage dstImage = VK_NULL_HANDLE; + VkImageCreateInfo ici; + ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ici.pNext = NULL; + ici.flags = 0; + // Note that vkCmdBlitImage (if supported) will also do format conversions if the swapchain color format would differ + ici.imageType = VK_IMAGE_TYPE_2D; + ici.format = VK_FORMAT_R8G8B8A8_UNORM; + ici.extent.width = width; + ici.extent.height = height; + ici.extent.depth = 1; + ici.arrayLayers = 1; + ici.mipLevels = 1; + ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + ici.samples = VK_SAMPLE_COUNT_1_BIT; + ici.tiling = VK_IMAGE_TILING_LINEAR; + ici.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; + ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ici.queueFamilyIndexCount = 0; + ici.pQueueFamilyIndices = NULL; + // Create the image + VK_CHECK(vkCreateImage(m_device, &ici, m_allocatorCb, &dstImage)); + + // Create memory to back up the image + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(m_device, dstImage, &memRequirements); + + VkMemoryAllocateInfo ma; + ma.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + ma.pNext = NULL; + ma.allocationSize = memRequirements.size; + + VkDeviceMemory dstImageMemory = VK_NULL_HANDLE; + // Memory must be host visible to copy from + VK_CHECK(allocateMemory(&memRequirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &dstImageMemory)); + VK_CHECK(vkBindImageMemory(m_device, dstImage, dstImageMemory, 0)); + + // Do the actual blit from the swapchain image to our host visible destination image + VkCommandBuffer copyCmd = beginNewCommand(); + + // Transition destination image to transfer destination layout + bgfx::vk::setImageMemoryBarrier(copyCmd + , dstImage + , VK_IMAGE_ASPECT_COLOR_BIT + , VK_IMAGE_LAYOUT_UNDEFINED + , VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + , 1 + , 1 + ); + + // Transition swapchain image from present to transfer source layout + bgfx::vk::setImageMemoryBarrier(copyCmd + , srcImage + , VK_IMAGE_ASPECT_COLOR_BIT + , VK_IMAGE_LAYOUT_PRESENT_SRC_KHR + , VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL + , 1 + , 1 + ); + + // If source and destination support blit we'll blit as this also does automatic format conversion (e.g. from BGR to RGB) + if (supportsBlit) + { + // Define the region to blit (we will blit the whole swapchain image) + VkOffset3D blitSize; + blitSize.x = width; + blitSize.y = height; + blitSize.z = 1; + VkImageBlit imageBlitRegion{}; + imageBlitRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageBlitRegion.srcSubresource.layerCount = 1; + imageBlitRegion.srcOffsets[1] = blitSize; + imageBlitRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageBlitRegion.dstSubresource.layerCount = 1; + imageBlitRegion.dstOffsets[1] = blitSize; + + // Issue the blit command + vkCmdBlitImage( + copyCmd, + srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dstImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &imageBlitRegion, + VK_FILTER_NEAREST); + } + else + { + // Otherwise use image copy (requires us to manually flip components) + VkImageCopy imageCopyRegion{}; + imageCopyRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageCopyRegion.srcSubresource.layerCount = 1; + imageCopyRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageCopyRegion.dstSubresource.layerCount = 1; + imageCopyRegion.extent.width = width; + imageCopyRegion.extent.height = height; + imageCopyRegion.extent.depth = 1; + + // Issue the copy command + vkCmdCopyImage( + copyCmd, + srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dstImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &imageCopyRegion); + } + + // Transition destination image to general layout, which is the required layout for mapping the image memory later on + bgfx::vk::setImageMemoryBarrier(copyCmd + , dstImage + , VK_IMAGE_ASPECT_COLOR_BIT + , VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + , VK_IMAGE_LAYOUT_GENERAL + , 1 + , 1 + ); + + // Transition back the swap chain image after the blit is done + bgfx::vk::setImageMemoryBarrier(copyCmd + , srcImage + , VK_IMAGE_ASPECT_COLOR_BIT + , VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL + , VK_IMAGE_LAYOUT_PRESENT_SRC_KHR + , 1 + , 1 + ); + + submitCommandAndWait(copyCmd); + + // Get layout of the image (including row pitch) + VkImageSubresource subResource{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 }; + VkSubresourceLayout subResourceLayout; + vkGetImageSubresourceLayout(m_device, dstImage, &subResource, &subResourceLayout); + + // Map image memory so we can start copying from it + char* data; + vkMapMemory(m_device, dstImageMemory, 0, VK_WHOLE_SIZE, 0, (void**)&data); + data += subResourceLayout.offset; + + bimg::imageSwizzleBgra8( + data + , subResourceLayout.rowPitch + , width + , height + , data + , subResourceLayout.rowPitch + ); + + g_callback->screenShot(_filePath + , width + , height + , subResourceLayout.rowPitch + , data + , height * subResourceLayout.rowPitch + , false + ); + + // Clean up resources + vkUnmapMemory(m_device, dstImageMemory); + vkFreeMemory(m_device, dstImageMemory, m_allocatorCb); + vkDestroyImage(m_device, dstImage, m_allocatorCb); } void updateViewName(ViewId _id, const char* _name) override @@ -3089,9 +3290,11 @@ VK_IMPORT_DEVICE if (isValid(m_fbh) && m_fbh.idx != _fbh.idx) { - const FrameBufferVK& frameBuffer = m_frameBuffers[m_fbh.idx]; + FrameBufferVK& frameBuffer = m_frameBuffers[m_fbh.idx]; BX_UNUSED(frameBuffer); + if (m_rtMsaa) frameBuffer.resolve(); + for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii) { TextureVK& texture = m_textures[frameBuffer.m_texture[ii].idx]; @@ -3169,7 +3372,7 @@ VK_IMPORT_DEVICE } m_fbh = _fbh; -// m_rtMsaa = _msaa; + m_rtMsaa = _msaa; } void setBlendState(VkPipelineColorBlendStateCreateInfo& _desc, uint64_t _state, uint32_t _rgba = 0) @@ -3410,7 +3613,7 @@ VK_IMPORT_DEVICE TextureVK& texture = m_textures[_attachments[ii].handle.idx]; ad[ii].flags = 0; ad[ii].format = texture.m_format; - ad[ii].samples = VK_SAMPLE_COUNT_1_BIT; + ad[ii].samples = texture.m_sampler.Sample; if (texture.m_aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { @@ -3701,11 +3904,13 @@ VK_IMPORT_DEVICE viewportState.scissorCount = 1; viewportState.pScissors = NULL; + VkSampleCountFlagBits rasterizerMsaa = (isValid(m_fbh) && !!(BGFX_STATE_MSAA & _state) ? m_textures[m_frameBuffers[m_fbh.idx].m_attachment[0].handle.idx].m_sampler.Sample : VK_SAMPLE_COUNT_1_BIT); + VkPipelineMultisampleStateCreateInfo multisampleState; multisampleState.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; multisampleState.pNext = NULL; multisampleState.flags = 0; - multisampleState.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisampleState.rasterizationSamples = rasterizerMsaa; multisampleState.sampleShadingEnable = VK_FALSE; multisampleState.minSampleShading = !!(BGFX_STATE_CONSERVATIVE_RASTER & _state) ? 1.0f : 0.0f; multisampleState.pSampleMask = NULL; @@ -3912,10 +4117,17 @@ VK_IMPORT_DEVICE } imageInfo[imageCount].imageLayout = texture.m_currentImageLayout; + imageInfo[imageCount].imageView = VK_NULL_HANDLE != texture.m_textureImageDepthView ? texture.m_textureImageDepthView : texture.m_textureImageView ; + + if (VK_NULL_HANDLE != texture.m_singleMsaaImageView) + { + imageInfo[imageCount].imageView = texture.m_singleMsaaImageView; + } + imageInfo[imageCount].sampler = sampler; wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -4353,6 +4565,7 @@ VK_IMPORT_DEVICE uint32_t m_maxAnisotropy; bool m_depthClamp; bool m_wireframe; + bool m_rtMsaa; TextVideoMem m_textVideoMem; @@ -5208,6 +5421,8 @@ VK_DESTROY : VK_IMAGE_ASPECT_COLOR_BIT ; + m_sampler = s_msaa[bx::uint32_satsub((m_flags & BGFX_TEXTURE_RT_MSAA_MASK) >> BGFX_TEXTURE_RT_MSAA_SHIFT, 1)]; + if (m_format == VK_FORMAT_S8_UINT || m_format == VK_FORMAT_D16_UNORM_S8_UINT || m_format == VK_FORMAT_D24_UNORM_S8_UINT @@ -5244,6 +5459,12 @@ VK_DESTROY const bool renderTarget = 0 != (m_flags & BGFX_TEXTURE_RT_MASK); const bool blit = 0 != (m_flags & BGFX_TEXTURE_BLIT_DST); + const bool needResolve = true + && 1 < m_sampler.Count + && 0 == (m_flags & BGFX_TEXTURE_MSAA_SAMPLE) + && !writeOnly + ; + BX_UNUSED(swizzle, writeOnly, computeWrite, renderTarget, blit); BX_TRACE( @@ -5468,7 +5689,7 @@ VK_DESTROY | (_flags & BGFX_TEXTURE_COMPUTE_WRITE ? VK_IMAGE_USAGE_STORAGE_BIT : 0) ; ici.format = m_format; - ici.samples = VK_SAMPLE_COUNT_1_BIT; + ici.samples = m_sampler.Sample; ici.mipLevels = m_numMips; ici.arrayLayers = m_numSides; ici.extent.width = m_width; @@ -5528,9 +5749,9 @@ VK_DESTROY viewInfo.components = m_components; viewInfo.subresourceRange.aspectMask = m_aspectMask; viewInfo.subresourceRange.baseMipLevel = 0; - viewInfo.subresourceRange.levelCount = m_numMips; //m_numMips; + viewInfo.subresourceRange.levelCount = m_numMips; viewInfo.subresourceRange.baseArrayLayer = 0; - viewInfo.subresourceRange.layerCount = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers); + viewInfo.subresourceRange.layerCount = m_numSides; VK_CHECK(vkCreateImageView( device , &viewInfo @@ -5552,9 +5773,9 @@ VK_DESTROY viewInfo.components = m_components; viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; viewInfo.subresourceRange.baseMipLevel = 0; - viewInfo.subresourceRange.levelCount = m_numMips; //m_numMips; + viewInfo.subresourceRange.levelCount = m_numMips; viewInfo.subresourceRange.baseArrayLayer = 0; - viewInfo.subresourceRange.layerCount = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers); + viewInfo.subresourceRange.layerCount = m_numSides; VK_CHECK(vkCreateImageView( device , &viewInfo @@ -5579,9 +5800,9 @@ VK_DESTROY viewInfo.components = m_components; viewInfo.subresourceRange.aspectMask = m_aspectMask; viewInfo.subresourceRange.baseMipLevel = 0; - viewInfo.subresourceRange.levelCount = m_numMips; //m_numMips; + viewInfo.subresourceRange.levelCount = m_numMips; viewInfo.subresourceRange.baseArrayLayer = 0; - viewInfo.subresourceRange.layerCount = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers); + viewInfo.subresourceRange.layerCount = m_numSides; VK_CHECK(vkCreateImageView( device , &viewInfo @@ -5589,6 +5810,61 @@ VK_DESTROY , &m_textureImageStorageView ) ); } + + if (needResolve) + { + { + VkImageCreateInfo ici_resolve = ici; + ici_resolve.samples = s_msaa[0].Sample; + ici_resolve.usage &= ~(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + + VK_CHECK(vkCreateImage(device, &ici_resolve, allocatorCb, &m_singleMsaaImage)); + + VkMemoryRequirements imageMemReq_resolve; + vkGetImageMemoryRequirements(device, m_singleMsaaImage, &imageMemReq_resolve); + + VK_CHECK(s_renderVK->allocateMemory(&imageMemReq_resolve, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_singleMsaaDeviceMem)); + + vkBindImageMemory(device, m_singleMsaaImage, m_singleMsaaDeviceMem, 0); + } + + { + VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand(); + + bgfx::vk::setImageMemoryBarrier(commandBuffer + , m_singleMsaaImage + , m_aspectMask + , VK_IMAGE_LAYOUT_UNDEFINED + , VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL + , m_numMips + , m_numSides + ); + + s_renderVK->submitCommandAndWait(commandBuffer); + } + + { + VkImageViewCreateInfo viewInfo; + viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + viewInfo.pNext = NULL; + viewInfo.flags = 0; + viewInfo.image = m_singleMsaaImage; + viewInfo.viewType = m_type; + viewInfo.format = m_format; + viewInfo.components = m_components; + viewInfo.subresourceRange.aspectMask = m_aspectMask; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = m_numMips; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = m_numSides; + VK_CHECK(vkCreateImageView( + device + , &viewInfo + , allocatorCb + , &m_singleMsaaImageView + )); + } + } } return m_directAccessPtr; @@ -5596,20 +5872,28 @@ VK_DESTROY void TextureVK::destroy() { + VkAllocationCallbacks* allocatorCb = s_renderVK->m_allocatorCb; + VkDevice device = s_renderVK->m_device; + if (m_textureImage) { - VkAllocationCallbacks* allocatorCb = s_renderVK->m_allocatorCb; - VkDevice device = s_renderVK->m_device; - vkFreeMemory(device, m_textureDeviceMem, allocatorCb); vkDestroy(m_textureImageStorageView); vkDestroy(m_textureImageDepthView); vkDestroy(m_textureImageView); vkDestroy(m_textureImage); - - m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } + + if (m_singleMsaaImage) + { + vkFreeMemory(device, m_singleMsaaDeviceMem, allocatorCb); + + vkDestroy(m_singleMsaaImageView); + vkDestroy(m_singleMsaaImage); + } + + m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } void TextureVK::update(VkCommandPool _commandPool, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem) @@ -5699,6 +5983,108 @@ VK_DESTROY } } + void TextureVK::resolve(uint8_t _resolve) + { + BX_UNUSED(_resolve); + + bool needResolve = VK_NULL_HANDLE != m_singleMsaaImage; + if (needResolve) + { + VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand(); + + VkImageResolve blitInfo; + blitInfo.srcOffset.x = 0; + blitInfo.srcOffset.y = 0; + blitInfo.srcOffset.z = 0; + blitInfo.dstOffset.x = 0; + blitInfo.dstOffset.y = 0; + blitInfo.dstOffset.z = 0; + blitInfo.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blitInfo.srcSubresource.mipLevel = 0; + blitInfo.srcSubresource.baseArrayLayer = 0; + blitInfo.srcSubresource.layerCount = 1; + blitInfo.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blitInfo.dstSubresource.mipLevel = 0; + blitInfo.dstSubresource.baseArrayLayer = 0; + blitInfo.dstSubresource.layerCount = 1; + blitInfo.extent.width = m_width; + blitInfo.extent.height = m_height; + blitInfo.extent.depth = 1; + + vkCmdResolveImage(commandBuffer, + m_textureImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + m_singleMsaaImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &blitInfo); + + s_renderVK->submitCommandAndWait(commandBuffer); + } + + const bool renderTarget = 0 != (m_flags & BGFX_TEXTURE_RT_MASK); + if (renderTarget + && 1 < m_numMips + && 0 != (_resolve & BGFX_RESOLVE_AUTO_GEN_MIPS)) + { + VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand(); + + int32_t mipWidth = m_width; + int32_t mipHeight = m_height; + + for (uint32_t i = 1; i < m_numMips; i++) { + bgfx::vk::setImageMemoryBarrier(commandBuffer + , needResolve ? m_singleMsaaImage : m_textureImage + , m_aspectMask + , VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + , VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL + , i - 1 + , 1 + ); + + VkImageBlit blit{}; + blit.srcOffsets[0] = { 0, 0, 0 }; + blit.srcOffsets[1] = { mipWidth, mipHeight, 1 }; + blit.srcSubresource.aspectMask = m_aspectMask; + blit.srcSubresource.mipLevel = i - 1; + blit.srcSubresource.baseArrayLayer = 0; + blit.srcSubresource.layerCount = 1; + blit.dstOffsets[0] = { 0, 0, 0 }; + blit.dstOffsets[1] = { mipWidth > 1 ? mipWidth / 2 : 1, mipHeight > 1 ? mipHeight / 2 : 1, 1 }; + blit.dstSubresource.aspectMask = m_aspectMask; + blit.dstSubresource.mipLevel = i; + blit.dstSubresource.baseArrayLayer = 0; + blit.dstSubresource.layerCount = 1; + + vkCmdBlitImage(commandBuffer, + needResolve ? m_singleMsaaImage : m_textureImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + needResolve ? m_singleMsaaImage : m_textureImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &blit, + VK_FILTER_LINEAR); + + bgfx::vk::setImageMemoryBarrier(commandBuffer + , needResolve ? m_singleMsaaImage : m_textureImage + , m_aspectMask + , VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL + , VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL + , i - 1 + , 1 + ); + + if (mipWidth > 1) mipWidth /= 2; + if (mipHeight > 1) mipHeight /= 2; + } + + bgfx::vk::setImageMemoryBarrier(commandBuffer + , needResolve ? m_singleMsaaImage : m_textureImage + , m_aspectMask + , VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + , VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL + , m_numMips - 1 + , 1 + ); + + s_renderVK->submitCommandAndWait(commandBuffer); + } + } + void TextureVK::copyBufferToTexture(VkBuffer stagingBuffer, uint32_t bufferImageCopyCount, VkBufferImageCopy* bufferImageCopy) { VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand(); @@ -5779,6 +6165,23 @@ VK_DESTROY m_renderPass = renderPass; } + void FrameBufferVK::resolve() + { + if (0 < m_numAttachment) + { + for (uint32_t ii = 0; ii < m_numAttachment; ++ii) + { + const Attachment& at = m_attachment[ii]; + + if (isValid(at.handle)) + { + TextureVK& texture = s_renderVK->m_textures[at.handle.idx]; + texture.resolve(at.resolve); + } + } + } + } + void FrameBufferVK::destroy() { vkDestroy(m_framebuffer); @@ -5853,7 +6256,7 @@ VK_DESTROY VkFilter filter = bimg::isDepth(bimg::TextureFormat::Enum(src.m_textureFormat) ) ? VK_FILTER_NEAREST : VK_FILTER_LINEAR; vkCmdBlitImage( commandBuffer - , src.m_textureImage + , VK_NULL_HANDLE != src.m_singleMsaaImage ? src.m_singleMsaaImage : src.m_textureImage , src.m_currentImageLayout , dst.m_textureImage , dst.m_currentImageLayout diff --git a/src/renderer_vk.h b/src/renderer_vk.h index 9f7942016..ef3bcf5de 100644 --- a/src/renderer_vk.h +++ b/src/renderer_vk.h @@ -108,6 +108,7 @@ VK_IMPORT_DEVICE_FUNC(false, vkFreeCommandBuffers); \ VK_IMPORT_DEVICE_FUNC(false, vkGetBufferMemoryRequirements); \ VK_IMPORT_DEVICE_FUNC(false, vkGetImageMemoryRequirements); \ + VK_IMPORT_DEVICE_FUNC(false, vkGetImageSubresourceLayout); \ VK_IMPORT_DEVICE_FUNC(false, vkAllocateMemory); \ VK_IMPORT_DEVICE_FUNC(false, vkFreeMemory); \ VK_IMPORT_DEVICE_FUNC(false, vkCreateImage); \ @@ -171,6 +172,7 @@ VK_IMPORT_DEVICE_FUNC(false, vkCmdResolveImage); \ VK_IMPORT_DEVICE_FUNC(false, vkCmdCopyBuffer); \ VK_IMPORT_DEVICE_FUNC(false, vkCmdCopyBufferToImage); \ + VK_IMPORT_DEVICE_FUNC(false, vkCmdCopyImage); \ VK_IMPORT_DEVICE_FUNC(false, vkCmdBlitImage); \ VK_IMPORT_DEVICE_FUNC(false, vkMapMemory); \ VK_IMPORT_DEVICE_FUNC(false, vkUnmapMemory); \ @@ -423,6 +425,12 @@ VK_DESTROY typedef BufferVK IndexBufferVK; + struct MsaaSamplerVK + { + uint16_t Count; + VkSampleCountFlagBits Sample; + }; + struct VertexBufferVK : public BufferVK { void create(uint32_t _size, void* _data, VertexLayoutHandle _layoutHandle, uint16_t _flags); @@ -523,12 +531,17 @@ VK_DESTROY , m_textureImageDepthView(VK_NULL_HANDLE) , m_textureImageStorageView(VK_NULL_HANDLE) , m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED) + , m_sampler({ 1, VK_SAMPLE_COUNT_1_BIT }) + , m_singleMsaaImage(VK_NULL_HANDLE) + , m_singleMsaaDeviceMem(VK_NULL_HANDLE) + , m_singleMsaaImageView(VK_NULL_HANDLE) { } void* create(const Memory* _mem, uint64_t _flags, uint8_t _skip); void destroy(); void update(VkCommandPool commandPool, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem); + void resolve(uint8_t _resolve); void copyBufferToTexture(VkBuffer stagingBuffer, uint32_t bufferImageCopyCount, VkBufferImageCopy* bufferImageCopy); void setImageMemoryBarrier(VkCommandBuffer commandBuffer, VkImageLayout newImageLayout); @@ -544,6 +557,8 @@ VK_DESTROY uint8_t m_textureFormat; uint8_t m_numMips; + MsaaSamplerVK m_sampler; + VkImageViewType m_type; VkFormat m_format; VkComponentMapping m_components; @@ -555,6 +570,10 @@ VK_DESTROY VkImageView m_textureImageDepthView; VkImageView m_textureImageStorageView; VkImageLayout m_currentImageLayout; + + VkImage m_singleMsaaImage; + VkDeviceMemory m_singleMsaaDeviceMem; + VkImageView m_singleMsaaImageView; }; struct FrameBufferVK @@ -570,6 +589,7 @@ VK_DESTROY { } void create(uint8_t _num, const Attachment* _attachment); + void resolve(); void destroy(); TextureHandle m_texture[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];