From 4342db8f3bf1a70363f62a3e35385ce87b41fcc0 Mon Sep 17 00:00:00 2001 From: Rinthel Date: Wed, 14 Aug 2019 10:06:37 +0900 Subject: [PATCH] VK: descriptor set optimization / refactoring - add support to indirect compute call - use dynamic uniform buffer instead of uniform - create depth view for depth/stencil texture - fix dynamic uniform buffer bug, apply it to compute shader - refactor descriptor set allocation / setting - fix image layout reinit - get and set device feature / fix independent blending - cleanup --- src/renderer_vk.cpp | 753 +++++++++++++++++++------------------------- src/renderer_vk.h | 8 + 2 files changed, 337 insertions(+), 424 deletions(-) diff --git a/src/renderer_vk.cpp b/src/renderer_vk.cpp index 45e08a570..6e436ae53 100644 --- a/src/renderer_vk.cpp +++ b/src/renderer_vk.cpp @@ -1149,6 +1149,7 @@ VK_IMPORT_INSTANCE g_caps.supported |= ( 0 | BGFX_CAPS_TEXTURE_BLIT + | BGFX_CAPS_DRAW_INDIRECT | BGFX_CAPS_INSTANCING ); g_caps.limits.maxTextureSize = m_deviceProperties.limits.maxImageDimension2D; @@ -1156,6 +1157,8 @@ VK_IMPORT_INSTANCE g_caps.limits.maxComputeBindings = BGFX_MAX_COMPUTE_BINDINGS; g_caps.limits.maxVertexStreams = BGFX_CONFIG_MAX_VERTEX_STREAMS; + vkGetPhysicalDeviceFeatures(m_physicalDevice, &m_deviceFeatures); + { struct ImageTest { @@ -1345,7 +1348,7 @@ VK_IMPORT_INSTANCE dci.ppEnabledLayerNames = enabledLayerNames; dci.enabledExtensionCount = numEnabledExtensions; dci.ppEnabledExtensionNames = enabledExtension; - dci.pEnabledFeatures = NULL; + dci.pEnabledFeatures = &m_deviceFeatures; result = vkCreateDevice( m_physicalDevice @@ -2007,9 +2010,9 @@ VK_IMPORT_DEVICE // { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 }, { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 }, { VK_DESCRIPTOR_TYPE_SAMPLER, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 }, - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 10<<10 }, + { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 10<<10 }, { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 }, - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 }, + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 }, }; // VkDescriptorSetLayoutBinding dslb[] = @@ -2611,11 +2614,13 @@ VK_IMPORT_DEVICE uint32_t samplerFlags = (uint32_t)(texture.m_flags & BGFX_SAMPLER_BITS_MASK); VkSampler sampler = getSampler(samplerFlags, 1); + uint32_t bufferOffset = scratchBuffer.m_pos; VkDescriptorBufferInfo bufferInfo; bufferInfo.buffer = scratchBuffer.m_buffer; - bufferInfo.offset = scratchBuffer.m_pos; - bufferInfo.range = bx::strideAlign(program.m_vsh->m_size, align); + bufferInfo.offset = 0; + bufferInfo.range = bx::strideAlign(program.m_vsh->m_size, align); bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size); + scratchBuffer.m_pos += bufferInfo.range; VkWriteDescriptorSet wds[3]; wds[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -2624,7 +2629,7 @@ VK_IMPORT_DEVICE wds[0].dstBinding = program.m_vsh->m_uniformBinding; wds[0].dstArrayElement = 0; wds[0].descriptorCount = 1; - wds[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + wds[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; wds[0].pImageInfo = NULL; wds[0].pBufferInfo = &bufferInfo; wds[0].pTexelBufferView = NULL; @@ -2667,8 +2672,8 @@ VK_IMPORT_DEVICE , 0 , 1 , &scratchBuffer.m_descriptorSet[scratchBuffer.m_currentDs] - , 0 - , NULL + , 1 + , &bufferOffset ); scratchBuffer.m_currentDs++; @@ -2983,7 +2988,7 @@ VK_IMPORT_DEVICE numAttachments = frameBuffer.m_num; } - if (!!(BGFX_STATE_BLEND_INDEPENDENT & _state) ) + if (!!(BGFX_STATE_BLEND_INDEPENDENT & _state) && m_deviceFeatures.independentBlend ) { for (uint32_t ii = 1, rgba = _rgba; ii < numAttachments; ++ii, rgba >>= 11) { @@ -3265,7 +3270,7 @@ VK_IMPORT_DEVICE sci.addressModeW = s_textureAddress[(_samplerFlags&BGFX_SAMPLER_W_MASK)>>BGFX_SAMPLER_W_SHIFT]; sci.mipLodBias = 0.0f; sci.anisotropyEnable = VK_FALSE; - sci.maxAnisotropy = 0; + sci.maxAnisotropy = 4.0f; sci.compareEnable = 0 != cmpFunc; sci.compareOp = s_cmpFunc[cmpFunc]; sci.minLod = 0.0f; @@ -3556,6 +3561,202 @@ VK_IMPORT_DEVICE return pipeline; } + void allocDescriptorSet(ProgramVK& program, const RenderBind& renderBind, ScratchBufferVK& scratchBuffer) + { + VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash); + VkDescriptorSetAllocateInfo dsai; + dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + dsai.pNext = NULL; + dsai.descriptorPool = m_descriptorPool; + dsai.descriptorSetCount = 1; + dsai.pSetLayouts = &dsl; + + VkDescriptorSet& descriptorSet = scratchBuffer.m_descriptorSet[scratchBuffer.m_currentDs]; + vkAllocateDescriptorSets(m_device, &dsai, &descriptorSet); + scratchBuffer.m_currentDs++; + + VkDescriptorImageInfo imageInfo[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; + VkDescriptorBufferInfo bufferInfo[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; + VkWriteDescriptorSet wds[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; + bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS); + uint32_t wdsCount = 0; + uint32_t bufferCount = 0; + uint32_t imageCount = 0; + + for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage) + { + const Binding& bind = renderBind.m_bind[stage]; + if (kInvalidHandle != bind.m_idx) + { + const Binding& bind = renderBind.m_bind[stage]; + const ShaderVK::BindInfo* bindInfo = NULL; + if (isValid(program.m_vsh->m_bindInfo[stage].uniformHandle)) + { + bindInfo = &(program.m_vsh->m_bindInfo[stage]); + } + else if (NULL != program.m_fsh && isValid(program.m_fsh->m_bindInfo[stage].uniformHandle)) + { + bindInfo = &(program.m_fsh->m_bindInfo[stage]); + } + + if (NULL == bindInfo) + continue; + + if (ShaderVK::BindType::Storage == bindInfo->type) + { + VkDescriptorType descriptorType = (VkDescriptorType)bindInfo->samplerBinding; + wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wds[wdsCount].pNext = NULL; + wds[wdsCount].dstSet = descriptorSet; + wds[wdsCount].dstBinding = bindInfo->binding; + wds[wdsCount].dstArrayElement = 0; + wds[wdsCount].descriptorCount = 1; + wds[wdsCount].descriptorType = descriptorType; + wds[wdsCount].pImageInfo = NULL; + wds[wdsCount].pBufferInfo = NULL; + wds[wdsCount].pTexelBufferView = NULL; + + if (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == descriptorType) + { + VertexBufferVK& vb = m_vertexBuffers[bind.m_idx]; + bufferInfo[bufferCount].buffer = vb.m_buffer; + bufferInfo[bufferCount].offset = 0; + bufferInfo[bufferCount].range = vb.m_size; + wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; + ++bufferCount; + } + else if (VK_DESCRIPTOR_TYPE_STORAGE_IMAGE == descriptorType) + { + TextureVK& texture = m_textures[bind.m_idx]; + VkSampler sampler = getSampler( + (0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags) + ? bind.m_samplerFlags + : (uint32_t)texture.m_flags + ) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK) + , (uint32_t)texture.m_numMips); + + if (VK_IMAGE_LAYOUT_GENERAL != texture.m_currentImageLayout + && VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL != texture.m_currentImageLayout) + { + texture.setImageMemoryBarrier(m_commandBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + imageInfo[imageCount].imageLayout = texture.m_currentImageLayout; + imageInfo[imageCount].imageView = VK_NULL_HANDLE != texture.m_textureImageStorageView + ? texture.m_textureImageStorageView + : texture.m_textureImageView + ; + imageInfo[imageCount].sampler = sampler; + wds[wdsCount].pImageInfo = &imageInfo[imageCount]; + ++imageCount; + } + + ++wdsCount; + } + else if (ShaderVK::BindType::Sampler == bindInfo->type) + { + TextureVK& texture = m_textures[bind.m_idx]; + VkSampler sampler = getSampler( + (0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags) + ? bind.m_samplerFlags + : (uint32_t)texture.m_flags + ) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK) + , (uint32_t)texture.m_numMips); + + if (VK_IMAGE_LAYOUT_GENERAL != texture.m_currentImageLayout + && VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL != texture.m_currentImageLayout) + { + texture.setImageMemoryBarrier(m_commandBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + imageInfo[imageCount].imageLayout = texture.m_currentImageLayout; + imageInfo[imageCount].imageView = VK_NULL_HANDLE != texture.m_textureImageDepthView + ? texture.m_textureImageDepthView + : texture.m_textureImageView + ; + imageInfo[imageCount].sampler = sampler; + + wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wds[wdsCount].pNext = NULL; + wds[wdsCount].dstSet = descriptorSet; + wds[wdsCount].dstBinding = bindInfo->binding; + wds[wdsCount].dstArrayElement = 0; + wds[wdsCount].descriptorCount = 1; + wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + wds[wdsCount].pImageInfo = &imageInfo[imageCount]; + wds[wdsCount].pBufferInfo = NULL; + wds[wdsCount].pTexelBufferView = NULL; + ++wdsCount; + + wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wds[wdsCount].pNext = NULL; + wds[wdsCount].dstSet = descriptorSet; + wds[wdsCount].dstBinding = bindInfo->samplerBinding; + wds[wdsCount].dstArrayElement = 0; + wds[wdsCount].descriptorCount = 1; + wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + wds[wdsCount].pImageInfo = &imageInfo[imageCount]; + wds[wdsCount].pBufferInfo = NULL; + wds[wdsCount].pTexelBufferView = NULL; + ++wdsCount; + + ++imageCount; + } + } + } + + const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment); + const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align); + const uint32_t fsize = bx::strideAlign((NULL != program.m_fsh ? program.m_fsh->m_size : 0), align); + const uint32_t total = vsize + fsize; + + if (0 < total) + { + uint32_t vsUniformBinding = program.m_vsh->m_uniformBinding; + uint32_t fsUniformBinding = program.m_fsh ? program.m_fsh->m_uniformBinding : 0; + + if (vsize > 0) + { + bufferInfo[bufferCount].buffer = scratchBuffer.m_buffer; + bufferInfo[bufferCount].offset = 0; + bufferInfo[bufferCount].range = vsize; + + wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wds[wdsCount].pNext = NULL; + wds[wdsCount].dstSet = descriptorSet; + wds[wdsCount].dstBinding = vsUniformBinding; + wds[wdsCount].dstArrayElement = 0; + wds[wdsCount].descriptorCount = 1; + wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + wds[wdsCount].pImageInfo = NULL; + wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; + wds[wdsCount].pTexelBufferView = NULL; + ++wdsCount; + ++bufferCount; + } + + if (fsize > 0) + { + bufferInfo[bufferCount].buffer = scratchBuffer.m_buffer; + bufferInfo[bufferCount].offset = 0; + bufferInfo[bufferCount].range = fsize; + + wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wds[wdsCount].pNext = NULL; + wds[wdsCount].dstSet = descriptorSet; + wds[wdsCount].dstBinding = fsUniformBinding; + wds[wdsCount].dstArrayElement = 0; + wds[wdsCount].descriptorCount = 1; + wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + wds[wdsCount].pImageInfo = NULL; + wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; + wds[wdsCount].pTexelBufferView = NULL; + ++wdsCount; + ++bufferCount; + } + } + + vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL); + } + void commit(UniformBuffer& _uniformBuffer) { _uniformBuffer.reset(); @@ -3819,6 +4020,7 @@ VK_IMPORT_DEVICE VkPhysicalDeviceProperties m_deviceProperties; VkPhysicalDeviceMemoryProperties m_memoryProperties; + VkPhysicalDeviceFeatures m_deviceFeatures; VkSwapchainCreateInfoKHR m_sci; VkSurfaceKHR m_surface; @@ -3958,7 +4160,7 @@ VK_DESTROY ma.pNext = NULL; ma.allocationSize = mr.size; ma.memoryTypeIndex = s_renderVK->selectMemoryType(mr.memoryTypeBits - , VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + , VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ); VK_CHECK(vkAllocateMemory(device , &ma @@ -4120,16 +4322,18 @@ VK_DESTROY m_flags = _flags; m_dynamic = NULL == _data; - bool compute = m_flags & BGFX_BUFFER_COMPUTE_READ_WRITE; + bool storage = m_flags & BGFX_BUFFER_COMPUTE_READ_WRITE; + bool indirect = m_flags & BGFX_BUFFER_DRAW_INDIRECT; VkBufferCreateInfo bci; bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bci.pNext = NULL; bci.flags = 0; bci.size = _size; bci.usage = 0 -// | (m_dynamic ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : 0) - | (_vertex ? VK_BUFFER_USAGE_VERTEX_BUFFER_BIT : VK_BUFFER_USAGE_INDEX_BUFFER_BIT) - | (compute ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0) +// | (m_dynamic ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : 0) + | (_vertex ? VK_BUFFER_USAGE_VERTEX_BUFFER_BIT : VK_BUFFER_USAGE_INDEX_BUFFER_BIT) + | (storage || indirect ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0) + | (indirect ? VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT : 0) | VK_BUFFER_USAGE_TRANSFER_DST_BIT ; bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; @@ -4531,7 +4735,7 @@ VK_DESTROY { m_uniformBinding = fragment ? 48 : 0; m_bindings[bidx].stageFlags = VK_SHADER_STAGE_ALL; - m_bindings[bidx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + m_bindings[bidx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; m_bindings[bidx].binding = m_uniformBinding; m_bindings[bidx].pImmutableSamplers = NULL; m_bindings[bidx].descriptorCount = 1; @@ -5102,6 +5306,33 @@ VK_DESTROY )); } + if ((m_vkTextureAspect & VK_IMAGE_ASPECT_DEPTH_BIT) + && (m_vkTextureAspect & VK_IMAGE_ASPECT_STENCIL_BIT)) + { + VkImageViewCreateInfo viewInfo; + viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + viewInfo.pNext = NULL; + viewInfo.flags = 0; + viewInfo.image = m_textureImage; + viewInfo.viewType = m_type; + viewInfo.format = m_vkTextureFormat; + viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = m_numMips; //m_numMips; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers); + VK_CHECK(vkCreateImageView( + device + , &viewInfo + , allocatorCb + , &m_textureImageDepthView + )); + } + // image view creation for storage if needed if (m_flags & BGFX_TEXTURE_COMPUTE_WRITE) { @@ -5143,8 +5374,11 @@ VK_DESTROY vkFreeMemory(device, m_textureDeviceMem, allocatorCb); vkDestroy(m_textureImageStorageView); + vkDestroy(m_textureImageDepthView); vkDestroy(m_textureImageView); vkDestroy(m_textureImage); + + m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } } @@ -5433,6 +5667,7 @@ VK_DESTROY uint16_t currentSamplerStateIdx = kInvalidHandle; ProgramHandle currentProgram = BGFX_INVALID_HANDLE; uint32_t currentBindHash = 0; + uint32_t currentDslHash = 0; bool hasPredefined = false; bool commandListChanged = false; VkPipeline currentPipeline = VK_NULL_HANDLE; @@ -5653,96 +5888,9 @@ VK_DESTROY currentPipeline = pipeline; vkCmdBindPipeline(m_commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); currentBindHash = 0; + currentDslHash = 0; } -// uint32_t bindHash = bx::hash(renderBind.m_bind, sizeof(renderBind.m_bind) ); -// if (currentBindHash != bindHash) -// { -// currentBindHash = bindHash; -// -// Bind* bindCached = bindLru.find(bindHash); -// if (NULL == bindCached) -// { -// D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_MAX_COMPUTE_BINDINGS] = {}; -// uint32_t samplerFlags[BGFX_MAX_COMPUTE_BINDINGS] = {}; -// -// for (uint32_t ii = 0; ii < maxComputeBindings; ++ii) -// { -// const Binding& bind = renderBind.m_bind[ii]; -// if (kInvalidHandle != bind.m_idx) -// { -// switch (bind.m_type) -// { -// case Binding::Image: -// { -// TextureD3D12& texture = m_textures[bind.m_idx]; -// -// if (Access::Read != bind.m_access) -// { -// texture.setState(m_commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); -// scratchBuffer.allocUav(srvHandle[ii], texture, bind.m_mip); -// } -// else -// { -// texture.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ); -// scratchBuffer.allocSrv(srvHandle[ii], texture, bind.m_mip); -// samplerFlags[ii] = texture.m_flags; -// } -// } -// break; -// -// case Binding::IndexBuffer: -// case Binding::VertexBuffer: -// { -// BufferD3D12& buffer = Binding::IndexBuffer == bind.m_type -// ? m_indexBuffers[bind.m_idx] -// : m_vertexBuffers[bind.m_idx] -// ; -// -// if (Access::Read != bind.m_access) -// { -// buffer.setState(m_commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); -// scratchBuffer.allocUav(srvHandle[ii], buffer); -// } -// else -// { -// buffer.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ); -// scratchBuffer.allocSrv(srvHandle[ii], buffer); -// } -// } -// break; -// } -// } -// } -// -// uint16_t samplerStateIdx = getSamplerState(samplerFlags, maxComputeBindings, _render->m_colorPalette); -// if (samplerStateIdx != currentSamplerStateIdx) -// { -// currentSamplerStateIdx = samplerStateIdx; -// m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) ); -// } -// -// m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, srvHandle[0]); -// m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, srvHandle[0]); -// -// Bind bind; -// bind.m_srvHandle = srvHandle[0]; -// bind.m_samplerStateIdx = samplerStateIdx; -// bindLru.add(bindHash, bind, 0); -// } -// else -// { -// uint16_t samplerStateIdx = bindCached->m_samplerStateIdx; -// if (samplerStateIdx != currentSamplerStateIdx) -// { -// currentSamplerStateIdx = samplerStateIdx; -// m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) ); -// } -// m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, bindCached->m_srvHandle); -// m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, bindCached->m_srvHandle); -// } -// } - bool constantsChanged = false; if (compute.m_uniformBegin < compute.m_uniformEnd || currentProgram.idx != key.m_program.idx) @@ -5762,135 +5910,54 @@ VK_DESTROY constantsChanged = true; } + ProgramVK& program = m_program[currentProgram.idx]; if (constantsChanged || hasPredefined) { - ProgramVK& program = m_program[currentProgram.idx]; viewState.setPredefined<4>(this, view, program, _render, compute); // commitShaderConstants(key.m_program, gpuAddress); // m_commandList->SetComputeRootConstantBufferView(Rdt::CBV, gpuAddress); } + uint32_t bindHash = bx::hash(renderBind.m_bind, sizeof(renderBind.m_bind) ); + if (currentBindHash != bindHash + || currentDslHash != program.m_descriptorSetLayoutHash) { - ProgramVK& program = m_program[currentProgram.idx]; - ScratchBufferVK& sb = m_scratchBuffer[m_backBufferColorIdx]; + currentBindHash = bindHash; + currentDslHash = program.m_descriptorSetLayoutHash; - VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash); - VkDescriptorSetAllocateInfo dsai; - dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - dsai.pNext = NULL; - dsai.descriptorPool = m_descriptorPool; - dsai.descriptorSetCount = 1; - dsai.pSetLayouts = &dsl; - vkAllocateDescriptorSets(m_device, &dsai, &sb.m_descriptorSet[sb.m_currentDs]); + allocDescriptorSet(program, renderBind, scratchBuffer); + } - VkDescriptorImageInfo imageInfo[BGFX_MAX_COMPUTE_BINDINGS]; - VkDescriptorBufferInfo bufferInfo[BGFX_MAX_COMPUTE_BINDINGS]; - VkWriteDescriptorSet wds[BGFX_MAX_COMPUTE_BINDINGS]; - bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_MAX_COMPUTE_BINDINGS); - uint32_t wdsCount = 0; - uint32_t imageCount = 0; - uint32_t bufferCount = 0; - for (uint32_t stage = 0; stage < BGFX_MAX_COMPUTE_BINDINGS; ++stage) - { - const Binding& bind = renderBind.m_bind[stage]; - if (kInvalidHandle != bind.m_idx) - { - VkDescriptorType descriptorType = (VkDescriptorType)program.m_vsh->m_bindInfo[stage].samplerBinding; - if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) - { - VertexBufferVK& vb = m_vertexBuffers[bind.m_idx]; - bufferInfo[bufferCount].buffer = vb.m_buffer; - bufferInfo[bufferCount].offset = 0; - bufferInfo[bufferCount].range = vb.m_size; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = program.m_vsh->m_bindInfo[stage].binding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = descriptorType; - wds[wdsCount].pImageInfo = NULL; - wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - bufferCount++; - } - else if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) - { - TextureVK& texture = m_textures[bind.m_idx]; - VkSampler sampler = getSampler( - (0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags) - ? bind.m_samplerFlags - : (uint32_t)texture.m_flags - ) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK) - , (uint32_t)texture.m_numMips); - - imageInfo[stage].imageLayout = texture.m_currentImageLayout; - imageInfo[stage].imageView = texture.m_textureImageStorageView ? texture.m_textureImageStorageView : texture.m_textureImageView; - imageInfo[stage].sampler = sampler; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = program.m_vsh->m_bindInfo[stage].binding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = descriptorType; - wds[wdsCount].pImageInfo = &imageInfo[imageCount]; - wds[wdsCount].pBufferInfo = NULL; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - } - } - } + uint32_t offset = 0; + if (constantsChanged + || hasPredefined) + { const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment); const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align); - if (vsize > 0) - { - bufferInfo[bufferCount].buffer = sb.m_buffer; - bufferInfo[bufferCount].offset = sb.m_pos; - bufferInfo[bufferCount].range = vsize; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = program.m_vsh->m_uniformBinding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - wds[wdsCount].pImageInfo = NULL; - wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - bufferCount++; - - bx::memCopy(&sb.m_data[sb.m_pos], m_vsScratch, program.m_vsh->m_size); - } - - sb.m_pos += vsize; + offset = scratchBuffer.m_pos; m_vsChanges = 0; m_fsChanges = 0; - vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL); - vkCmdBindDescriptorSets( - m_commandBuffer - , VK_PIPELINE_BIND_POINT_COMPUTE - , program.m_pipelineLayout - , 0 - , 1 - , &sb.m_descriptorSet[sb.m_currentDs] - , 0 - , NULL - ); + bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size); - sb.m_currentDs++; + scratchBuffer.m_pos += vsize; } + vkCmdBindDescriptorSets( + m_commandBuffer + , VK_PIPELINE_BIND_POINT_COMPUTE + , program.m_pipelineLayout + , 0 + , 1 + , &scratchBuffer.getCurrentDS() + , constantsChanged || hasPredefined ? 1 : 0 + , &offset + ); + if (isValid(compute.m_indirectBuffer) ) { const VertexBufferVK& vb = m_vertexBuffers[compute.m_indirectBuffer.idx]; @@ -5980,6 +6047,7 @@ VK_DESTROY currentPipeline = VK_NULL_HANDLE; currentBindHash = 0; + currentDslHash = 0; currentSamplerStateIdx = kInvalidHandle; currentProgram = BGFX_INVALID_HANDLE; currentState.clear(); @@ -6046,79 +6114,6 @@ VK_DESTROY uint16_t scissor = draw.m_scissor; uint32_t bindHash = bx::hash(renderBind.m_bind, sizeof(renderBind.m_bind) ); - if (currentBindHash != bindHash - || 0 != changedStencil - || (hasFactor && blendFactor != draw.m_rgba) - || (0 != (BGFX_STATE_PT_MASK & changedFlags) - || prim.m_topology != s_primInfo[primIndex].m_topology) - || currentState.m_scissor != scissor - || pipeline != currentPipeline - || hasOcclusionQuery) - { -// m_batch.flush(m_commandList); - } - -// if (currentBindHash != bindHash) -// { -// currentBindHash = bindHash; -// -// Bind* bindCached = bindLru.find(bindHash); -// if (NULL == bindCached) -// { -// D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; -// uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; -// { -// srvHandle[0].ptr = 0; -// for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage) -// { -// const Binding& bind = renderBind.m_bind[stage]; -// if (kInvalidHandle != bind.m_idx) -// { -// TextureD3D12& texture = m_textures[bind.m_idx]; -// texture.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ); -// scratchBuffer.allocSrv(srvHandle[stage], texture); -// samplerFlags[stage] = (0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & bind.m_textureFlags) -// ? bind.m_textureFlags -// : texture.m_flags -// ) & (BGFX_TEXTURE_SAMPLER_BITS_MASK|BGFX_TEXTURE_BORDER_COLOR_MASK) -// ; -// } -// else -// { -// bx::memCopy(&srvHandle[stage], &srvHandle[0], sizeof(D3D12_GPU_DESCRIPTOR_HANDLE) ); -// samplerFlags[stage] = 0; -// } -// } -// } -// -// if (srvHandle[0].ptr != 0) -// { -// uint16_t samplerStateIdx = getSamplerState(samplerFlags, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, _render->m_colorPalette); -// if (samplerStateIdx != currentSamplerStateIdx) -// { -// currentSamplerStateIdx = samplerStateIdx; -// m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) ); -// } -// -// m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle[0]); -// -// Bind bind; -// bind.m_srvHandle = srvHandle[0]; -// bind.m_samplerStateIdx = samplerStateIdx; -// bindLru.add(bindHash, bind, 0); -// } -// } -// else -// { -// uint16_t samplerStateIdx = bindCached->m_samplerStateIdx; -// if (samplerStateIdx != currentSamplerStateIdx) -// { -// currentSamplerStateIdx = samplerStateIdx; -// m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) ); -// } -// m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, bindCached->m_srvHandle); -// } -// } if (pipeline != currentPipeline || 0 != changedStencil) @@ -6213,158 +6208,60 @@ VK_DESTROY constantsChanged = true; } - if (constantsChanged - || hasPredefined - || currentBindHash != bindHash) + ProgramVK& program = m_program[currentProgram.idx]; + if (hasPredefined) { - ProgramVK& program = m_program[currentProgram.idx]; - ScratchBufferVK& sb = m_scratchBuffer[m_backBufferColorIdx]; - - VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash); - VkDescriptorSetAllocateInfo dsai; - dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - dsai.pNext = NULL; - dsai.descriptorPool = m_descriptorPool; - dsai.descriptorSetCount = 1; - dsai.pSetLayouts = &dsl; - vkAllocateDescriptorSets(m_device, &dsai, &sb.m_descriptorSet[sb.m_currentDs]); - - VkDescriptorImageInfo imageInfo[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; - VkDescriptorBufferInfo bufferInfo[16]; - VkWriteDescriptorSet wds[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; - bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS); - uint32_t wdsCount = 0; - uint32_t bufferCount = 0; - for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage) - { - const Binding& bind = renderBind.m_bind[stage]; - if (kInvalidHandle != bind.m_idx && - isValid(program.m_fsh->m_bindInfo[stage].uniformHandle)) - { - TextureVK& texture = m_textures[bind.m_idx]; - VkSampler sampler = getSampler( - (0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags) - ? bind.m_samplerFlags - : (uint32_t)texture.m_flags - ) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK) - , (uint32_t)texture.m_numMips); - - imageInfo[stage].imageLayout = texture.m_currentImageLayout; - imageInfo[stage].imageView = texture.m_textureImageView; - imageInfo[stage].sampler = sampler; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = program.m_fsh->m_bindInfo[stage].binding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - wds[wdsCount].pImageInfo = &imageInfo[stage]; - wds[wdsCount].pBufferInfo = NULL; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = program.m_fsh->m_bindInfo[stage].samplerBinding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - wds[wdsCount].pImageInfo = &imageInfo[stage]; - wds[wdsCount].pBufferInfo = NULL; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - } - else - { - imageInfo[stage].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imageInfo[stage].imageView = VK_NULL_HANDLE; - imageInfo[stage].sampler = VK_NULL_HANDLE; - } - } - uint32_t ref = (newFlags & BGFX_STATE_ALPHA_REF_MASK) >> BGFX_STATE_ALPHA_REF_SHIFT; viewState.m_alphaRef = ref / 255.0f; viewState.setPredefined<4>(this, view, program, _render, draw); + } + if (currentBindHash != bindHash + || currentDslHash != program.m_descriptorSetLayoutHash) + { + currentBindHash = bindHash; + currentDslHash = program.m_descriptorSetLayoutHash; + + allocDescriptorSet(program, renderBind, scratchBuffer); + } + + uint32_t numOffset = 0; + uint32_t offsets[2] = {0, 0}; + if (constantsChanged + || hasPredefined) + { const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment); const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align); const uint32_t fsize = bx::strideAlign((NULL != program.m_fsh ? program.m_fsh->m_size : 0), align); const uint32_t total = vsize + fsize; - - if (0 < total) + if (vsize > 0) { - uint32_t vsUniformBinding = program.m_vsh->m_uniformBinding; - uint32_t fsUniformBinding = program.m_fsh->m_uniformBinding; - - if (vsize > 0) - { - bufferInfo[bufferCount].buffer = sb.m_buffer; - bufferInfo[bufferCount].offset = sb.m_pos; - bufferInfo[bufferCount].range = vsize; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = vsUniformBinding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - wds[wdsCount].pImageInfo = NULL; - wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - bufferCount++; - - bx::memCopy(&sb.m_data[sb.m_pos], m_vsScratch, program.m_vsh->m_size); - } - - if (fsize > 0) - { - bufferInfo[bufferCount].buffer = sb.m_buffer; - bufferInfo[bufferCount].offset = sb.m_pos + vsize; - bufferInfo[bufferCount].range = fsize; - - wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wds[wdsCount].pNext = NULL; - wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs]; - wds[wdsCount].dstBinding = fsUniformBinding; - wds[wdsCount].dstArrayElement = 0; - wds[wdsCount].descriptorCount = 1; - wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - wds[wdsCount].pImageInfo = NULL; - wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount]; - wds[wdsCount].pTexelBufferView = NULL; - wdsCount++; - bufferCount++; - - bx::memCopy(&sb.m_data[sb.m_pos + vsize], m_fsScratch, program.m_fsh->m_size); - } - - sb.m_pos += vsize + fsize; + offsets[numOffset++] = scratchBuffer.m_pos; + bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size); + } + if (fsize > 0) + { + offsets[numOffset++] = scratchBuffer.m_pos + vsize; + bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos + vsize], m_fsScratch, program.m_fsh->m_size); } m_vsChanges = 0; m_fsChanges = 0; - - - vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL); - vkCmdBindDescriptorSets( - m_commandBuffer - , VK_PIPELINE_BIND_POINT_GRAPHICS - , program.m_pipelineLayout - , 0 - , 1 - , &sb.m_descriptorSet[sb.m_currentDs] - , 0 - , NULL - ); - - sb.m_currentDs++; + scratchBuffer.m_pos += total; } + vkCmdBindDescriptorSets( + m_commandBuffer + , VK_PIPELINE_BIND_POINT_GRAPHICS + , program.m_pipelineLayout + , 0 + , 1 + , &scratchBuffer.getCurrentDS() + , numOffset + , offsets + ); + + // if (constantsChanged // || hasPredefined) // { @@ -6638,11 +6535,11 @@ BX_UNUSED(presentMin, presentMax); pos++; tvm.printf(10, pos++, 0x8b, " State cache: "); - tvm.printf(10, pos++, 0x8b, " PSO | Sampler | Bind | Queued "); - tvm.printf(10, pos++, 0x8b, " %6d " //| %6d | %6d | %6d " + tvm.printf(10, pos++, 0x8b, " PSO | DSL | DS | Queued "); + tvm.printf(10, pos++, 0x8b, " %6d | %6d | %6d | %6d " , m_pipelineStateCache.getCount() -// , m_samplerStateCache.getCount() -// , bindLru.getCount() + , m_descriptorSetLayoutCache.getCount() + , scratchBuffer.m_currentDs // , m_cmd.m_control.available() ); pos++; @@ -6675,6 +6572,14 @@ BX_UNUSED(presentMin, presentMax); // PIX_ENDEVENT(); } + VkMappedMemoryRange range; + range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range.pNext = NULL; + range.memory = scratchBuffer.m_deviceMem; + range.offset = 0; + range.size = scratchBuffer.m_pos; + vkFlushMappedMemoryRanges(m_device, 1, &range); + if (beginRenderPass) { vkCmdEndRenderPass(m_commandBuffer); diff --git a/src/renderer_vk.h b/src/renderer_vk.h index 98605242a..36da1e82c 100644 --- a/src/renderer_vk.h +++ b/src/renderer_vk.h @@ -71,6 +71,7 @@ VK_IMPORT_INSTANCE_FUNC(false, vkEnumerateDeviceLayerProperties); \ VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceProperties); \ VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceFormatProperties); \ + VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceFeatures); \ VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceImageFormatProperties); \ VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceMemoryProperties); \ VK_IMPORT_INSTANCE_FUNC(true, vkGetPhysicalDeviceMemoryProperties2KHR); \ @@ -325,6 +326,11 @@ VK_DESTROY void destroy(); void reset(); + VkDescriptorSet& getCurrentDS() + { + return m_descriptorSet[m_currentDs - 1]; + } + VkDescriptorSet* m_descriptorSet; VkBuffer m_buffer; VkDeviceMemory m_deviceMem; @@ -469,6 +475,7 @@ VK_DESTROY , m_textureImage(VK_NULL_HANDLE) , m_textureDeviceMem(VK_NULL_HANDLE) , m_textureImageView(VK_NULL_HANDLE) + , m_textureImageDepthView(VK_NULL_HANDLE) , m_textureImageStorageView(VK_NULL_HANDLE) , m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED) { @@ -498,6 +505,7 @@ VK_DESTROY VkImage m_textureImage; VkDeviceMemory m_textureDeviceMem; VkImageView m_textureImageView; + VkImageView m_textureImageDepthView; VkImageView m_textureImageStorageView; VkImageLayout m_currentImageLayout; };