VK: descriptor set optimization / refactoring

- add support for indirect compute calls
- use dynamic uniform buffers instead of plain uniform buffers
- create depth view for depth/stencil texture
- fix dynamic uniform buffer bug, apply it to compute shader
- refactor descriptor set allocation / setting
- fix image layout reinit
- get and set device feature / fix independent blending
- cleanup
This commit is contained in:
Rinthel
2019-08-14 10:06:37 +09:00
committed by Бранимир Караџић
parent df42d7e0eb
commit 4342db8f3b
2 changed files with 337 additions and 424 deletions

View File

@@ -1149,6 +1149,7 @@ VK_IMPORT_INSTANCE
g_caps.supported |= ( 0
| BGFX_CAPS_TEXTURE_BLIT
| BGFX_CAPS_DRAW_INDIRECT
| BGFX_CAPS_INSTANCING
);
g_caps.limits.maxTextureSize = m_deviceProperties.limits.maxImageDimension2D;
@@ -1156,6 +1157,8 @@ VK_IMPORT_INSTANCE
g_caps.limits.maxComputeBindings = BGFX_MAX_COMPUTE_BINDINGS;
g_caps.limits.maxVertexStreams = BGFX_CONFIG_MAX_VERTEX_STREAMS;
vkGetPhysicalDeviceFeatures(m_physicalDevice, &m_deviceFeatures);
{
struct ImageTest
{
@@ -1345,7 +1348,7 @@ VK_IMPORT_INSTANCE
dci.ppEnabledLayerNames = enabledLayerNames;
dci.enabledExtensionCount = numEnabledExtensions;
dci.ppEnabledExtensionNames = enabledExtension;
dci.pEnabledFeatures = NULL;
dci.pEnabledFeatures = &m_deviceFeatures;
result = vkCreateDevice(
m_physicalDevice
@@ -2007,9 +2010,9 @@ VK_IMPORT_DEVICE
// { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 },
{ VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 },
{ VK_DESCRIPTOR_TYPE_SAMPLER, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 },
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 10<<10 },
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 10<<10 },
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 },
{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 },
{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 },
};
// VkDescriptorSetLayoutBinding dslb[] =
@@ -2611,11 +2614,13 @@ VK_IMPORT_DEVICE
uint32_t samplerFlags = (uint32_t)(texture.m_flags & BGFX_SAMPLER_BITS_MASK);
VkSampler sampler = getSampler(samplerFlags, 1);
uint32_t bufferOffset = scratchBuffer.m_pos;
VkDescriptorBufferInfo bufferInfo;
bufferInfo.buffer = scratchBuffer.m_buffer;
bufferInfo.offset = scratchBuffer.m_pos;
bufferInfo.range = bx::strideAlign(program.m_vsh->m_size, align);
bufferInfo.offset = 0;
bufferInfo.range = bx::strideAlign(program.m_vsh->m_size, align);
bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size);
scratchBuffer.m_pos += bufferInfo.range;
VkWriteDescriptorSet wds[3];
wds[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@@ -2624,7 +2629,7 @@ VK_IMPORT_DEVICE
wds[0].dstBinding = program.m_vsh->m_uniformBinding;
wds[0].dstArrayElement = 0;
wds[0].descriptorCount = 1;
wds[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
wds[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
wds[0].pImageInfo = NULL;
wds[0].pBufferInfo = &bufferInfo;
wds[0].pTexelBufferView = NULL;
@@ -2667,8 +2672,8 @@ VK_IMPORT_DEVICE
, 0
, 1
, &scratchBuffer.m_descriptorSet[scratchBuffer.m_currentDs]
, 0
, NULL
, 1
, &bufferOffset
);
scratchBuffer.m_currentDs++;
@@ -2983,7 +2988,7 @@ VK_IMPORT_DEVICE
numAttachments = frameBuffer.m_num;
}
if (!!(BGFX_STATE_BLEND_INDEPENDENT & _state) )
if (!!(BGFX_STATE_BLEND_INDEPENDENT & _state) && m_deviceFeatures.independentBlend )
{
for (uint32_t ii = 1, rgba = _rgba; ii < numAttachments; ++ii, rgba >>= 11)
{
@@ -3265,7 +3270,7 @@ VK_IMPORT_DEVICE
sci.addressModeW = s_textureAddress[(_samplerFlags&BGFX_SAMPLER_W_MASK)>>BGFX_SAMPLER_W_SHIFT];
sci.mipLodBias = 0.0f;
sci.anisotropyEnable = VK_FALSE;
sci.maxAnisotropy = 0;
sci.maxAnisotropy = 4.0f;
sci.compareEnable = 0 != cmpFunc;
sci.compareOp = s_cmpFunc[cmpFunc];
sci.minLod = 0.0f;
@@ -3556,6 +3561,202 @@ VK_IMPORT_DEVICE
return pipeline;
}
/// Allocates one descriptor set for `program` and fills it from `renderBind`.
///
/// The set is allocated from m_descriptorPool using the layout cached under
/// program.m_descriptorSetLayoutHash, stored in the next free slot of
/// scratchBuffer.m_descriptorSet, and scratchBuffer.m_currentDs is advanced.
/// Descriptors are then written for:
///   - every bound stage that the vertex (or, failing that, fragment) shader
///     declares: storage buffers, storage images, or a sampled-image + sampler pair;
///   - the vertex and fragment uniform blocks, as dynamic uniform buffers.
///
/// @param program        Program whose layout and bind info drive the writes.
/// @param renderBind     Per-stage resource bindings for the current item.
/// @param scratchBuffer  Scratch buffer that owns the descriptor-set slots and
///                       backs the uniform data.
///
/// May record image layout transitions into m_commandBuffer.
void allocDescriptorSet(ProgramVK& program, const RenderBind& renderBind, ScratchBufferVK& scratchBuffer)
{
	VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash);

	VkDescriptorSetAllocateInfo dsai;
	dsai.sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
	dsai.pNext              = NULL;
	dsai.descriptorPool     = m_descriptorPool;
	dsai.descriptorSetCount = 1;
	dsai.pSetLayouts        = &dsl;

	VkDescriptorSet& descriptorSet = scratchBuffer.m_descriptorSet[scratchBuffer.m_currentDs];

	// The result used to be ignored; a failed allocation would leave a stale
	// handle in the slot that is then written to and bound. Check it the same
	// way the other Vulkan calls in this file are checked.
	VK_CHECK(vkAllocateDescriptorSets(m_device, &dsai, &descriptorSet) );
	scratchBuffer.m_currentDs++;

	// Worst-case sizes:
	//   - image infos : one per stage (storage image, or shared by the
	//                   sampled-image/sampler pair);
	//   - buffer infos: one storage buffer per stage + VS and FS uniform blocks;
	//   - writes      : two per stage (sampled image + sampler) + VS and FS
	//                   uniform blocks.
	// Sizing every array to BGFX_CONFIG_MAX_TEXTURE_SAMPLERS overflowed the
	// stack arrays once enough stages were bound.
	const uint32_t kMaxImageInfos  = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS;
	const uint32_t kMaxBufferInfos = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS + 2;
	const uint32_t kMaxWrites      = 2*BGFX_CONFIG_MAX_TEXTURE_SAMPLERS + 2;

	VkDescriptorImageInfo  imageInfo[kMaxImageInfos];
	VkDescriptorBufferInfo bufferInfo[kMaxBufferInfos];
	VkWriteDescriptorSet   wds[kMaxWrites];
	bx::memSet(wds, 0, sizeof(wds) );

	uint32_t wdsCount    = 0;
	uint32_t bufferCount = 0;
	uint32_t imageCount  = 0;

	for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
	{
		const Binding& bind = renderBind.m_bind[stage];
		if (kInvalidHandle == bind.m_idx)
		{
			continue;
		}

		// The vertex shader's bind info takes precedence; fall back to the
		// fragment shader when there is one (it is checked for NULL here).
		const ShaderVK::BindInfo* bindInfo = NULL;
		if (isValid(program.m_vsh->m_bindInfo[stage].uniformHandle) )
		{
			bindInfo = &(program.m_vsh->m_bindInfo[stage]);
		}
		else if (NULL != program.m_fsh
			 &&  isValid(program.m_fsh->m_bindInfo[stage].uniformHandle) )
		{
			bindInfo = &(program.m_fsh->m_bindInfo[stage]);
		}

		if (NULL == bindInfo)
		{
			// Resource is bound but neither shader declares this stage.
			continue;
		}

		if (ShaderVK::BindType::Storage == bindInfo->type)
		{
			// For storage bindings samplerBinding carries the descriptor type.
			VkDescriptorType descriptorType = (VkDescriptorType)bindInfo->samplerBinding;

			wds[wdsCount].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
			wds[wdsCount].pNext            = NULL;
			wds[wdsCount].dstSet           = descriptorSet;
			wds[wdsCount].dstBinding       = bindInfo->binding;
			wds[wdsCount].dstArrayElement  = 0;
			wds[wdsCount].descriptorCount  = 1;
			wds[wdsCount].descriptorType   = descriptorType;
			wds[wdsCount].pImageInfo       = NULL;
			wds[wdsCount].pBufferInfo      = NULL;
			wds[wdsCount].pTexelBufferView = NULL;

			if (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == descriptorType)
			{
				// Expose the whole buffer.
				VertexBufferVK& vb = m_vertexBuffers[bind.m_idx];
				bufferInfo[bufferCount].buffer = vb.m_buffer;
				bufferInfo[bufferCount].offset = 0;
				bufferInfo[bufferCount].range  = vb.m_size;
				wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
				++bufferCount;
			}
			else if (VK_DESCRIPTOR_TYPE_STORAGE_IMAGE == descriptorType)
			{
				TextureVK& texture = m_textures[bind.m_idx];
				VkSampler sampler = getSampler(
					  (0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
						? bind.m_samplerFlags
						: (uint32_t)texture.m_flags
					  ) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
					, (uint32_t)texture.m_numMips);

				// NOTE(review): Vulkan expects storage-image descriptors to use
				// VK_IMAGE_LAYOUT_GENERAL; transitioning to SHADER_READ_ONLY_OPTIMAL
				// here looks suspicious - confirm against the callers before changing.
				if (VK_IMAGE_LAYOUT_GENERAL != texture.m_currentImageLayout
				&&  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL != texture.m_currentImageLayout)
				{
					texture.setImageMemoryBarrier(m_commandBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
				}

				// Prefer the dedicated storage view when the texture has one.
				imageInfo[imageCount].imageLayout = texture.m_currentImageLayout;
				imageInfo[imageCount].imageView   = VK_NULL_HANDLE != texture.m_textureImageStorageView
					? texture.m_textureImageStorageView
					: texture.m_textureImageView
					;
				imageInfo[imageCount].sampler     = sampler;
				wds[wdsCount].pImageInfo = &imageInfo[imageCount];
				++imageCount;
			}

			++wdsCount;
		}
		else if (ShaderVK::BindType::Sampler == bindInfo->type)
		{
			TextureVK& texture = m_textures[bind.m_idx];
			VkSampler sampler = getSampler(
				  (0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
					? bind.m_samplerFlags
					: (uint32_t)texture.m_flags
				  ) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
				, (uint32_t)texture.m_numMips);

			if (VK_IMAGE_LAYOUT_GENERAL != texture.m_currentImageLayout
			&&  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL != texture.m_currentImageLayout)
			{
				texture.setImageMemoryBarrier(m_commandBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
			}

			// Prefer the depth-only view when the texture has one.
			imageInfo[imageCount].imageLayout = texture.m_currentImageLayout;
			imageInfo[imageCount].imageView   = VK_NULL_HANDLE != texture.m_textureImageDepthView
				? texture.m_textureImageDepthView
				: texture.m_textureImageView
				;
			imageInfo[imageCount].sampler     = sampler;

			// Texture and sampler live in two separate bindings that share
			// the same VkDescriptorImageInfo.
			wds[wdsCount].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
			wds[wdsCount].pNext            = NULL;
			wds[wdsCount].dstSet           = descriptorSet;
			wds[wdsCount].dstBinding       = bindInfo->binding;
			wds[wdsCount].dstArrayElement  = 0;
			wds[wdsCount].descriptorCount  = 1;
			wds[wdsCount].descriptorType   = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
			wds[wdsCount].pImageInfo       = &imageInfo[imageCount];
			wds[wdsCount].pBufferInfo      = NULL;
			wds[wdsCount].pTexelBufferView = NULL;
			++wdsCount;

			wds[wdsCount].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
			wds[wdsCount].pNext            = NULL;
			wds[wdsCount].dstSet           = descriptorSet;
			wds[wdsCount].dstBinding       = bindInfo->samplerBinding;
			wds[wdsCount].dstArrayElement  = 0;
			wds[wdsCount].descriptorCount  = 1;
			wds[wdsCount].descriptorType   = VK_DESCRIPTOR_TYPE_SAMPLER;
			wds[wdsCount].pImageInfo       = &imageInfo[imageCount];
			wds[wdsCount].pBufferInfo      = NULL;
			wds[wdsCount].pTexelBufferView = NULL;
			++wdsCount;

			++imageCount;
		}
	}

	// Uniform blocks are written as dynamic uniform buffers with offset 0 and
	// a range of the aligned block size; the position inside the scratch
	// buffer is supplied as a dynamic offset when the set is bound.
	const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
	const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
	const uint32_t fsize = bx::strideAlign( (NULL != program.m_fsh ? program.m_fsh->m_size : 0), align);

	if (vsize > 0)
	{
		bufferInfo[bufferCount].buffer = scratchBuffer.m_buffer;
		bufferInfo[bufferCount].offset = 0;
		bufferInfo[bufferCount].range  = vsize;

		wds[wdsCount].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		wds[wdsCount].pNext            = NULL;
		wds[wdsCount].dstSet           = descriptorSet;
		wds[wdsCount].dstBinding       = program.m_vsh->m_uniformBinding;
		wds[wdsCount].dstArrayElement  = 0;
		wds[wdsCount].descriptorCount  = 1;
		wds[wdsCount].descriptorType   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
		wds[wdsCount].pImageInfo       = NULL;
		wds[wdsCount].pBufferInfo      = &bufferInfo[bufferCount];
		wds[wdsCount].pTexelBufferView = NULL;
		++wdsCount;
		++bufferCount;
	}

	if (fsize > 0)
	{
		bufferInfo[bufferCount].buffer = scratchBuffer.m_buffer;
		bufferInfo[bufferCount].offset = 0;
		bufferInfo[bufferCount].range  = fsize;

		wds[wdsCount].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		wds[wdsCount].pNext            = NULL;
		wds[wdsCount].dstSet           = descriptorSet;
		wds[wdsCount].dstBinding       = NULL != program.m_fsh ? program.m_fsh->m_uniformBinding : 0;
		wds[wdsCount].dstArrayElement  = 0;
		wds[wdsCount].descriptorCount  = 1;
		wds[wdsCount].descriptorType   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
		wds[wdsCount].pImageInfo       = NULL;
		wds[wdsCount].pBufferInfo      = &bufferInfo[bufferCount];
		wds[wdsCount].pTexelBufferView = NULL;
		++wdsCount;
		++bufferCount;
	}

	vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL);
}
void commit(UniformBuffer& _uniformBuffer)
{
_uniformBuffer.reset();
@@ -3819,6 +4020,7 @@ VK_IMPORT_DEVICE
VkPhysicalDeviceProperties m_deviceProperties;
VkPhysicalDeviceMemoryProperties m_memoryProperties;
VkPhysicalDeviceFeatures m_deviceFeatures;
VkSwapchainCreateInfoKHR m_sci;
VkSurfaceKHR m_surface;
@@ -3958,7 +4160,7 @@ VK_DESTROY
ma.pNext = NULL;
ma.allocationSize = mr.size;
ma.memoryTypeIndex = s_renderVK->selectMemoryType(mr.memoryTypeBits
, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
);
VK_CHECK(vkAllocateMemory(device
, &ma
@@ -4120,16 +4322,18 @@ VK_DESTROY
m_flags = _flags;
m_dynamic = NULL == _data;
bool compute = m_flags & BGFX_BUFFER_COMPUTE_READ_WRITE;
bool storage = m_flags & BGFX_BUFFER_COMPUTE_READ_WRITE;
bool indirect = m_flags & BGFX_BUFFER_DRAW_INDIRECT;
VkBufferCreateInfo bci;
bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bci.pNext = NULL;
bci.flags = 0;
bci.size = _size;
bci.usage = 0
// | (m_dynamic ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : 0)
| (_vertex ? VK_BUFFER_USAGE_VERTEX_BUFFER_BIT : VK_BUFFER_USAGE_INDEX_BUFFER_BIT)
| (compute ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0)
// | (m_dynamic ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : 0)
| (_vertex ? VK_BUFFER_USAGE_VERTEX_BUFFER_BIT : VK_BUFFER_USAGE_INDEX_BUFFER_BIT)
| (storage || indirect ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0)
| (indirect ? VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT : 0)
| VK_BUFFER_USAGE_TRANSFER_DST_BIT
;
bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
@@ -4531,7 +4735,7 @@ VK_DESTROY
{
m_uniformBinding = fragment ? 48 : 0;
m_bindings[bidx].stageFlags = VK_SHADER_STAGE_ALL;
m_bindings[bidx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
m_bindings[bidx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
m_bindings[bidx].binding = m_uniformBinding;
m_bindings[bidx].pImmutableSamplers = NULL;
m_bindings[bidx].descriptorCount = 1;
@@ -5102,6 +5306,33 @@ VK_DESTROY
));
}
if ((m_vkTextureAspect & VK_IMAGE_ASPECT_DEPTH_BIT)
&& (m_vkTextureAspect & VK_IMAGE_ASPECT_STENCIL_BIT))
{
VkImageViewCreateInfo viewInfo;
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
viewInfo.pNext = NULL;
viewInfo.flags = 0;
viewInfo.image = m_textureImage;
viewInfo.viewType = m_type;
viewInfo.format = m_vkTextureFormat;
viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = m_numMips; //m_numMips;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers);
VK_CHECK(vkCreateImageView(
device
, &viewInfo
, allocatorCb
, &m_textureImageDepthView
));
}
// image view creation for storage if needed
if (m_flags & BGFX_TEXTURE_COMPUTE_WRITE)
{
@@ -5143,8 +5374,11 @@ VK_DESTROY
vkFreeMemory(device, m_textureDeviceMem, allocatorCb);
vkDestroy(m_textureImageStorageView);
vkDestroy(m_textureImageDepthView);
vkDestroy(m_textureImageView);
vkDestroy(m_textureImage);
m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
}
}
@@ -5433,6 +5667,7 @@ VK_DESTROY
uint16_t currentSamplerStateIdx = kInvalidHandle;
ProgramHandle currentProgram = BGFX_INVALID_HANDLE;
uint32_t currentBindHash = 0;
uint32_t currentDslHash = 0;
bool hasPredefined = false;
bool commandListChanged = false;
VkPipeline currentPipeline = VK_NULL_HANDLE;
@@ -5653,96 +5888,9 @@ VK_DESTROY
currentPipeline = pipeline;
vkCmdBindPipeline(m_commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
currentBindHash = 0;
currentDslHash = 0;
}
// uint32_t bindHash = bx::hash<bx::HashMurmur2A>(renderBind.m_bind, sizeof(renderBind.m_bind) );
// if (currentBindHash != bindHash)
// {
// currentBindHash = bindHash;
//
// Bind* bindCached = bindLru.find(bindHash);
// if (NULL == bindCached)
// {
// D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_MAX_COMPUTE_BINDINGS] = {};
// uint32_t samplerFlags[BGFX_MAX_COMPUTE_BINDINGS] = {};
//
// for (uint32_t ii = 0; ii < maxComputeBindings; ++ii)
// {
// const Binding& bind = renderBind.m_bind[ii];
// if (kInvalidHandle != bind.m_idx)
// {
// switch (bind.m_type)
// {
// case Binding::Image:
// {
// TextureD3D12& texture = m_textures[bind.m_idx];
//
// if (Access::Read != bind.m_access)
// {
// texture.setState(m_commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
// scratchBuffer.allocUav(srvHandle[ii], texture, bind.m_mip);
// }
// else
// {
// texture.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
// scratchBuffer.allocSrv(srvHandle[ii], texture, bind.m_mip);
// samplerFlags[ii] = texture.m_flags;
// }
// }
// break;
//
// case Binding::IndexBuffer:
// case Binding::VertexBuffer:
// {
// BufferD3D12& buffer = Binding::IndexBuffer == bind.m_type
// ? m_indexBuffers[bind.m_idx]
// : m_vertexBuffers[bind.m_idx]
// ;
//
// if (Access::Read != bind.m_access)
// {
// buffer.setState(m_commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
// scratchBuffer.allocUav(srvHandle[ii], buffer);
// }
// else
// {
// buffer.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
// scratchBuffer.allocSrv(srvHandle[ii], buffer);
// }
// }
// break;
// }
// }
// }
//
// uint16_t samplerStateIdx = getSamplerState(samplerFlags, maxComputeBindings, _render->m_colorPalette);
// if (samplerStateIdx != currentSamplerStateIdx)
// {
// currentSamplerStateIdx = samplerStateIdx;
// m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
// }
//
// m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, srvHandle[0]);
// m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, srvHandle[0]);
//
// Bind bind;
// bind.m_srvHandle = srvHandle[0];
// bind.m_samplerStateIdx = samplerStateIdx;
// bindLru.add(bindHash, bind, 0);
// }
// else
// {
// uint16_t samplerStateIdx = bindCached->m_samplerStateIdx;
// if (samplerStateIdx != currentSamplerStateIdx)
// {
// currentSamplerStateIdx = samplerStateIdx;
// m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
// }
// m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, bindCached->m_srvHandle);
// m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, bindCached->m_srvHandle);
// }
// }
bool constantsChanged = false;
if (compute.m_uniformBegin < compute.m_uniformEnd
|| currentProgram.idx != key.m_program.idx)
@@ -5762,135 +5910,54 @@ VK_DESTROY
constantsChanged = true;
}
ProgramVK& program = m_program[currentProgram.idx];
if (constantsChanged
|| hasPredefined)
{
ProgramVK& program = m_program[currentProgram.idx];
viewState.setPredefined<4>(this, view, program, _render, compute);
// commitShaderConstants(key.m_program, gpuAddress);
// m_commandList->SetComputeRootConstantBufferView(Rdt::CBV, gpuAddress);
}
uint32_t bindHash = bx::hash<bx::HashMurmur2A>(renderBind.m_bind, sizeof(renderBind.m_bind) );
if (currentBindHash != bindHash
|| currentDslHash != program.m_descriptorSetLayoutHash)
{
ProgramVK& program = m_program[currentProgram.idx];
ScratchBufferVK& sb = m_scratchBuffer[m_backBufferColorIdx];
currentBindHash = bindHash;
currentDslHash = program.m_descriptorSetLayoutHash;
VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash);
VkDescriptorSetAllocateInfo dsai;
dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
dsai.pNext = NULL;
dsai.descriptorPool = m_descriptorPool;
dsai.descriptorSetCount = 1;
dsai.pSetLayouts = &dsl;
vkAllocateDescriptorSets(m_device, &dsai, &sb.m_descriptorSet[sb.m_currentDs]);
allocDescriptorSet(program, renderBind, scratchBuffer);
}
VkDescriptorImageInfo imageInfo[BGFX_MAX_COMPUTE_BINDINGS];
VkDescriptorBufferInfo bufferInfo[BGFX_MAX_COMPUTE_BINDINGS];
VkWriteDescriptorSet wds[BGFX_MAX_COMPUTE_BINDINGS];
bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_MAX_COMPUTE_BINDINGS);
uint32_t wdsCount = 0;
uint32_t imageCount = 0;
uint32_t bufferCount = 0;
for (uint32_t stage = 0; stage < BGFX_MAX_COMPUTE_BINDINGS; ++stage)
{
const Binding& bind = renderBind.m_bind[stage];
if (kInvalidHandle != bind.m_idx)
{
VkDescriptorType descriptorType = (VkDescriptorType)program.m_vsh->m_bindInfo[stage].samplerBinding;
if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
{
VertexBufferVK& vb = m_vertexBuffers[bind.m_idx];
bufferInfo[bufferCount].buffer = vb.m_buffer;
bufferInfo[bufferCount].offset = 0;
bufferInfo[bufferCount].range = vb.m_size;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = program.m_vsh->m_bindInfo[stage].binding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = descriptorType;
wds[wdsCount].pImageInfo = NULL;
wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
bufferCount++;
}
else if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
{
TextureVK& texture = m_textures[bind.m_idx];
VkSampler sampler = getSampler(
(0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
? bind.m_samplerFlags
: (uint32_t)texture.m_flags
) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
, (uint32_t)texture.m_numMips);
imageInfo[stage].imageLayout = texture.m_currentImageLayout;
imageInfo[stage].imageView = texture.m_textureImageStorageView ? texture.m_textureImageStorageView : texture.m_textureImageView;
imageInfo[stage].sampler = sampler;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = program.m_vsh->m_bindInfo[stage].binding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = descriptorType;
wds[wdsCount].pImageInfo = &imageInfo[imageCount];
wds[wdsCount].pBufferInfo = NULL;
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
}
}
}
uint32_t offset = 0;
if (constantsChanged
|| hasPredefined)
{
const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
if (vsize > 0)
{
bufferInfo[bufferCount].buffer = sb.m_buffer;
bufferInfo[bufferCount].offset = sb.m_pos;
bufferInfo[bufferCount].range = vsize;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = program.m_vsh->m_uniformBinding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
wds[wdsCount].pImageInfo = NULL;
wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
bufferCount++;
bx::memCopy(&sb.m_data[sb.m_pos], m_vsScratch, program.m_vsh->m_size);
}
sb.m_pos += vsize;
offset = scratchBuffer.m_pos;
m_vsChanges = 0;
m_fsChanges = 0;
vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL);
vkCmdBindDescriptorSets(
m_commandBuffer
, VK_PIPELINE_BIND_POINT_COMPUTE
, program.m_pipelineLayout
, 0
, 1
, &sb.m_descriptorSet[sb.m_currentDs]
, 0
, NULL
);
bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size);
sb.m_currentDs++;
scratchBuffer.m_pos += vsize;
}
vkCmdBindDescriptorSets(
m_commandBuffer
, VK_PIPELINE_BIND_POINT_COMPUTE
, program.m_pipelineLayout
, 0
, 1
, &scratchBuffer.getCurrentDS()
, constantsChanged || hasPredefined ? 1 : 0
, &offset
);
if (isValid(compute.m_indirectBuffer) )
{
const VertexBufferVK& vb = m_vertexBuffers[compute.m_indirectBuffer.idx];
@@ -5980,6 +6047,7 @@ VK_DESTROY
currentPipeline = VK_NULL_HANDLE;
currentBindHash = 0;
currentDslHash = 0;
currentSamplerStateIdx = kInvalidHandle;
currentProgram = BGFX_INVALID_HANDLE;
currentState.clear();
@@ -6046,79 +6114,6 @@ VK_DESTROY
uint16_t scissor = draw.m_scissor;
uint32_t bindHash = bx::hash<bx::HashMurmur2A>(renderBind.m_bind, sizeof(renderBind.m_bind) );
if (currentBindHash != bindHash
|| 0 != changedStencil
|| (hasFactor && blendFactor != draw.m_rgba)
|| (0 != (BGFX_STATE_PT_MASK & changedFlags)
|| prim.m_topology != s_primInfo[primIndex].m_topology)
|| currentState.m_scissor != scissor
|| pipeline != currentPipeline
|| hasOcclusionQuery)
{
// m_batch.flush(m_commandList);
}
// if (currentBindHash != bindHash)
// {
// currentBindHash = bindHash;
//
// Bind* bindCached = bindLru.find(bindHash);
// if (NULL == bindCached)
// {
// D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
// uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
// {
// srvHandle[0].ptr = 0;
// for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
// {
// const Binding& bind = renderBind.m_bind[stage];
// if (kInvalidHandle != bind.m_idx)
// {
// TextureD3D12& texture = m_textures[bind.m_idx];
// texture.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
// scratchBuffer.allocSrv(srvHandle[stage], texture);
// samplerFlags[stage] = (0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & bind.m_textureFlags)
// ? bind.m_textureFlags
// : texture.m_flags
// ) & (BGFX_TEXTURE_SAMPLER_BITS_MASK|BGFX_TEXTURE_BORDER_COLOR_MASK)
// ;
// }
// else
// {
// bx::memCopy(&srvHandle[stage], &srvHandle[0], sizeof(D3D12_GPU_DESCRIPTOR_HANDLE) );
// samplerFlags[stage] = 0;
// }
// }
// }
//
// if (srvHandle[0].ptr != 0)
// {
// uint16_t samplerStateIdx = getSamplerState(samplerFlags, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, _render->m_colorPalette);
// if (samplerStateIdx != currentSamplerStateIdx)
// {
// currentSamplerStateIdx = samplerStateIdx;
// m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
// }
//
// m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle[0]);
//
// Bind bind;
// bind.m_srvHandle = srvHandle[0];
// bind.m_samplerStateIdx = samplerStateIdx;
// bindLru.add(bindHash, bind, 0);
// }
// }
// else
// {
// uint16_t samplerStateIdx = bindCached->m_samplerStateIdx;
// if (samplerStateIdx != currentSamplerStateIdx)
// {
// currentSamplerStateIdx = samplerStateIdx;
// m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
// }
// m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, bindCached->m_srvHandle);
// }
// }
if (pipeline != currentPipeline
|| 0 != changedStencil)
@@ -6213,158 +6208,60 @@ VK_DESTROY
constantsChanged = true;
}
if (constantsChanged
|| hasPredefined
|| currentBindHash != bindHash)
ProgramVK& program = m_program[currentProgram.idx];
if (hasPredefined)
{
ProgramVK& program = m_program[currentProgram.idx];
ScratchBufferVK& sb = m_scratchBuffer[m_backBufferColorIdx];
VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash);
VkDescriptorSetAllocateInfo dsai;
dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
dsai.pNext = NULL;
dsai.descriptorPool = m_descriptorPool;
dsai.descriptorSetCount = 1;
dsai.pSetLayouts = &dsl;
vkAllocateDescriptorSets(m_device, &dsai, &sb.m_descriptorSet[sb.m_currentDs]);
VkDescriptorImageInfo imageInfo[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
VkDescriptorBufferInfo bufferInfo[16];
VkWriteDescriptorSet wds[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS);
uint32_t wdsCount = 0;
uint32_t bufferCount = 0;
for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
{
const Binding& bind = renderBind.m_bind[stage];
if (kInvalidHandle != bind.m_idx &&
isValid(program.m_fsh->m_bindInfo[stage].uniformHandle))
{
TextureVK& texture = m_textures[bind.m_idx];
VkSampler sampler = getSampler(
(0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
? bind.m_samplerFlags
: (uint32_t)texture.m_flags
) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
, (uint32_t)texture.m_numMips);
imageInfo[stage].imageLayout = texture.m_currentImageLayout;
imageInfo[stage].imageView = texture.m_textureImageView;
imageInfo[stage].sampler = sampler;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = program.m_fsh->m_bindInfo[stage].binding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
wds[wdsCount].pImageInfo = &imageInfo[stage];
wds[wdsCount].pBufferInfo = NULL;
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = program.m_fsh->m_bindInfo[stage].samplerBinding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
wds[wdsCount].pImageInfo = &imageInfo[stage];
wds[wdsCount].pBufferInfo = NULL;
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
}
else
{
imageInfo[stage].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageInfo[stage].imageView = VK_NULL_HANDLE;
imageInfo[stage].sampler = VK_NULL_HANDLE;
}
}
uint32_t ref = (newFlags & BGFX_STATE_ALPHA_REF_MASK) >> BGFX_STATE_ALPHA_REF_SHIFT;
viewState.m_alphaRef = ref / 255.0f;
viewState.setPredefined<4>(this, view, program, _render, draw);
}
if (currentBindHash != bindHash
|| currentDslHash != program.m_descriptorSetLayoutHash)
{
currentBindHash = bindHash;
currentDslHash = program.m_descriptorSetLayoutHash;
allocDescriptorSet(program, renderBind, scratchBuffer);
}
uint32_t numOffset = 0;
uint32_t offsets[2] = {0, 0};
if (constantsChanged
|| hasPredefined)
{
const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
const uint32_t fsize = bx::strideAlign((NULL != program.m_fsh ? program.m_fsh->m_size : 0), align);
const uint32_t total = vsize + fsize;
if (0 < total)
if (vsize > 0)
{
uint32_t vsUniformBinding = program.m_vsh->m_uniformBinding;
uint32_t fsUniformBinding = program.m_fsh->m_uniformBinding;
if (vsize > 0)
{
bufferInfo[bufferCount].buffer = sb.m_buffer;
bufferInfo[bufferCount].offset = sb.m_pos;
bufferInfo[bufferCount].range = vsize;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = vsUniformBinding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
wds[wdsCount].pImageInfo = NULL;
wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
bufferCount++;
bx::memCopy(&sb.m_data[sb.m_pos], m_vsScratch, program.m_vsh->m_size);
}
if (fsize > 0)
{
bufferInfo[bufferCount].buffer = sb.m_buffer;
bufferInfo[bufferCount].offset = sb.m_pos + vsize;
bufferInfo[bufferCount].range = fsize;
wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds[wdsCount].pNext = NULL;
wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
wds[wdsCount].dstBinding = fsUniformBinding;
wds[wdsCount].dstArrayElement = 0;
wds[wdsCount].descriptorCount = 1;
wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
wds[wdsCount].pImageInfo = NULL;
wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
wds[wdsCount].pTexelBufferView = NULL;
wdsCount++;
bufferCount++;
bx::memCopy(&sb.m_data[sb.m_pos + vsize], m_fsScratch, program.m_fsh->m_size);
}
sb.m_pos += vsize + fsize;
offsets[numOffset++] = scratchBuffer.m_pos;
bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size);
}
if (fsize > 0)
{
offsets[numOffset++] = scratchBuffer.m_pos + vsize;
bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos + vsize], m_fsScratch, program.m_fsh->m_size);
}
m_vsChanges = 0;
m_fsChanges = 0;
vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL);
vkCmdBindDescriptorSets(
m_commandBuffer
, VK_PIPELINE_BIND_POINT_GRAPHICS
, program.m_pipelineLayout
, 0
, 1
, &sb.m_descriptorSet[sb.m_currentDs]
, 0
, NULL
);
sb.m_currentDs++;
scratchBuffer.m_pos += total;
}
vkCmdBindDescriptorSets(
m_commandBuffer
, VK_PIPELINE_BIND_POINT_GRAPHICS
, program.m_pipelineLayout
, 0
, 1
, &scratchBuffer.getCurrentDS()
, numOffset
, offsets
);
// if (constantsChanged
// || hasPredefined)
// {
@@ -6638,11 +6535,11 @@ BX_UNUSED(presentMin, presentMax);
pos++;
tvm.printf(10, pos++, 0x8b, " State cache: ");
tvm.printf(10, pos++, 0x8b, " PSO | Sampler | Bind | Queued ");
tvm.printf(10, pos++, 0x8b, " %6d " //| %6d | %6d | %6d "
tvm.printf(10, pos++, 0x8b, " PSO | DSL | DS | Queued ");
tvm.printf(10, pos++, 0x8b, " %6d | %6d | %6d | %6d "
, m_pipelineStateCache.getCount()
// , m_samplerStateCache.getCount()
// , bindLru.getCount()
, m_descriptorSetLayoutCache.getCount()
, scratchBuffer.m_currentDs
// , m_cmd.m_control.available()
);
pos++;
@@ -6675,6 +6572,14 @@ BX_UNUSED(presentMin, presentMax);
// PIX_ENDEVENT();
}
VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = NULL;
range.memory = scratchBuffer.m_deviceMem;
range.offset = 0;
range.size = scratchBuffer.m_pos;
vkFlushMappedMemoryRanges(m_device, 1, &range);
if (beginRenderPass)
{
vkCmdEndRenderPass(m_commandBuffer);

View File

@@ -71,6 +71,7 @@
VK_IMPORT_INSTANCE_FUNC(false, vkEnumerateDeviceLayerProperties); \
VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceProperties); \
VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceFormatProperties); \
VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceFeatures); \
VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceImageFormatProperties); \
VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceMemoryProperties); \
VK_IMPORT_INSTANCE_FUNC(true, vkGetPhysicalDeviceMemoryProperties2KHR); \
@@ -325,6 +326,11 @@ VK_DESTROY
void destroy();
void reset();
/// Returns a reference to the descriptor set stored just below m_currentDs,
/// i.e. slot (m_currentDs - 1) of m_descriptorSet.
/// NOTE(review): presumably at least one set must have been handed out
/// (m_currentDs > 0) before this is called - confirm against the callers.
VkDescriptorSet& getCurrentDS()
{
	VkDescriptorSet* const latest = m_descriptorSet + (m_currentDs - 1);
	return *latest;
}
VkDescriptorSet* m_descriptorSet;
VkBuffer m_buffer;
VkDeviceMemory m_deviceMem;
@@ -469,6 +475,7 @@ VK_DESTROY
, m_textureImage(VK_NULL_HANDLE)
, m_textureDeviceMem(VK_NULL_HANDLE)
, m_textureImageView(VK_NULL_HANDLE)
, m_textureImageDepthView(VK_NULL_HANDLE)
, m_textureImageStorageView(VK_NULL_HANDLE)
, m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
{
@@ -498,6 +505,7 @@ VK_DESTROY
VkImage m_textureImage;
VkDeviceMemory m_textureDeviceMem;
VkImageView m_textureImageView;
VkImageView m_textureImageDepthView;
VkImageView m_textureImageStorageView;
VkImageLayout m_currentImageLayout;
};