Vulkan memory allocation LRU cache (#3422)

* First version of a memory allocation cache for Vulkan.

* Increase maximal cached memory allocations to 1024. Add configuration parameter.
This commit is contained in:
Martijn Courteaux
2025-04-12 02:21:34 +02:00
committed by GitHub
parent 27ce54a1e4
commit 0e734522cd
3 changed files with 266 additions and 80 deletions

View File

@@ -333,6 +333,14 @@ static_assert(bx::isPowerOf2(BGFX_CONFIG_MAX_VIEWS), "BGFX_CONFIG_MAX_VIEWS must
# define BGFX_CONFIG_PER_FRAME_SCRATCH_STAGING_BUFFER_SIZE (32<<20)
#endif
#ifndef BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS
/// Amount of allowed memory allocations left on device to use for recycling during
/// later allocations. This can be beneficial in case the driver is slow allocating memory
/// on the device.
/// Note: Currently only used by the Vulkan backend.
# define BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS (128 << 20)
#endif
#ifndef BGFX_CONFIG_MAX_STAGING_SIZE_FOR_SCRATCH_BUFFER
/// The threshold of data size above which the staging scratch buffer will
/// not be used, but instead a separate device memory allocation will take

View File

@@ -1136,13 +1136,14 @@ VK_IMPORT_DEVICE
{
RendererContextVK()
: m_allocatorCb(NULL)
, m_memoryLru()
, m_renderDocDll(NULL)
, m_vulkan1Dll(NULL)
, m_maxAnisotropy(1.0f)
, m_depthClamp(false)
, m_wireframe(false)
, m_captureBuffer(VK_NULL_HANDLE)
, m_captureMemory(VK_NULL_HANDLE)
, m_captureMemory()
, m_captureSize(0)
{
}
@@ -2184,6 +2185,8 @@ VK_IMPORT_DEVICE
m_backBuffer.destroy();
m_memoryLru.evictAll();
m_cmd.shutdown();
vkDestroy(m_pipelineCache);
@@ -2346,7 +2349,7 @@ VK_IMPORT_DEVICE
uint32_t pitch = texture.m_readback.pitch(_mip);
uint32_t size = height * pitch;
VkDeviceMemory stagingMemory;
DeviceMemoryAllocationVK stagingMemory;
VkBuffer stagingBuffer;
VK_CHECK(createReadbackBuffer(size, &stagingBuffer, &stagingMemory) );
@@ -2360,10 +2363,10 @@ VK_IMPORT_DEVICE
kick(true);
texture.m_readback.readback(stagingMemory, 0, _data, _mip);
texture.m_readback.readback(stagingMemory.mem, stagingMemory.offset, _data, _mip);
vkDestroy(stagingBuffer);
vkDestroy(stagingMemory);
recycleMemory(stagingMemory);
}
void resizeTexture(TextureHandle _handle, uint16_t _width, uint16_t _height, uint8_t _numMips, uint16_t _numLayers) override
@@ -2511,14 +2514,14 @@ VK_IMPORT_DEVICE
const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(swapChain.m_colorFormat) );
const uint32_t size = frameBuffer.m_width * frameBuffer.m_height * bpp / 8;
VkDeviceMemory stagingMemory;
DeviceMemoryAllocationVK stagingMemory;
VkBuffer stagingBuffer;
VK_CHECK(createReadbackBuffer(size, &stagingBuffer, &stagingMemory) );
readSwapChain(swapChain, stagingBuffer, stagingMemory, callback, _filePath);
vkDestroy(stagingBuffer);
vkDestroy(stagingMemory);
recycleMemory(stagingMemory);
}
void updateViewName(ViewId _id, const char* _name) override
@@ -2601,6 +2604,11 @@ VK_IMPORT_DEVICE
}
}
// Defer freeing of a device-memory allocation by handing it to the command
// queue. The queue holds it with the current frame-in-flight and only moves
// it into the memory LRU cache when that frame's list is consumed, so the
// allocation is not handed out again while the GPU may still reference it.
void recycleMemory(DeviceMemoryAllocationVK _alloc)
{
m_cmd.recycleMemory(_alloc);
}
void submitBlit(BlitState& _bs, uint16_t _view);
void submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) override;
@@ -2737,7 +2745,7 @@ VK_IMPORT_DEVICE
g_callback->captureEnd();
release(m_captureBuffer);
release(m_captureMemory);
recycleMemory(m_captureMemory);
m_captureSize = 0;
}
}
@@ -2760,7 +2768,7 @@ VK_IMPORT_DEVICE
if (captureSize > m_captureSize)
{
release(m_captureBuffer);
release(m_captureMemory);
recycleMemory(m_captureMemory);
m_captureSize = captureSize;
VK_CHECK(createReadbackBuffer(m_captureSize, &m_captureBuffer, &m_captureMemory) );
@@ -4059,7 +4067,7 @@ VK_IMPORT_DEVICE
typedef void (*SwapChainReadFunc)(void* /*src*/, uint32_t /*width*/, uint32_t /*height*/, uint32_t /*pitch*/, const void* /*userData*/);
bool readSwapChain(const SwapChainVK& _swapChain, VkBuffer _buffer, VkDeviceMemory _memory, SwapChainReadFunc _func, const void* _userData = NULL)
bool readSwapChain(const SwapChainVK& _swapChain, VkBuffer _buffer, DeviceMemoryAllocationVK _memory, SwapChainReadFunc _func, const void* _userData = NULL)
{
if (isSwapChainReadable(_swapChain) )
{
@@ -4080,7 +4088,7 @@ VK_IMPORT_DEVICE
kick(true);
uint8_t* src;
VK_CHECK(vkMapMemory(m_device, _memory, 0, VK_WHOLE_SIZE, 0, (void**)&src) );
VK_CHECK(vkMapMemory(m_device, _memory.mem, _memory.offset, _memory.size, 0, (void**)&src) );
if (_swapChain.m_colorFormat == TextureFormat::RGBA8)
{
@@ -4106,7 +4114,7 @@ VK_IMPORT_DEVICE
bx::free(g_allocator, dst);
}
vkUnmapMemory(m_device, _memory);
vkUnmapMemory(m_device, _memory.mem);
readback.destroy();
@@ -4354,9 +4362,31 @@ VK_IMPORT_DEVICE
return -1;
}
VkResult allocateMemory(const VkMemoryRequirements* requirements, VkMemoryPropertyFlags propertyFlags, ::VkDeviceMemory* memory) const
VkResult allocateMemory(const VkMemoryRequirements* requirements, VkMemoryPropertyFlags propertyFlags, DeviceMemoryAllocationVK* memory, bool _forcePrivateDeviceAllocation)
{
BGFX_PROFILER_SCOPE("RendererContextVK::allocateMemory", kColorResource);
// Forcing the use of a private device allocation for a certain memory allocation
// can be desirable when memory mapping the allocation. A memory allocation
// can only be mapped once. So handing out multiple subregions of one bigger
// allocation can lead to problems, when they get mapped multiple times.
// Right now, with the LRU system, we are still only handing out the full
// memory allocation, and never subregions of it, so it's impossible right
// now to map a single allocation multiple times.
// The argument is there to indicate this, but it's ignored right now, for the above
// reason: any cached memory is fine, as long as we don't partition it.
BX_UNUSED(_forcePrivateDeviceAllocation);
{
// Check LRU cache.
int memoryType = selectMemoryType(requirements->memoryTypeBits, propertyFlags, 0);
bool found = m_memoryLru.find(requirements->size, memoryType, memory);
if (found)
{
return VK_SUCCESS;
}
}
VkMemoryAllocateInfo ma;
ma.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
ma.pNext = NULL;
@@ -4371,8 +4401,12 @@ VK_IMPORT_DEVICE
if (searchIndex >= 0)
{
BGFX_PROFILER_SCOPE("vkAllocateMemory", kColorResource);
ma.memoryTypeIndex = searchIndex;
result = vkAllocateMemory(m_device, &ma, m_allocatorCb, memory);
memory->memoryTypeIndex = searchIndex;
memory->size = ma.allocationSize;
memory->offset = 0;
result = vkAllocateMemory(m_device, &ma, m_allocatorCb, &memory->mem);
}
}
while (result != VK_SUCCESS
@@ -4381,7 +4415,7 @@ VK_IMPORT_DEVICE
return result;
}
VkResult createHostBuffer(uint32_t _size, VkMemoryPropertyFlags _flags, ::VkBuffer* _buffer, ::VkDeviceMemory* _memory, const void* _data = NULL)
VkResult createHostBuffer(uint32_t _size, VkMemoryPropertyFlags _flags, ::VkBuffer* _buffer, DeviceMemoryAllocationVK* _memory, bool _forcePrivateDeviceAllocation, const void* _data = NULL)
{
BGFX_PROFILER_SCOPE("createHostBuffer", kColorResource);
VkResult result = VK_SUCCESS;
@@ -4406,12 +4440,12 @@ VK_IMPORT_DEVICE
VkMemoryRequirements mr;
vkGetBufferMemoryRequirements(m_device, *_buffer, &mr);
result = allocateMemory(&mr, _flags, _memory);
result = allocateMemory(&mr, _flags, _memory, _forcePrivateDeviceAllocation);
if (VK_SUCCESS != result
&& (_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) )
{
result = allocateMemory(&mr, _flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT, _memory);
result = allocateMemory(&mr, _flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT, _memory, _forcePrivateDeviceAllocation);
}
if (VK_SUCCESS != result)
@@ -4420,7 +4454,7 @@ VK_IMPORT_DEVICE
return result;
}
result = vkBindBufferMemory(m_device, *_buffer, *_memory, 0);
result = vkBindBufferMemory(m_device, *_buffer, _memory->mem, _memory->offset);
if (VK_SUCCESS != result)
{
BX_TRACE("Create host buffer error: vkBindBufferMemory failed %d: %s.", result, getName(result) );
@@ -4431,7 +4465,7 @@ VK_IMPORT_DEVICE
{
BGFX_PROFILER_SCOPE("map and copy data", kColorResource);
void* dst;
result = vkMapMemory(m_device, *_memory, 0, _size, 0, &dst);
result = vkMapMemory(m_device, _memory->mem, _memory->offset, _size, 0, &dst);
if (VK_SUCCESS != result)
{
BX_TRACE("Create host buffer error: vkMapMemory failed %d: %s.", result, getName(result) );
@@ -4439,19 +4473,19 @@ VK_IMPORT_DEVICE
}
bx::memCopy(dst, _data, _size);
vkUnmapMemory(m_device, *_memory);
vkUnmapMemory(m_device, _memory->mem);
}
return result;
}
VkResult createStagingBuffer(uint32_t _size, ::VkBuffer* _buffer, ::VkDeviceMemory* _memory, const void* _data = NULL)
VkResult createStagingBuffer(uint32_t _size, ::VkBuffer* _buffer, DeviceMemoryAllocationVK* _memory, const void* _data = NULL)
{
const VkMemoryPropertyFlags flags = 0
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
;
return createHostBuffer(_size, flags, _buffer, _memory, _data);
return createHostBuffer(_size, flags, _buffer, _memory, false, _data);
}
StagingBufferVK allocFromScratchStagingBuffer(uint32_t _size, uint32_t _align, const void* _data = NULL)
@@ -4467,12 +4501,12 @@ VK_IMPORT_DEVICE
if (UINT32_MAX != scratchOffset)
{
result.m_isFromScratch = true;
result.m_size = _size;
result.m_offset = scratchOffset;
result.m_buffer = scratch.m_buffer;
result.m_isFromScratch = true;
result.m_deviceMem = scratch.m_deviceMem;
result.m_data = scratch.m_data + result.m_offset;
result.m_size = _size;
result.m_offset = scratchOffset;
result.m_buffer = scratch.m_buffer;
result.m_data = scratch.m_data + result.m_offset;
if (_data != NULL)
{
@@ -4485,18 +4519,17 @@ VK_IMPORT_DEVICE
}
// Not enough space or too big, we will create a new staging buffer on the spot.
result.m_isFromScratch = false;
VK_CHECK(createStagingBuffer(_size, &result.m_buffer, &result.m_deviceMem, _data));
result.m_size = _size;
result.m_offset = 0;
result.m_data = NULL;
result.m_isFromScratch = false;
result.m_offset = 0;
result.m_size = _size;
result.m_data = NULL;
return result;
}
VkResult createReadbackBuffer(uint32_t _size, ::VkBuffer* _buffer, ::VkDeviceMemory* _memory)
VkResult createReadbackBuffer(uint32_t _size, ::VkBuffer* _buffer, DeviceMemoryAllocationVK* _memory)
{
const VkMemoryPropertyFlags flags = 0
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
@@ -4504,7 +4537,7 @@ VK_IMPORT_DEVICE
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT
;
return createHostBuffer(_size, flags, _buffer, _memory, NULL);
return createHostBuffer(_size, flags, _buffer, _memory, true, NULL);
}
VkAllocationCallbacks* m_allocatorCb;
@@ -4528,6 +4561,8 @@ VK_IMPORT_DEVICE
FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
int64_t m_presentElapsed;
MemoryLruVK m_memoryLru;
ScratchBufferVK m_scratchBuffer[BGFX_CONFIG_MAX_FRAME_LATENCY];
ScratchBufferVK m_scratchStagingBuffer[BGFX_CONFIG_MAX_FRAME_LATENCY];
@@ -4572,7 +4607,7 @@ VK_IMPORT_DEVICE
bool m_wireframe;
VkBuffer m_captureBuffer;
VkDeviceMemory m_captureMemory;
DeviceMemoryAllocationVK m_captureMemory;
uint32_t m_captureSize;
TextVideoMem m_textVideoMem;
@@ -4665,6 +4700,96 @@ VK_DESTROY
s_renderVK->release(_obj);
}
// Return a no-longer-used device-memory allocation to the LRU cache so a
// later allocateMemory() call can reuse it instead of hitting the driver.
// If the cache is full (MAX_ENTRIES handles) the least-recently-used entry
// is released and its slot is reused for the incoming allocation. After
// insertion, entries are evicted from the LRU end until the total cached
// byte count is back under BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS.
// Note: if _alloc.size alone exceeds the budget, the loop may evict the
// allocation that was just inserted, which simply frees it immediately.
void MemoryLruVK::recycle(DeviceMemoryAllocationVK &_alloc)
{
if (MAX_ENTRIES == lru.getNumHandles())
{
// Evict LRU
uint16_t handle = lru.getBack();
DeviceMemoryAllocationVK &alloc = entries[handle];
totalSizeCached -= alloc.size;
release(alloc.mem);
// Touch slot and overwrite: reuse the freed handle for the new entry
// and move it to the most-recently-used end of the list.
lru.touch(handle);
alloc = _alloc;
} else
{
uint16_t handle = lru.alloc();
entries[handle] = _alloc;
}
totalSizeCached += _alloc.size;
// Enforce the byte budget, evicting least-recently-used entries first.
while (totalSizeCached > BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS)
{
BX_ASSERT(lru.getNumHandles() > 0, "Memory badly counted.");
uint16_t handle = lru.getBack();
DeviceMemoryAllocationVK &alloc = entries[handle];
totalSizeCached -= alloc.size;
release(alloc.mem);
lru.free(handle);
}
}
// Find the best-fitting cached device-memory allocation for a request.
//
// Scans the whole LRU list for entries of the requested memory type and
// picks the one wasting the least space; an exact-size match ends the scan
// early. On success the entry is removed from the cache and returned to the
// caller, who now owns it.
//
// @param _size            Required allocation size in bytes.
// @param _memoryTypeIndex Vulkan memory type index the allocation must match.
// @param _alloc           Receives the cached allocation when found.
// @returns true when a suitable cached allocation was found (and removed
//          from the cache); false when the caller must allocate fresh memory.
bool MemoryLruVK::find(uint32_t _size, int32_t _memoryTypeIndex, DeviceMemoryAllocationVK *_alloc)
{
	BGFX_PROFILER_SCOPE("MemoryLruVK::find", kColorResource);

	// Find best fit.
	uint16_t slot;
	{
		// Use the same unsigned type as the LRU handles/MAX_ENTRIES to avoid
		// signed/unsigned mixing (was int16_t).
		uint16_t bestIdx = MAX_ENTRIES;
		uint32_t bestWaste = UINT32_MAX;
		slot = lru.getFront();
		while (UINT16_MAX != slot)
		{
			DeviceMemoryAllocationVK &alloc = entries[slot];
			if (alloc.memoryTypeIndex == _memoryTypeIndex)
			{
				// 50% waste allowed, otherwise we'll just allocate a new one.
				// This prevents trashing this cache of useful allocations
				// with a handful of tiny allocations.
				// Written as `alloc.size - _size <= _size` (instead of
				// `_size * 2 >= alloc.size`) so very large requests cannot
				// overflow the uint32_t multiplication.
				if (alloc.size >= _size && alloc.size - _size <= _size)
				{
					uint32_t waste = alloc.size - _size;
					if (waste < bestWaste)
					{
						bestIdx = slot;
						bestWaste = waste;
						if (0 == waste)
						{
							// Exact fit; can't do better.
							break;
						}
					}
				}
			}
			slot = lru.getNext(slot);
		}
		slot = bestIdx;
	}

	if (MAX_ENTRIES != slot)
	{
		*_alloc = entries[slot];
		lru.free(slot);
		totalSizeCached -= _alloc->size;
		return true;
	}

	return false;
}
void MemoryLruVK::evictAll()
{
uint16_t slot = lru.getFront();
while (slot != UINT16_MAX)
{
release(entries[slot].mem);
slot = lru.getNext(slot);
}
lru.reset();
totalSizeCached = 0;
}
void ScratchBufferVK::create(uint32_t _size, uint32_t _count, VkBufferUsageFlags usage, uint32_t _align)
{
const VkAllocationCallbacks* allocatorCb = s_renderVK->m_allocatorCb;
@@ -4698,21 +4823,21 @@ VK_DESTROY
);
VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
VkResult result = s_renderVK->allocateMemory(&mr, flags, &m_deviceMem);
VkResult result = s_renderVK->allocateMemory(&mr, flags, &m_deviceMem, true);
if (VK_SUCCESS != result)
{
flags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
VK_CHECK(s_renderVK->allocateMemory(&mr, flags, &m_deviceMem) );
VK_CHECK(s_renderVK->allocateMemory(&mr, flags, &m_deviceMem, true) );
}
m_size = (uint32_t)mr.size;
m_pos = 0;
m_align = _align;
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem, 0) );
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem.mem, m_deviceMem.offset) );
VK_CHECK(vkMapMemory(device, m_deviceMem, 0, m_size, 0, (void**)&m_data) );
VK_CHECK(vkMapMemory(device, m_deviceMem.mem, m_deviceMem.offset, m_size, 0, (void**)&m_data) );
}
void ScratchBufferVK::createUniform(uint32_t _size, uint32_t _count)
@@ -4733,10 +4858,10 @@ VK_DESTROY
void ScratchBufferVK::destroy()
{
vkUnmapMemory(s_renderVK->m_device, m_deviceMem);
vkUnmapMemory(s_renderVK->m_device, m_deviceMem.mem);
s_renderVK->release(m_buffer);
s_renderVK->release(m_deviceMem);
s_renderVK->recycleMemory(m_deviceMem);
}
@@ -4779,8 +4904,8 @@ VK_DESTROY
VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = NULL;
range.memory = m_deviceMem;
range.offset = 0;
range.memory = m_deviceMem.mem;
range.offset = m_deviceMem.offset;
range.size = size;
VK_CHECK(vkFlushMappedMemoryRanges(device, 1, &range) );
@@ -4823,9 +4948,9 @@ VK_DESTROY
VkMemoryRequirements mr;
vkGetBufferMemoryRequirements(device, m_buffer, &mr);
VK_CHECK(s_renderVK->allocateMemory(&mr, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_deviceMem) );
VK_CHECK(s_renderVK->allocateMemory(&mr, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_deviceMem, false) );
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem, 0) );
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem.mem, m_deviceMem.offset) );
if (!m_dynamic)
{
@@ -4855,7 +4980,7 @@ VK_DESTROY
if (!stagingBuffer.m_isFromScratch)
{
s_renderVK->release(stagingBuffer.m_buffer);
s_renderVK->release(stagingBuffer.m_deviceMem);
s_renderVK->recycleMemory(stagingBuffer.m_deviceMem);
}
}
@@ -4864,7 +4989,7 @@ VK_DESTROY
if (VK_NULL_HANDLE != m_buffer)
{
s_renderVK->release(m_buffer);
s_renderVK->release(m_deviceMem);
s_renderVK->recycleMemory(m_deviceMem);
m_dynamic = false;
}
@@ -5451,7 +5576,7 @@ VK_DESTROY
return result;
}
result = vkMapMemory(device, m_readbackMemory, 0, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
result = vkMapMemory(device, m_readbackMemory.mem, m_readbackMemory.offset, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
if (VK_SUCCESS != result)
{
@@ -5475,8 +5600,8 @@ VK_DESTROY
{
vkDestroy(m_queryPool);
vkDestroy(m_readback);
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory);
vkDestroy(m_readbackMemory);
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory.mem);
s_renderVK->recycleMemory(m_readbackMemory);
}
uint32_t TimerQueryVK::begin(uint32_t _resultIdx, uint32_t _frameNum)
@@ -5606,7 +5731,7 @@ VK_DESTROY
return result;
}
result = vkMapMemory(device, m_readbackMemory, 0, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
result = vkMapMemory(device, m_readbackMemory.mem, m_readbackMemory.offset, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
if (VK_SUCCESS != result)
{
@@ -5623,8 +5748,8 @@ VK_DESTROY
{
vkDestroy(m_queryPool);
vkDestroy(m_readback);
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory);
vkDestroy(m_readbackMemory);
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory.mem);
s_renderVK->recycleMemory(m_readbackMemory);
}
void OcclusionQueryVK::begin(OcclusionQueryHandle _handle)
@@ -5924,14 +6049,14 @@ VK_DESTROY
VkMemoryRequirements imageMemReq;
vkGetImageMemoryRequirements(device, m_textureImage, &imageMemReq);
result = s_renderVK->allocateMemory(&imageMemReq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_textureDeviceMem);
result = s_renderVK->allocateMemory(&imageMemReq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_textureDeviceMem, false);
if (VK_SUCCESS != result)
{
BX_TRACE("Create texture image error: allocateMemory failed %d: %s.", result, getName(result) );
return result;
}
result = vkBindImageMemory(device, m_textureImage, m_textureDeviceMem, 0);
result = vkBindImageMemory(device, m_textureImage, m_textureDeviceMem.mem, m_textureDeviceMem.offset);
if (VK_SUCCESS != result)
{
BX_TRACE("Create texture image error: vkBindImageMemory failed %d: %s.", result, getName(result) );
@@ -5967,14 +6092,14 @@ VK_DESTROY
VkMemoryRequirements imageMemReq_resolve;
vkGetImageMemoryRequirements(device, m_singleMsaaImage, &imageMemReq_resolve);
result = s_renderVK->allocateMemory(&imageMemReq_resolve, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_singleMsaaDeviceMem);
result = s_renderVK->allocateMemory(&imageMemReq_resolve, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_singleMsaaDeviceMem, false);
if (VK_SUCCESS != result)
{
BX_TRACE("Create texture image error: allocateMemory failed %d: %s.", result, getName(result) );
return result;
}
result = vkBindImageMemory(device, m_singleMsaaImage, m_singleMsaaDeviceMem, 0);
result = vkBindImageMemory(device, m_singleMsaaImage, m_singleMsaaDeviceMem.mem, m_singleMsaaDeviceMem.offset);
if (VK_SUCCESS != result)
{
BX_TRACE("Create texture image error: vkBindImageMemory failed %d: %s.", result, getName(result) );
@@ -6226,8 +6351,8 @@ VK_DESTROY
{
VK_CHECK(vkMapMemory(
device
, stagingBuffer.m_deviceMem
, 0
, stagingBuffer.m_deviceMem.mem
, stagingBuffer.m_deviceMem.offset
, totalMemSize
, 0
, (void**)&mappedMemory
@@ -6253,7 +6378,7 @@ VK_DESTROY
if (!stagingBuffer.m_isFromScratch)
{
vkUnmapMemory(device, stagingBuffer.m_deviceMem);
vkUnmapMemory(device, stagingBuffer.m_deviceMem.mem);
}
copyBufferToTexture(_commandBuffer, stagingBuffer.m_buffer, numSrd, bufferCopyInfo);
@@ -6261,7 +6386,7 @@ VK_DESTROY
if (!stagingBuffer.m_isFromScratch)
{
s_renderVK->release(stagingBuffer.m_buffer);
s_renderVK->release(stagingBuffer.m_deviceMem);
s_renderVK->recycleMemory(stagingBuffer.m_deviceMem);
}
}
else
@@ -6292,13 +6417,13 @@ VK_DESTROY
if (VK_NULL_HANDLE != m_textureImage)
{
s_renderVK->release(m_textureImage);
s_renderVK->release(m_textureDeviceMem);
s_renderVK->recycleMemory(m_textureDeviceMem);
}
if (VK_NULL_HANDLE != m_singleMsaaImage)
{
s_renderVK->release(m_singleMsaaImage);
s_renderVK->release(m_singleMsaaDeviceMem);
s_renderVK->recycleMemory(m_singleMsaaDeviceMem);
}
m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -6353,7 +6478,7 @@ VK_DESTROY
StagingBufferVK stagingBuffer = s_renderVK->allocFromScratchStagingBuffer(size, align, data);
region.bufferOffset += stagingBuffer.m_offset;
BX_ASSERT(region.bufferOffset % align == 0,
"Alignment for image (mip %u, z %s) is not aligned correctly (%u).",
"Alignment for image (mip %u, z %u) is not aligned correctly (%u).",
_mip, _z, region.bufferOffset, align);
if (VK_IMAGE_VIEW_TYPE_3D == m_type)
@@ -6375,7 +6500,7 @@ VK_DESTROY
if (!stagingBuffer.m_isFromScratch)
{
s_renderVK->release(stagingBuffer.m_buffer);
s_renderVK->release(stagingBuffer.m_deviceMem);
s_renderVK->recycleMemory(stagingBuffer.m_deviceMem);
}
if (NULL != temp)
@@ -8260,11 +8385,22 @@ VK_DESTROY
m_release[m_currentFrameInFlight].push_back(resource);
}
// Queue a device-memory allocation for recycling on the current frame in
// flight. consume() later moves this frame's list into the memory LRU cache,
// so the allocation is only reused once the GPU work that may still
// reference it has been retired.
void CommandQueueVK::recycleMemory(DeviceMemoryAllocationVK _mem)
{
m_recycleAllocs[m_currentFrameInFlight].push_back(_mem);
}
void CommandQueueVK::consume()
{
BGFX_PROFILER_SCOPE("CommandQueueVK::consume", kColorResource);
m_consumeIndex = (m_consumeIndex + 1) % m_numFramesInFlight;
for (DeviceMemoryAllocationVK &alloc : m_recycleAllocs[m_consumeIndex])
{
s_renderVK->m_memoryLru.recycle(alloc);
}
m_recycleAllocs[m_consumeIndex].clear();
for (const Resource& resource : m_release[m_consumeIndex])
{
switch (resource.m_type)
@@ -8289,6 +8425,7 @@ VK_DESTROY
}
}
m_release[m_consumeIndex].clear();
}

View File

@@ -307,7 +307,7 @@ namespace bgfx { namespace vk
::Vk##_name* operator &() { return &vk; } \
const ::Vk##_name* operator &() const { return &vk; } \
}; \
static_assert(sizeof(::Vk##_name) == sizeof(Vk##_name) ); \
static_assert(sizeof(::Vk##_name) == sizeof(Vk##_name) ); \
void vkDestroy(Vk##_name&); \
void release(Vk##_name&)
VK_DESTROY
@@ -372,14 +372,52 @@ VK_DESTROY_FUNC(DescriptorSet);
HashMap m_hashMap;
};
// A device-memory allocation handed out by the Vulkan backend: the raw
// VkDeviceMemory handle plus the sub-range (offset/size) the owner may use,
// and the memory type it was allocated from (used for LRU cache matching).
// Note: currently the LRU system always hands out the full allocation
// (offset 0), never sub-regions.
struct DeviceMemoryAllocationVK {
DeviceMemoryAllocationVK()
: mem(VK_NULL_HANDLE)
, offset(0)
, size(0)
, memoryTypeIndex(0)
{
}
VkDeviceMemory mem; // Raw Vulkan allocation handle (VK_NULL_HANDLE when empty).
uint32_t offset; // Byte offset into `mem` usable by the owner.
uint32_t size; // Usable size in bytes.
int32_t memoryTypeIndex; // Vulkan memory type index this was allocated from.
};
// Least-recently-used cache of device-memory allocations, used to recycle
// VkDeviceMemory instead of re-allocating through the (potentially slow)
// driver. Bounded both by entry count (MAX_ENTRIES) and by total cached
// bytes (BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS).
struct MemoryLruVK
{
MemoryLruVK()
: entries()
, lru()
, totalSizeCached(0)
{
}
// Maximum number of allocations kept in the cache.
static constexpr uint16_t MAX_ENTRIES = 1 << 10;
DeviceMemoryAllocationVK entries[MAX_ENTRIES]; // Storage indexed by LRU handle.
bx::HandleAllocLruT<MAX_ENTRIES> lru; // Tracks recency order of `entries`.
uint64_t totalSizeCached; // Sum of `size` over all cached entries, in bytes.
// Insert an allocation into the cache, evicting old entries as needed.
void recycle(DeviceMemoryAllocationVK &_alloc);
// Remove and return the best-fitting cached allocation; false if none fits.
bool find(uint32_t _size, int32_t _memoryTypeIndex, DeviceMemoryAllocationVK *_alloc);
// Release all cached allocations and reset the cache (shutdown path).
void evictAll();
};
/** A Buffer used for moving data from main memory to GPU memory.
* This can either be an independently allocated memory region, or a sub-region
* of the scratch staging buffer for the frame-in-flight.
*/
struct StagingBufferVK
{
VkBuffer m_buffer;
VkDeviceMemory m_deviceMem;
DeviceMemoryAllocationVK m_deviceMem;
uint8_t* m_data;
uint32_t m_size;
uint32_t m_offset;
uint32_t m_offset; // Offset into the bound buffer (not the device memory!)
bool m_isFromScratch;
};
@@ -403,7 +441,7 @@ VK_DESTROY_FUNC(DescriptorSet);
void flush(bool _reset = true);
VkBuffer m_buffer;
VkDeviceMemory m_deviceMem;
DeviceMemoryAllocationVK m_deviceMem;
uint8_t* m_data;
uint32_t m_size;
@@ -415,7 +453,7 @@ VK_DESTROY_FUNC(DescriptorSet);
{
BufferVK()
: m_buffer(VK_NULL_HANDLE)
, m_deviceMem(VK_NULL_HANDLE)
, m_deviceMem()
, m_size(0)
, m_flags(BGFX_BUFFER_NONE)
, m_dynamic(false)
@@ -427,7 +465,7 @@ VK_DESTROY_FUNC(DescriptorSet);
void destroy();
VkBuffer m_buffer;
VkDeviceMemory m_deviceMem;
DeviceMemoryAllocationVK m_deviceMem;
uint32_t m_size;
uint16_t m_flags;
bool m_dynamic;
@@ -589,7 +627,7 @@ VK_DESTROY_FUNC(DescriptorSet);
Query m_query[BGFX_CONFIG_MAX_VIEWS*4];
VkBuffer m_readback;
VkDeviceMemory m_readbackMemory;
DeviceMemoryAllocationVK m_readbackMemory;
VkQueryPool m_queryPool;
const uint64_t* m_queryResult;
bx::RingBufferControl m_control;
@@ -613,7 +651,7 @@ VK_DESTROY_FUNC(DescriptorSet);
OcclusionQueryHandle m_handle[BGFX_CONFIG_MAX_OCCLUSION_QUERIES];
VkBuffer m_readback;
VkDeviceMemory m_readbackMemory;
DeviceMemoryAllocationVK m_readbackMemory;
VkQueryPool m_queryPool;
const uint32_t* m_queryResult;
bx::RingBufferControl m_control;
@@ -640,10 +678,10 @@ VK_DESTROY_FUNC(DescriptorSet);
, m_sampler({ 1, VK_SAMPLE_COUNT_1_BIT })
, m_format(VK_FORMAT_UNDEFINED)
, m_textureImage(VK_NULL_HANDLE)
, m_textureDeviceMem(VK_NULL_HANDLE)
, m_textureDeviceMem()
, m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
, m_singleMsaaImage(VK_NULL_HANDLE)
, m_singleMsaaDeviceMem(VK_NULL_HANDLE)
, m_singleMsaaDeviceMem()
, m_currentSingleMsaaImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
{
}
@@ -680,13 +718,13 @@ VK_DESTROY_FUNC(DescriptorSet);
VkComponentMapping m_components;
VkImageAspectFlags m_aspectMask;
VkImage m_textureImage;
VkDeviceMemory m_textureDeviceMem;
VkImageLayout m_currentImageLayout;
VkImage m_textureImage;
DeviceMemoryAllocationVK m_textureDeviceMem;
VkImageLayout m_currentImageLayout;
VkImage m_singleMsaaImage;
VkDeviceMemory m_singleMsaaDeviceMem;
VkImageLayout m_currentSingleMsaaImageLayout;
VkImage m_singleMsaaImage;
DeviceMemoryAllocationVK m_singleMsaaDeviceMem;
VkImageLayout m_currentSingleMsaaImageLayout;
VkImageLayout m_sampledLayout;
@@ -841,6 +879,7 @@ VK_DESTROY_FUNC(DescriptorSet);
void finish(bool _finishAll = false);
void release(uint64_t _handle, VkObjectType _type);
void recycleMemory(DeviceMemoryAllocationVK _mem);
void consume();
uint32_t m_queueFamily;
@@ -881,6 +920,8 @@ VK_DESTROY_FUNC(DescriptorSet);
typedef stl::vector<Resource> ResourceArray;
ResourceArray m_release[BGFX_CONFIG_MAX_FRAME_LATENCY];
stl::vector<DeviceMemoryAllocationVK> m_recycleAllocs[BGFX_CONFIG_MAX_FRAME_LATENCY];
private:
template<typename Ty>