mirror of
https://github.com/bkaradzic/bgfx.git
synced 2026-02-17 20:52:36 +01:00
Vulkan memory allocation LRU cache (#3422)
* First version of a memory allocation cache for Vulkan. * Increase maximal cached memory allocations to 1024. Add configuration parameter.
This commit is contained in:
committed by
GitHub
parent
27ce54a1e4
commit
0e734522cd
@@ -333,6 +333,14 @@ static_assert(bx::isPowerOf2(BGFX_CONFIG_MAX_VIEWS), "BGFX_CONFIG_MAX_VIEWS must
|
||||
# define BGFX_CONFIG_PER_FRAME_SCRATCH_STAGING_BUFFER_SIZE (32<<20)
|
||||
#endif
|
||||
|
||||
#ifndef BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS
|
||||
/// Amount of allowed memory allocations left on device to use for recycling during
|
||||
/// later allocations. This can be benificial in case the driver is slow allocating memory
|
||||
/// on the device.
|
||||
/// Note: Currently only used by the Vulkan backend.
|
||||
# define BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS (128 << 20)
|
||||
#endif
|
||||
|
||||
#ifndef BGFX_CONFIG_MAX_STAGING_SIZE_FOR_SCRATCH_BUFFER
|
||||
/// The threshold of data size above which the staging scratch buffer will
|
||||
/// not be used, but instead a separate device memory allocation will take
|
||||
|
||||
@@ -1136,13 +1136,14 @@ VK_IMPORT_DEVICE
|
||||
{
|
||||
RendererContextVK()
|
||||
: m_allocatorCb(NULL)
|
||||
, m_memoryLru()
|
||||
, m_renderDocDll(NULL)
|
||||
, m_vulkan1Dll(NULL)
|
||||
, m_maxAnisotropy(1.0f)
|
||||
, m_depthClamp(false)
|
||||
, m_wireframe(false)
|
||||
, m_captureBuffer(VK_NULL_HANDLE)
|
||||
, m_captureMemory(VK_NULL_HANDLE)
|
||||
, m_captureMemory()
|
||||
, m_captureSize(0)
|
||||
{
|
||||
}
|
||||
@@ -2184,6 +2185,8 @@ VK_IMPORT_DEVICE
|
||||
|
||||
m_backBuffer.destroy();
|
||||
|
||||
m_memoryLru.evictAll();
|
||||
|
||||
m_cmd.shutdown();
|
||||
|
||||
vkDestroy(m_pipelineCache);
|
||||
@@ -2346,7 +2349,7 @@ VK_IMPORT_DEVICE
|
||||
uint32_t pitch = texture.m_readback.pitch(_mip);
|
||||
uint32_t size = height * pitch;
|
||||
|
||||
VkDeviceMemory stagingMemory;
|
||||
DeviceMemoryAllocationVK stagingMemory;
|
||||
VkBuffer stagingBuffer;
|
||||
VK_CHECK(createReadbackBuffer(size, &stagingBuffer, &stagingMemory) );
|
||||
|
||||
@@ -2360,10 +2363,10 @@ VK_IMPORT_DEVICE
|
||||
|
||||
kick(true);
|
||||
|
||||
texture.m_readback.readback(stagingMemory, 0, _data, _mip);
|
||||
texture.m_readback.readback(stagingMemory.mem, stagingMemory.offset, _data, _mip);
|
||||
|
||||
vkDestroy(stagingBuffer);
|
||||
vkDestroy(stagingMemory);
|
||||
recycleMemory(stagingMemory);
|
||||
}
|
||||
|
||||
void resizeTexture(TextureHandle _handle, uint16_t _width, uint16_t _height, uint8_t _numMips, uint16_t _numLayers) override
|
||||
@@ -2511,14 +2514,14 @@ VK_IMPORT_DEVICE
|
||||
const uint8_t bpp = bimg::getBitsPerPixel(bimg::TextureFormat::Enum(swapChain.m_colorFormat) );
|
||||
const uint32_t size = frameBuffer.m_width * frameBuffer.m_height * bpp / 8;
|
||||
|
||||
VkDeviceMemory stagingMemory;
|
||||
DeviceMemoryAllocationVK stagingMemory;
|
||||
VkBuffer stagingBuffer;
|
||||
VK_CHECK(createReadbackBuffer(size, &stagingBuffer, &stagingMemory) );
|
||||
|
||||
readSwapChain(swapChain, stagingBuffer, stagingMemory, callback, _filePath);
|
||||
|
||||
vkDestroy(stagingBuffer);
|
||||
vkDestroy(stagingMemory);
|
||||
recycleMemory(stagingMemory);
|
||||
}
|
||||
|
||||
void updateViewName(ViewId _id, const char* _name) override
|
||||
@@ -2601,6 +2604,11 @@ VK_IMPORT_DEVICE
|
||||
}
|
||||
}
|
||||
|
||||
void recycleMemory(DeviceMemoryAllocationVK _alloc)
|
||||
{
|
||||
m_cmd.recycleMemory(_alloc);
|
||||
}
|
||||
|
||||
void submitBlit(BlitState& _bs, uint16_t _view);
|
||||
|
||||
void submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) override;
|
||||
@@ -2737,7 +2745,7 @@ VK_IMPORT_DEVICE
|
||||
g_callback->captureEnd();
|
||||
|
||||
release(m_captureBuffer);
|
||||
release(m_captureMemory);
|
||||
recycleMemory(m_captureMemory);
|
||||
m_captureSize = 0;
|
||||
}
|
||||
}
|
||||
@@ -2760,7 +2768,7 @@ VK_IMPORT_DEVICE
|
||||
if (captureSize > m_captureSize)
|
||||
{
|
||||
release(m_captureBuffer);
|
||||
release(m_captureMemory);
|
||||
recycleMemory(m_captureMemory);
|
||||
|
||||
m_captureSize = captureSize;
|
||||
VK_CHECK(createReadbackBuffer(m_captureSize, &m_captureBuffer, &m_captureMemory) );
|
||||
@@ -4059,7 +4067,7 @@ VK_IMPORT_DEVICE
|
||||
|
||||
typedef void (*SwapChainReadFunc)(void* /*src*/, uint32_t /*width*/, uint32_t /*height*/, uint32_t /*pitch*/, const void* /*userData*/);
|
||||
|
||||
bool readSwapChain(const SwapChainVK& _swapChain, VkBuffer _buffer, VkDeviceMemory _memory, SwapChainReadFunc _func, const void* _userData = NULL)
|
||||
bool readSwapChain(const SwapChainVK& _swapChain, VkBuffer _buffer, DeviceMemoryAllocationVK _memory, SwapChainReadFunc _func, const void* _userData = NULL)
|
||||
{
|
||||
if (isSwapChainReadable(_swapChain) )
|
||||
{
|
||||
@@ -4080,7 +4088,7 @@ VK_IMPORT_DEVICE
|
||||
kick(true);
|
||||
|
||||
uint8_t* src;
|
||||
VK_CHECK(vkMapMemory(m_device, _memory, 0, VK_WHOLE_SIZE, 0, (void**)&src) );
|
||||
VK_CHECK(vkMapMemory(m_device, _memory.mem, _memory.offset, _memory.size, 0, (void**)&src) );
|
||||
|
||||
if (_swapChain.m_colorFormat == TextureFormat::RGBA8)
|
||||
{
|
||||
@@ -4106,7 +4114,7 @@ VK_IMPORT_DEVICE
|
||||
bx::free(g_allocator, dst);
|
||||
}
|
||||
|
||||
vkUnmapMemory(m_device, _memory);
|
||||
vkUnmapMemory(m_device, _memory.mem);
|
||||
|
||||
readback.destroy();
|
||||
|
||||
@@ -4354,9 +4362,31 @@ VK_IMPORT_DEVICE
|
||||
return -1;
|
||||
}
|
||||
|
||||
VkResult allocateMemory(const VkMemoryRequirements* requirements, VkMemoryPropertyFlags propertyFlags, ::VkDeviceMemory* memory) const
|
||||
VkResult allocateMemory(const VkMemoryRequirements* requirements, VkMemoryPropertyFlags propertyFlags, DeviceMemoryAllocationVK* memory, bool _forcePrivateDeviceAllocation)
|
||||
{
|
||||
BGFX_PROFILER_SCOPE("RendererContextVK::allocateMemory", kColorResource);
|
||||
|
||||
// Forcing the use of a private device allocation for a certain memory allocation
|
||||
// can be desireable when memory mapping the allocation. A memory allocation
|
||||
// can only be mapped once. So handing out multiple subregions of one bigger
|
||||
// allocation can lead to problems, when they get mapped multiple times.
|
||||
// Right now, with the LRU system, we are still only handing out the full
|
||||
// memory allocation, and never subregions of it, so it's impossible right
|
||||
// now to map a single allocation multiple times.
|
||||
// The argument is there to indicate this, but it's ignored right now, for the above
|
||||
// reason: any cached memory is fine, as long as we don't partition it.
|
||||
BX_UNUSED(_forcePrivateDeviceAllocation);
|
||||
{
|
||||
// Check LRU cache.
|
||||
int memoryType = selectMemoryType(requirements->memoryTypeBits, propertyFlags, 0);
|
||||
bool found = m_memoryLru.find(requirements->size, memoryType, memory);
|
||||
if (found)
|
||||
{
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
VkMemoryAllocateInfo ma;
|
||||
ma.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
ma.pNext = NULL;
|
||||
@@ -4371,8 +4401,12 @@ VK_IMPORT_DEVICE
|
||||
|
||||
if (searchIndex >= 0)
|
||||
{
|
||||
BGFX_PROFILER_SCOPE("vkAllocateMemory", kColorResource);
|
||||
ma.memoryTypeIndex = searchIndex;
|
||||
result = vkAllocateMemory(m_device, &ma, m_allocatorCb, memory);
|
||||
memory->memoryTypeIndex = searchIndex;
|
||||
memory->size = ma.allocationSize;
|
||||
memory->offset = 0;
|
||||
result = vkAllocateMemory(m_device, &ma, m_allocatorCb, &memory->mem);
|
||||
}
|
||||
}
|
||||
while (result != VK_SUCCESS
|
||||
@@ -4381,7 +4415,7 @@ VK_IMPORT_DEVICE
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult createHostBuffer(uint32_t _size, VkMemoryPropertyFlags _flags, ::VkBuffer* _buffer, ::VkDeviceMemory* _memory, const void* _data = NULL)
|
||||
VkResult createHostBuffer(uint32_t _size, VkMemoryPropertyFlags _flags, ::VkBuffer* _buffer, DeviceMemoryAllocationVK* _memory, bool _forcePrivateDeviceAllocation, const void* _data = NULL)
|
||||
{
|
||||
BGFX_PROFILER_SCOPE("createHostBuffer", kColorResource);
|
||||
VkResult result = VK_SUCCESS;
|
||||
@@ -4406,12 +4440,12 @@ VK_IMPORT_DEVICE
|
||||
VkMemoryRequirements mr;
|
||||
vkGetBufferMemoryRequirements(m_device, *_buffer, &mr);
|
||||
|
||||
result = allocateMemory(&mr, _flags, _memory);
|
||||
result = allocateMemory(&mr, _flags, _memory, _forcePrivateDeviceAllocation);
|
||||
|
||||
if (VK_SUCCESS != result
|
||||
&& (_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) )
|
||||
{
|
||||
result = allocateMemory(&mr, _flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT, _memory);
|
||||
result = allocateMemory(&mr, _flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT, _memory, _forcePrivateDeviceAllocation);
|
||||
}
|
||||
|
||||
if (VK_SUCCESS != result)
|
||||
@@ -4420,7 +4454,7 @@ VK_IMPORT_DEVICE
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vkBindBufferMemory(m_device, *_buffer, *_memory, 0);
|
||||
result = vkBindBufferMemory(m_device, *_buffer, _memory->mem, _memory->offset);
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
BX_TRACE("Create host buffer error: vkBindBufferMemory failed %d: %s.", result, getName(result) );
|
||||
@@ -4431,7 +4465,7 @@ VK_IMPORT_DEVICE
|
||||
{
|
||||
BGFX_PROFILER_SCOPE("map and copy data", kColorResource);
|
||||
void* dst;
|
||||
result = vkMapMemory(m_device, *_memory, 0, _size, 0, &dst);
|
||||
result = vkMapMemory(m_device, _memory->mem, _memory->offset, _size, 0, &dst);
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
BX_TRACE("Create host buffer error: vkMapMemory failed %d: %s.", result, getName(result) );
|
||||
@@ -4439,19 +4473,19 @@ VK_IMPORT_DEVICE
|
||||
}
|
||||
|
||||
bx::memCopy(dst, _data, _size);
|
||||
vkUnmapMemory(m_device, *_memory);
|
||||
vkUnmapMemory(m_device, _memory->mem);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult createStagingBuffer(uint32_t _size, ::VkBuffer* _buffer, ::VkDeviceMemory* _memory, const void* _data = NULL)
|
||||
VkResult createStagingBuffer(uint32_t _size, ::VkBuffer* _buffer, DeviceMemoryAllocationVK* _memory, const void* _data = NULL)
|
||||
{
|
||||
const VkMemoryPropertyFlags flags = 0
|
||||
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
||||
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
|
||||
;
|
||||
return createHostBuffer(_size, flags, _buffer, _memory, _data);
|
||||
return createHostBuffer(_size, flags, _buffer, _memory, false, _data);
|
||||
}
|
||||
|
||||
StagingBufferVK allocFromScratchStagingBuffer(uint32_t _size, uint32_t _align, const void* _data = NULL)
|
||||
@@ -4467,12 +4501,12 @@ VK_IMPORT_DEVICE
|
||||
|
||||
if (UINT32_MAX != scratchOffset)
|
||||
{
|
||||
result.m_isFromScratch = true;
|
||||
result.m_size = _size;
|
||||
result.m_offset = scratchOffset;
|
||||
result.m_buffer = scratch.m_buffer;
|
||||
result.m_isFromScratch = true;
|
||||
result.m_deviceMem = scratch.m_deviceMem;
|
||||
result.m_data = scratch.m_data + result.m_offset;
|
||||
result.m_size = _size;
|
||||
result.m_offset = scratchOffset;
|
||||
result.m_buffer = scratch.m_buffer;
|
||||
result.m_data = scratch.m_data + result.m_offset;
|
||||
|
||||
if (_data != NULL)
|
||||
{
|
||||
@@ -4485,18 +4519,17 @@ VK_IMPORT_DEVICE
|
||||
}
|
||||
|
||||
// Not enough space or too big, we will create a new staging buffer on the spot.
|
||||
result.m_isFromScratch = false;
|
||||
|
||||
VK_CHECK(createStagingBuffer(_size, &result.m_buffer, &result.m_deviceMem, _data));
|
||||
|
||||
result.m_size = _size;
|
||||
result.m_offset = 0;
|
||||
result.m_data = NULL;
|
||||
result.m_isFromScratch = false;
|
||||
result.m_offset = 0;
|
||||
result.m_size = _size;
|
||||
result.m_data = NULL;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult createReadbackBuffer(uint32_t _size, ::VkBuffer* _buffer, ::VkDeviceMemory* _memory)
|
||||
VkResult createReadbackBuffer(uint32_t _size, ::VkBuffer* _buffer, DeviceMemoryAllocationVK* _memory)
|
||||
{
|
||||
const VkMemoryPropertyFlags flags = 0
|
||||
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
||||
@@ -4504,7 +4537,7 @@ VK_IMPORT_DEVICE
|
||||
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT
|
||||
;
|
||||
|
||||
return createHostBuffer(_size, flags, _buffer, _memory, NULL);
|
||||
return createHostBuffer(_size, flags, _buffer, _memory, true, NULL);
|
||||
}
|
||||
|
||||
VkAllocationCallbacks* m_allocatorCb;
|
||||
@@ -4528,6 +4561,8 @@ VK_IMPORT_DEVICE
|
||||
FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
|
||||
int64_t m_presentElapsed;
|
||||
|
||||
MemoryLruVK m_memoryLru;
|
||||
|
||||
ScratchBufferVK m_scratchBuffer[BGFX_CONFIG_MAX_FRAME_LATENCY];
|
||||
ScratchBufferVK m_scratchStagingBuffer[BGFX_CONFIG_MAX_FRAME_LATENCY];
|
||||
|
||||
@@ -4572,7 +4607,7 @@ VK_IMPORT_DEVICE
|
||||
bool m_wireframe;
|
||||
|
||||
VkBuffer m_captureBuffer;
|
||||
VkDeviceMemory m_captureMemory;
|
||||
DeviceMemoryAllocationVK m_captureMemory;
|
||||
uint32_t m_captureSize;
|
||||
|
||||
TextVideoMem m_textVideoMem;
|
||||
@@ -4665,6 +4700,96 @@ VK_DESTROY
|
||||
s_renderVK->release(_obj);
|
||||
}
|
||||
|
||||
void MemoryLruVK::recycle(DeviceMemoryAllocationVK &_alloc)
|
||||
{
|
||||
if (MAX_ENTRIES == lru.getNumHandles())
|
||||
{
|
||||
// Evict LRU
|
||||
uint16_t handle = lru.getBack();
|
||||
DeviceMemoryAllocationVK &alloc = entries[handle];
|
||||
totalSizeCached -= alloc.size;
|
||||
release(alloc.mem);
|
||||
|
||||
// Touch slot and overwrite
|
||||
lru.touch(handle);
|
||||
alloc = _alloc;
|
||||
} else
|
||||
{
|
||||
uint16_t handle = lru.alloc();
|
||||
entries[handle] = _alloc;
|
||||
}
|
||||
totalSizeCached += _alloc.size;
|
||||
|
||||
while (totalSizeCached > BGFX_CONFIG_MAX_BYTES_CACHED_DEVICE_MEMORY_ALLOCATIONS)
|
||||
{
|
||||
BX_ASSERT(lru.getNumHandles() > 0, "Memory badly counted.");
|
||||
uint16_t handle = lru.getBack();
|
||||
DeviceMemoryAllocationVK &alloc = entries[handle];
|
||||
totalSizeCached -= alloc.size;
|
||||
release(alloc.mem);
|
||||
lru.free(handle);
|
||||
}
|
||||
}
|
||||
|
||||
bool MemoryLruVK::find(uint32_t _size, int32_t _memoryTypeIndex, DeviceMemoryAllocationVK *_alloc)
|
||||
{
|
||||
BGFX_PROFILER_SCOPE("MemoryLruVK::find", kColorResource);
|
||||
// Find best fit.
|
||||
uint16_t slot;
|
||||
{
|
||||
int16_t bestIdx = MAX_ENTRIES;
|
||||
uint32_t bestWaste = 0xffff'ffff;
|
||||
slot = lru.getFront();
|
||||
while (UINT16_MAX != slot)
|
||||
{
|
||||
DeviceMemoryAllocationVK &alloc = entries[slot];
|
||||
if (alloc.memoryTypeIndex == _memoryTypeIndex)
|
||||
{
|
||||
// 50% waste allowed, otherwise we'll just allocate a new one.
|
||||
// This is to prevent we trash this cache of usefull allocations
|
||||
// with a handful of tiny allocations.
|
||||
if (alloc.size >= _size && _size * 2 >= alloc.size)
|
||||
{
|
||||
uint32_t waste = alloc.size - _size;
|
||||
if (waste < bestWaste)
|
||||
{
|
||||
bestIdx = slot;
|
||||
bestWaste = waste;
|
||||
if (waste == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
slot = lru.getNext(slot);
|
||||
}
|
||||
slot = bestIdx;
|
||||
}
|
||||
|
||||
if (MAX_ENTRIES != slot)
|
||||
{
|
||||
*_alloc = entries[slot];
|
||||
lru.free(slot);
|
||||
totalSizeCached -= _alloc->size;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryLruVK::evictAll()
|
||||
{
|
||||
uint16_t slot = lru.getFront();
|
||||
while (slot != UINT16_MAX)
|
||||
{
|
||||
release(entries[slot].mem);
|
||||
slot = lru.getNext(slot);
|
||||
}
|
||||
lru.reset();
|
||||
totalSizeCached = 0;
|
||||
}
|
||||
|
||||
void ScratchBufferVK::create(uint32_t _size, uint32_t _count, VkBufferUsageFlags usage, uint32_t _align)
|
||||
{
|
||||
const VkAllocationCallbacks* allocatorCb = s_renderVK->m_allocatorCb;
|
||||
@@ -4698,21 +4823,21 @@ VK_DESTROY
|
||||
);
|
||||
|
||||
VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
VkResult result = s_renderVK->allocateMemory(&mr, flags, &m_deviceMem);
|
||||
VkResult result = s_renderVK->allocateMemory(&mr, flags, &m_deviceMem, true);
|
||||
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
flags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
VK_CHECK(s_renderVK->allocateMemory(&mr, flags, &m_deviceMem) );
|
||||
VK_CHECK(s_renderVK->allocateMemory(&mr, flags, &m_deviceMem, true) );
|
||||
}
|
||||
|
||||
m_size = (uint32_t)mr.size;
|
||||
m_pos = 0;
|
||||
m_align = _align;
|
||||
|
||||
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem, 0) );
|
||||
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem.mem, m_deviceMem.offset) );
|
||||
|
||||
VK_CHECK(vkMapMemory(device, m_deviceMem, 0, m_size, 0, (void**)&m_data) );
|
||||
VK_CHECK(vkMapMemory(device, m_deviceMem.mem, m_deviceMem.offset, m_size, 0, (void**)&m_data) );
|
||||
}
|
||||
|
||||
void ScratchBufferVK::createUniform(uint32_t _size, uint32_t _count)
|
||||
@@ -4733,10 +4858,10 @@ VK_DESTROY
|
||||
|
||||
void ScratchBufferVK::destroy()
|
||||
{
|
||||
vkUnmapMemory(s_renderVK->m_device, m_deviceMem);
|
||||
vkUnmapMemory(s_renderVK->m_device, m_deviceMem.mem);
|
||||
|
||||
s_renderVK->release(m_buffer);
|
||||
s_renderVK->release(m_deviceMem);
|
||||
s_renderVK->recycleMemory(m_deviceMem);
|
||||
}
|
||||
|
||||
|
||||
@@ -4779,8 +4904,8 @@ VK_DESTROY
|
||||
VkMappedMemoryRange range;
|
||||
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
|
||||
range.pNext = NULL;
|
||||
range.memory = m_deviceMem;
|
||||
range.offset = 0;
|
||||
range.memory = m_deviceMem.mem;
|
||||
range.offset = m_deviceMem.offset;
|
||||
range.size = size;
|
||||
VK_CHECK(vkFlushMappedMemoryRanges(device, 1, &range) );
|
||||
|
||||
@@ -4823,9 +4948,9 @@ VK_DESTROY
|
||||
VkMemoryRequirements mr;
|
||||
vkGetBufferMemoryRequirements(device, m_buffer, &mr);
|
||||
|
||||
VK_CHECK(s_renderVK->allocateMemory(&mr, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_deviceMem) );
|
||||
VK_CHECK(s_renderVK->allocateMemory(&mr, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_deviceMem, false) );
|
||||
|
||||
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem, 0) );
|
||||
VK_CHECK(vkBindBufferMemory(device, m_buffer, m_deviceMem.mem, m_deviceMem.offset) );
|
||||
|
||||
if (!m_dynamic)
|
||||
{
|
||||
@@ -4855,7 +4980,7 @@ VK_DESTROY
|
||||
if (!stagingBuffer.m_isFromScratch)
|
||||
{
|
||||
s_renderVK->release(stagingBuffer.m_buffer);
|
||||
s_renderVK->release(stagingBuffer.m_deviceMem);
|
||||
s_renderVK->recycleMemory(stagingBuffer.m_deviceMem);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4864,7 +4989,7 @@ VK_DESTROY
|
||||
if (VK_NULL_HANDLE != m_buffer)
|
||||
{
|
||||
s_renderVK->release(m_buffer);
|
||||
s_renderVK->release(m_deviceMem);
|
||||
s_renderVK->recycleMemory(m_deviceMem);
|
||||
|
||||
m_dynamic = false;
|
||||
}
|
||||
@@ -5451,7 +5576,7 @@ VK_DESTROY
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vkMapMemory(device, m_readbackMemory, 0, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
|
||||
result = vkMapMemory(device, m_readbackMemory.mem, m_readbackMemory.offset, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
|
||||
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
@@ -5475,8 +5600,8 @@ VK_DESTROY
|
||||
{
|
||||
vkDestroy(m_queryPool);
|
||||
vkDestroy(m_readback);
|
||||
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory);
|
||||
vkDestroy(m_readbackMemory);
|
||||
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory.mem);
|
||||
s_renderVK->recycleMemory(m_readbackMemory);
|
||||
}
|
||||
|
||||
uint32_t TimerQueryVK::begin(uint32_t _resultIdx, uint32_t _frameNum)
|
||||
@@ -5606,7 +5731,7 @@ VK_DESTROY
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vkMapMemory(device, m_readbackMemory, 0, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
|
||||
result = vkMapMemory(device, m_readbackMemory.mem, m_readbackMemory.offset, VK_WHOLE_SIZE, 0, (void**)&m_queryResult);
|
||||
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
@@ -5623,8 +5748,8 @@ VK_DESTROY
|
||||
{
|
||||
vkDestroy(m_queryPool);
|
||||
vkDestroy(m_readback);
|
||||
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory);
|
||||
vkDestroy(m_readbackMemory);
|
||||
vkUnmapMemory(s_renderVK->m_device, m_readbackMemory.mem);
|
||||
s_renderVK->recycleMemory(m_readbackMemory);
|
||||
}
|
||||
|
||||
void OcclusionQueryVK::begin(OcclusionQueryHandle _handle)
|
||||
@@ -5924,14 +6049,14 @@ VK_DESTROY
|
||||
VkMemoryRequirements imageMemReq;
|
||||
vkGetImageMemoryRequirements(device, m_textureImage, &imageMemReq);
|
||||
|
||||
result = s_renderVK->allocateMemory(&imageMemReq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_textureDeviceMem);
|
||||
result = s_renderVK->allocateMemory(&imageMemReq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_textureDeviceMem, false);
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
BX_TRACE("Create texture image error: allocateMemory failed %d: %s.", result, getName(result) );
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vkBindImageMemory(device, m_textureImage, m_textureDeviceMem, 0);
|
||||
result = vkBindImageMemory(device, m_textureImage, m_textureDeviceMem.mem, m_textureDeviceMem.offset);
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
BX_TRACE("Create texture image error: vkBindImageMemory failed %d: %s.", result, getName(result) );
|
||||
@@ -5967,14 +6092,14 @@ VK_DESTROY
|
||||
VkMemoryRequirements imageMemReq_resolve;
|
||||
vkGetImageMemoryRequirements(device, m_singleMsaaImage, &imageMemReq_resolve);
|
||||
|
||||
result = s_renderVK->allocateMemory(&imageMemReq_resolve, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_singleMsaaDeviceMem);
|
||||
result = s_renderVK->allocateMemory(&imageMemReq_resolve, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_singleMsaaDeviceMem, false);
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
BX_TRACE("Create texture image error: allocateMemory failed %d: %s.", result, getName(result) );
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vkBindImageMemory(device, m_singleMsaaImage, m_singleMsaaDeviceMem, 0);
|
||||
result = vkBindImageMemory(device, m_singleMsaaImage, m_singleMsaaDeviceMem.mem, m_singleMsaaDeviceMem.offset);
|
||||
if (VK_SUCCESS != result)
|
||||
{
|
||||
BX_TRACE("Create texture image error: vkBindImageMemory failed %d: %s.", result, getName(result) );
|
||||
@@ -6226,8 +6351,8 @@ VK_DESTROY
|
||||
{
|
||||
VK_CHECK(vkMapMemory(
|
||||
device
|
||||
, stagingBuffer.m_deviceMem
|
||||
, 0
|
||||
, stagingBuffer.m_deviceMem.mem
|
||||
, stagingBuffer.m_deviceMem.offset
|
||||
, totalMemSize
|
||||
, 0
|
||||
, (void**)&mappedMemory
|
||||
@@ -6253,7 +6378,7 @@ VK_DESTROY
|
||||
|
||||
if (!stagingBuffer.m_isFromScratch)
|
||||
{
|
||||
vkUnmapMemory(device, stagingBuffer.m_deviceMem);
|
||||
vkUnmapMemory(device, stagingBuffer.m_deviceMem.mem);
|
||||
}
|
||||
|
||||
copyBufferToTexture(_commandBuffer, stagingBuffer.m_buffer, numSrd, bufferCopyInfo);
|
||||
@@ -6261,7 +6386,7 @@ VK_DESTROY
|
||||
if (!stagingBuffer.m_isFromScratch)
|
||||
{
|
||||
s_renderVK->release(stagingBuffer.m_buffer);
|
||||
s_renderVK->release(stagingBuffer.m_deviceMem);
|
||||
s_renderVK->recycleMemory(stagingBuffer.m_deviceMem);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -6292,13 +6417,13 @@ VK_DESTROY
|
||||
if (VK_NULL_HANDLE != m_textureImage)
|
||||
{
|
||||
s_renderVK->release(m_textureImage);
|
||||
s_renderVK->release(m_textureDeviceMem);
|
||||
s_renderVK->recycleMemory(m_textureDeviceMem);
|
||||
}
|
||||
|
||||
if (VK_NULL_HANDLE != m_singleMsaaImage)
|
||||
{
|
||||
s_renderVK->release(m_singleMsaaImage);
|
||||
s_renderVK->release(m_singleMsaaDeviceMem);
|
||||
s_renderVK->recycleMemory(m_singleMsaaDeviceMem);
|
||||
}
|
||||
|
||||
m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
@@ -6353,7 +6478,7 @@ VK_DESTROY
|
||||
StagingBufferVK stagingBuffer = s_renderVK->allocFromScratchStagingBuffer(size, align, data);
|
||||
region.bufferOffset += stagingBuffer.m_offset;
|
||||
BX_ASSERT(region.bufferOffset % align == 0,
|
||||
"Alignment for image (mip %u, z %s) is not aligned correctly (%u).",
|
||||
"Alignment for image (mip %u, z %u) is not aligned correctly (%u).",
|
||||
_mip, _z, region.bufferOffset, align);
|
||||
|
||||
if (VK_IMAGE_VIEW_TYPE_3D == m_type)
|
||||
@@ -6375,7 +6500,7 @@ VK_DESTROY
|
||||
if (!stagingBuffer.m_isFromScratch)
|
||||
{
|
||||
s_renderVK->release(stagingBuffer.m_buffer);
|
||||
s_renderVK->release(stagingBuffer.m_deviceMem);
|
||||
s_renderVK->recycleMemory(stagingBuffer.m_deviceMem);
|
||||
}
|
||||
|
||||
if (NULL != temp)
|
||||
@@ -8260,11 +8385,22 @@ VK_DESTROY
|
||||
m_release[m_currentFrameInFlight].push_back(resource);
|
||||
}
|
||||
|
||||
void CommandQueueVK::recycleMemory(DeviceMemoryAllocationVK _mem)
|
||||
{
|
||||
m_recycleAllocs[m_currentFrameInFlight].push_back(_mem);
|
||||
}
|
||||
|
||||
void CommandQueueVK::consume()
|
||||
{
|
||||
BGFX_PROFILER_SCOPE("CommandQueueVK::consume", kColorResource);
|
||||
m_consumeIndex = (m_consumeIndex + 1) % m_numFramesInFlight;
|
||||
|
||||
for (DeviceMemoryAllocationVK &alloc : m_recycleAllocs[m_consumeIndex])
|
||||
{
|
||||
s_renderVK->m_memoryLru.recycle(alloc);
|
||||
}
|
||||
m_recycleAllocs[m_consumeIndex].clear();
|
||||
|
||||
for (const Resource& resource : m_release[m_consumeIndex])
|
||||
{
|
||||
switch (resource.m_type)
|
||||
@@ -8289,6 +8425,7 @@ VK_DESTROY
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
m_release[m_consumeIndex].clear();
|
||||
}
|
||||
|
||||
|
||||
@@ -307,7 +307,7 @@ namespace bgfx { namespace vk
|
||||
::Vk##_name* operator &() { return &vk; } \
|
||||
const ::Vk##_name* operator &() const { return &vk; } \
|
||||
}; \
|
||||
static_assert(sizeof(::Vk##_name) == sizeof(Vk##_name) ); \
|
||||
static_assert(sizeof(::Vk##_name) == sizeof(Vk##_name) ); \
|
||||
void vkDestroy(Vk##_name&); \
|
||||
void release(Vk##_name&)
|
||||
VK_DESTROY
|
||||
@@ -372,14 +372,52 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
HashMap m_hashMap;
|
||||
};
|
||||
|
||||
struct DeviceMemoryAllocationVK {
|
||||
DeviceMemoryAllocationVK()
|
||||
: mem(VK_NULL_HANDLE)
|
||||
, offset(0)
|
||||
, size(0)
|
||||
, memoryTypeIndex(0)
|
||||
{
|
||||
}
|
||||
|
||||
VkDeviceMemory mem;
|
||||
uint32_t offset;
|
||||
uint32_t size;
|
||||
int32_t memoryTypeIndex;
|
||||
};
|
||||
|
||||
struct MemoryLruVK
|
||||
{
|
||||
MemoryLruVK()
|
||||
: entries()
|
||||
, lru()
|
||||
, totalSizeCached(0)
|
||||
{
|
||||
}
|
||||
|
||||
static constexpr uint16_t MAX_ENTRIES = 1 << 10;
|
||||
DeviceMemoryAllocationVK entries[MAX_ENTRIES];
|
||||
bx::HandleAllocLruT<MAX_ENTRIES> lru;
|
||||
uint64_t totalSizeCached;
|
||||
|
||||
void recycle(DeviceMemoryAllocationVK &_alloc);
|
||||
bool find(uint32_t _size, int32_t _memoryTypeIndex, DeviceMemoryAllocationVK *_alloc);
|
||||
void evictAll();
|
||||
};
|
||||
|
||||
/** A Buffer used for moving data from main memory to GPU memory.
|
||||
* This can either be an independently allocated memory region, or a sub-region
|
||||
* of the scratch staging buffer for the frame-in-flight.
|
||||
*/
|
||||
struct StagingBufferVK
|
||||
{
|
||||
VkBuffer m_buffer;
|
||||
VkDeviceMemory m_deviceMem;
|
||||
DeviceMemoryAllocationVK m_deviceMem;
|
||||
|
||||
uint8_t* m_data;
|
||||
uint32_t m_size;
|
||||
uint32_t m_offset;
|
||||
uint32_t m_offset; // Offset into the bound buffer (not the device memory!)
|
||||
bool m_isFromScratch;
|
||||
};
|
||||
|
||||
@@ -403,7 +441,7 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
void flush(bool _reset = true);
|
||||
|
||||
VkBuffer m_buffer;
|
||||
VkDeviceMemory m_deviceMem;
|
||||
DeviceMemoryAllocationVK m_deviceMem;
|
||||
|
||||
uint8_t* m_data;
|
||||
uint32_t m_size;
|
||||
@@ -415,7 +453,7 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
{
|
||||
BufferVK()
|
||||
: m_buffer(VK_NULL_HANDLE)
|
||||
, m_deviceMem(VK_NULL_HANDLE)
|
||||
, m_deviceMem()
|
||||
, m_size(0)
|
||||
, m_flags(BGFX_BUFFER_NONE)
|
||||
, m_dynamic(false)
|
||||
@@ -427,7 +465,7 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
void destroy();
|
||||
|
||||
VkBuffer m_buffer;
|
||||
VkDeviceMemory m_deviceMem;
|
||||
DeviceMemoryAllocationVK m_deviceMem;
|
||||
uint32_t m_size;
|
||||
uint16_t m_flags;
|
||||
bool m_dynamic;
|
||||
@@ -589,7 +627,7 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
Query m_query[BGFX_CONFIG_MAX_VIEWS*4];
|
||||
|
||||
VkBuffer m_readback;
|
||||
VkDeviceMemory m_readbackMemory;
|
||||
DeviceMemoryAllocationVK m_readbackMemory;
|
||||
VkQueryPool m_queryPool;
|
||||
const uint64_t* m_queryResult;
|
||||
bx::RingBufferControl m_control;
|
||||
@@ -613,7 +651,7 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
OcclusionQueryHandle m_handle[BGFX_CONFIG_MAX_OCCLUSION_QUERIES];
|
||||
|
||||
VkBuffer m_readback;
|
||||
VkDeviceMemory m_readbackMemory;
|
||||
DeviceMemoryAllocationVK m_readbackMemory;
|
||||
VkQueryPool m_queryPool;
|
||||
const uint32_t* m_queryResult;
|
||||
bx::RingBufferControl m_control;
|
||||
@@ -640,10 +678,10 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
, m_sampler({ 1, VK_SAMPLE_COUNT_1_BIT })
|
||||
, m_format(VK_FORMAT_UNDEFINED)
|
||||
, m_textureImage(VK_NULL_HANDLE)
|
||||
, m_textureDeviceMem(VK_NULL_HANDLE)
|
||||
, m_textureDeviceMem()
|
||||
, m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
|
||||
, m_singleMsaaImage(VK_NULL_HANDLE)
|
||||
, m_singleMsaaDeviceMem(VK_NULL_HANDLE)
|
||||
, m_singleMsaaDeviceMem()
|
||||
, m_currentSingleMsaaImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
|
||||
{
|
||||
}
|
||||
@@ -680,13 +718,13 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
VkComponentMapping m_components;
|
||||
VkImageAspectFlags m_aspectMask;
|
||||
|
||||
VkImage m_textureImage;
|
||||
VkDeviceMemory m_textureDeviceMem;
|
||||
VkImageLayout m_currentImageLayout;
|
||||
VkImage m_textureImage;
|
||||
DeviceMemoryAllocationVK m_textureDeviceMem;
|
||||
VkImageLayout m_currentImageLayout;
|
||||
|
||||
VkImage m_singleMsaaImage;
|
||||
VkDeviceMemory m_singleMsaaDeviceMem;
|
||||
VkImageLayout m_currentSingleMsaaImageLayout;
|
||||
VkImage m_singleMsaaImage;
|
||||
DeviceMemoryAllocationVK m_singleMsaaDeviceMem;
|
||||
VkImageLayout m_currentSingleMsaaImageLayout;
|
||||
|
||||
VkImageLayout m_sampledLayout;
|
||||
|
||||
@@ -841,6 +879,7 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
void finish(bool _finishAll = false);
|
||||
|
||||
void release(uint64_t _handle, VkObjectType _type);
|
||||
void recycleMemory(DeviceMemoryAllocationVK _mem);
|
||||
void consume();
|
||||
|
||||
uint32_t m_queueFamily;
|
||||
@@ -881,6 +920,8 @@ VK_DESTROY_FUNC(DescriptorSet);
|
||||
|
||||
typedef stl::vector<Resource> ResourceArray;
|
||||
ResourceArray m_release[BGFX_CONFIG_MAX_FRAME_LATENCY];
|
||||
stl::vector<DeviceMemoryAllocationVK> m_recycleAllocs[BGFX_CONFIG_MAX_FRAME_LATENCY];
|
||||
|
||||
|
||||
private:
|
||||
template<typename Ty>
|
||||
|
||||
Reference in New Issue
Block a user