Add Persistent Buffers

This work is heavily refactored and rewritten from TheForge's initial
code.

TheForge's original code had too many race conditions and was
fundamentally flawed, as it was too easy to run into those data races
by accident.

However, they identified the places that needed changes, and the idea
was sound. I used their work as a blueprint for this design.

This PR implements:

 - Introduction of UMA buffers, used for a few of our buffers
(most notably the ones filled by _fill_instance_data).

Ironically, this change seems to benefit PC more than it does Mobile.
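
To illustrate the idea, here is a minimal, self-contained sketch of the frame-cycling scheme behind BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT (the struct and names below are illustrative only, not the engine's actual API): each persistent buffer is allocated frame_count times its requested size and mapped once at creation; every frame the CPU advances to the next region before writing, so it never touches memory the GPU may still be reading, and the region in use is selected at bind time via a dynamic descriptor offset.

    #include <cstdint>

    // Illustrative sketch only: mirrors the scheme used by persistent buffers.
    struct PersistentBuffer {
        uint8_t *persistent_ptr = nullptr; // Mapped once at creation, unmapped only on free.
        uint64_t size = 0;                 // Aligned size of ONE frame's region.
        uint32_t frame_idx = 0;            // Region the CPU may write to this frame.
        uint32_t frame_count = 0;          // The allocation holds size * frame_count bytes.

        // Call at most once per frame: step to the next region before writing.
        uint8_t *map_advance() {
            frame_idx = (frame_idx + 1u) % frame_count;
            return persistent_ptr + frame_idx * size;
        }

        // Dynamic offset handed to vkCmdBindDescriptorSets for this buffer's binding.
        uint32_t dynamic_offset() const {
            return uint32_t(frame_idx * size);
        }
    };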

Updates D3D12 Memory Allocator to get GPU_UPLOAD heap support.
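
As a rough sketch of what that enables (hypothetical helper, not code from this PR; assumes Agility SDK headers that define D3D12_HEAP_TYPE_GPU_UPLOAD and D3D12_FEATURE_DATA_D3D12_OPTIONS16, plus D3D12MemAlloc.h and an existing ID3D12Device):

    // Prefer the GPU_UPLOAD heap (CPU-writable memory that lives in VRAM, needs
    // Resizable BAR) when the driver reports support; fall back to a classic
    // system-memory UPLOAD heap otherwise.
    D3D12MA::ALLOCATION_DESC make_upload_alloc_desc(ID3D12Device *device) {
        D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16 = {};
        // If the query fails, GPUUploadHeapSupported stays FALSE and we fall back.
        device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16));

        D3D12MA::ALLOCATION_DESC alloc_desc = {};
        alloc_desc.HeapType = options16.GPUUploadHeapSupported
                ? D3D12_HEAP_TYPE_GPU_UPLOAD // Device-local and CPU-visible.
                : D3D12_HEAP_TYPE_UPLOAD;    // System memory, read by the GPU over PCIe.
        // alloc_desc is then passed to D3D12MA::Allocator::CreateResource() as usual.
        return alloc_desc;
    }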

Metal implementation by Stuart Carnie.

Co-authored-by: Stuart Carnie <stuart.carnie@gmail.com>
Co-authored-by: TheForge team
Stuart Carnie
2025-10-18 07:00:58 +11:00
parent 5950fca36c
commit 230adb7511
38 changed files with 2848 additions and 1466 deletions


@ -58,6 +58,8 @@
static const uint32_t BREADCRUMB_BUFFER_ENTRIES = 512u;
#endif
static const uint32_t MAX_DYNAMIC_BUFFERS = 8u; // Minimum guaranteed by Vulkan.
static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {
VK_FORMAT_R4G4_UNORM_PACK8,
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
@ -1569,7 +1571,7 @@ Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t
max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
breadcrumb_buffer = buffer_create(2u * sizeof(uint32_t) * BREADCRUMB_BUFFER_ENTRIES, BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU);
breadcrumb_buffer = buffer_create(2u * sizeof(uint32_t) * BREADCRUMB_BUFFER_ENTRIES, BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU, UINT64_MAX);
#endif
#if defined(SWAPPY_FRAME_PACING_ENABLED)
@ -1634,11 +1636,28 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_VERTEX_BIT, VK_BUFFER_USAGE_V
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_INDIRECT_BIT, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) {
RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
uint32_t alignment = 16u; // 16 bytes is reasonable.
if (p_usage.has_flag(BUFFER_USAGE_UNIFORM_BIT)) {
// Some GPUs (e.g. NVIDIA) have absurdly high alignments, like 256 bytes.
alignment = MAX(alignment, physical_device_properties.limits.minUniformBufferOffsetAlignment);
}
if (p_usage.has_flag(BUFFER_USAGE_STORAGE_BIT)) {
// This shouldn't be a problem since it's often <= 16 bytes. But do it just in case.
alignment = MAX(alignment, physical_device_properties.limits.minStorageBufferOffsetAlignment);
}
// Align the size. This is especially important for BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT buffers.
// For the rest, it should work thanks to VMA taking care of the details. But still align just in case.
p_size = STEPIFY(p_size, alignment);
const size_t original_size = p_size;
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
p_size = p_size * frame_count;
}
VkBufferCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
create_info.size = p_size;
create_info.usage = p_usage;
create_info.usage = p_usage & ~BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT;
create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VmaMemoryUsage vma_usage = VMA_MEMORY_USAGE_UNKNOWN;
@ -1670,6 +1689,9 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie
// We must set it right now or else vmaFindMemoryTypeIndexForBufferInfo will use wrong parameters.
alloc_create_info.usage = vma_usage;
}
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
}
alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
if (p_size <= SMALL_ALLOCATION_MAX_SIZE) {
uint32_t mem_type_index = 0;
@ -1698,11 +1720,26 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie
}
// Bookkeep.
BufferInfo *buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
BufferInfo *buf_info;
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
void *persistent_ptr = nullptr;
VkResult err = vmaMapMemory(allocator, allocation, &persistent_ptr);
ERR_FAIL_COND_V_MSG(err, BufferID(), "vmaMapMemory failed with error " + itos(err) + ".");
BufferDynamicInfo *dyn_buffer = VersatileResource::allocate<BufferDynamicInfo>(resources_allocator);
buf_info = dyn_buffer;
#ifdef DEBUG_ENABLED
dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;
#endif
dyn_buffer->frame_idx = 0u;
dyn_buffer->persistent_ptr = (uint8_t *)persistent_ptr;
} else {
buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
}
buf_info->vk_buffer = vk_buffer;
buf_info->allocation.handle = allocation;
buf_info->allocation.size = alloc_info.size;
buf_info->size = p_size;
buf_info->size = original_size;
return BufferID(buf_info);
}
@ -1730,6 +1767,10 @@ void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) {
vkDestroyBufferView(vk_device, buf_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW));
}
if (buf_info->is_dynamic()) {
vmaUnmapMemory(allocator, buf_info->allocation.handle);
}
if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
vmaDestroyBuffer(allocator, buf_info->vk_buffer, buf_info->allocation.handle);
} else {
@ -1737,7 +1778,11 @@ void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) {
vmaFreeMemory(allocator, buf_info->allocation.handle);
}
VersatileResource::free(resources_allocator, buf_info);
if (buf_info->is_dynamic()) {
VersatileResource::free(resources_allocator, (BufferDynamicInfo *)buf_info);
} else {
VersatileResource::free(resources_allocator, buf_info);
}
}
uint64_t RenderingDeviceDriverVulkan::buffer_get_allocation_size(BufferID p_buffer) {
@ -1747,6 +1792,7 @@ uint64_t RenderingDeviceDriverVulkan::buffer_get_allocation_size(BufferID p_buff
uint8_t *RenderingDeviceDriverVulkan::buffer_map(BufferID p_buffer) {
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), nullptr, "Buffer must NOT have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_persistent_map_advance() instead.");
void *data_ptr = nullptr;
VkResult err = vmaMapMemory(allocator, buf_info->allocation.handle, &data_ptr);
ERR_FAIL_COND_V_MSG(err, nullptr, "vmaMapMemory failed with error " + itos(err) + ".");
@ -1758,6 +1804,38 @@ void RenderingDeviceDriverVulkan::buffer_unmap(BufferID p_buffer) {
vmaUnmapMemory(allocator, buf_info->allocation.handle);
}
uint8_t *RenderingDeviceDriverVulkan::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {
BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id;
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
buf_info->last_frame_mapped = p_frames_drawn;
#endif
buf_info->frame_idx = (buf_info->frame_idx + 1u) % frame_count;
return buf_info->persistent_ptr + buf_info->frame_idx * buf_info->size;
}
void RenderingDeviceDriverVulkan::buffer_flush(BufferID p_buffer) {
BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id;
VkMemoryPropertyFlags mem_props_flags;
vmaGetAllocationMemoryProperties(allocator, buf_info->allocation.handle, &mem_props_flags);
const bool needs_flushing = !(mem_props_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
if (needs_flushing) {
if (buf_info->is_dynamic()) {
pending_flushes.allocations.push_back(buf_info->allocation.handle);
pending_flushes.offsets.push_back(buf_info->frame_idx * buf_info->size);
pending_flushes.sizes.push_back(buf_info->size);
} else {
pending_flushes.allocations.push_back(buf_info->allocation.handle);
pending_flushes.offsets.push_back(0u);
pending_flushes.sizes.push_back(VK_WHOLE_SIZE);
}
}
}
uint64_t RenderingDeviceDriverVulkan::buffer_get_device_address(BufferID p_buffer) {
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
VkBufferDeviceAddressInfo address_info = {};
@ -2780,6 +2858,18 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu
wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
if (!pending_flushes.allocations.is_empty()) {
// We must do this now, even if p_cmd_buffers is empty, because afterwards pending_flushes.allocations
// could become dangling. We cannot delay this call to the next frame(s).
err = vmaFlushAllocations(allocator, pending_flushes.allocations.size(),
pending_flushes.allocations.ptr(), pending_flushes.offsets.ptr(),
pending_flushes.sizes.ptr());
pending_flushes.allocations.clear();
pending_flushes.offsets.clear();
pending_flushes.sizes.clear();
ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
}
if (p_cmd_buffers.size() > 0) {
thread_local LocalVector<VkCommandBuffer> command_buffers;
thread_local LocalVector<VkSemaphore> present_semaphores;
@ -3713,9 +3803,15 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Re
case UNIFORM_TYPE_UNIFORM_BUFFER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
} break;
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
} break;
@ -3942,6 +4038,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat
curr_vk_size++;
vk_sizes_count++;
}
if (p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC]) {
*curr_vk_size = {};
curr_vk_size->type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC] * max_descriptor_sets_per_pool;
curr_vk_size++;
vk_sizes_count++;
}
if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER]) {
*curr_vk_size = {};
curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@ -3949,6 +4052,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat
curr_vk_size++;
vk_sizes_count++;
}
if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC]) {
*curr_vk_size = {};
curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC] * max_descriptor_sets_per_pool;
curr_vk_size++;
vk_sizes_count++;
}
if (p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT]) {
*curr_vk_size = {};
curr_vk_size->type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
@ -4012,6 +4122,12 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
p_linear_pool_index = -1;
}
DescriptorSetPoolKey pool_key;
// We first gather dynamic buffers in a local array because TightLocalVector's
// growth is not efficient when the number of elements is unknown.
const BufferInfo *dynamic_buffers[MAX_DYNAMIC_BUFFERS];
uint32_t num_dynamic_buffers = 0u;
// Immutable samplers will be skipped so we need to track the number of vk_writes used.
VkWriteDescriptorSet *vk_writes = ALLOCA_ARRAY(VkWriteDescriptorSet, p_uniforms.size());
uint32_t writes_amount = 0;
@ -4147,9 +4263,28 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
vk_buf_info->buffer = buf_info->vk_buffer;
vk_buf_info->range = buf_info->size;
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER instead of UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC.");
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
*vk_buf_info = {};
vk_buf_info->buffer = buf_info->vk_buffer;
vk_buf_info->range = buf_info->size;
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC instead of UNIFORM_TYPE_UNIFORM_BUFFER.");
ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
"Uniform set exceeded the limit of dynamic/persistent buffers. (" + itos(MAX_DYNAMIC_BUFFERS) + ").");
dynamic_buffers[num_dynamic_buffers++] = buf_info;
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER: {
const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
@ -4157,9 +4292,28 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
vk_buf_info->buffer = buf_info->vk_buffer;
vk_buf_info->range = buf_info->size;
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER instead of UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC.");
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
*vk_buf_info = {};
vk_buf_info->buffer = buf_info->vk_buffer;
vk_buf_info->range = buf_info->size;
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC instead of UNIFORM_TYPE_STORAGE_BUFFER.");
ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
"Uniform set exceeded the limit of dynamic/persistent buffers. (" + itos(MAX_DYNAMIC_BUFFERS) + ").");
dynamic_buffers[num_dynamic_buffers++] = buf_info;
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
} break;
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
num_descriptors = uniform.ids.size();
VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);
@ -4223,6 +4377,10 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
usi->vk_descriptor_pool = vk_pool;
}
usi->pool_sets_it = pool_sets_it;
usi->dynamic_buffers.resize(num_dynamic_buffers);
for (uint32_t i = 0u; i < num_dynamic_buffers; ++i) {
usi->dynamic_buffers[i] = dynamic_buffers[i];
}
return UniformSetID(usi);
}
@ -4249,6 +4407,31 @@ bool RenderingDeviceDriverVulkan::uniform_sets_have_linear_pools() const {
return true;
}
uint32_t RenderingDeviceDriverVulkan::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {
uint32_t mask = 0u;
uint32_t shift = 0u;
#ifdef DEV_ENABLED
uint32_t curr_dynamic_offset = 0u;
#endif
for (uint32_t i = 0; i < p_set_count; i++) {
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
// At this point this assert should already have been validated.
DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);
for (const BufferInfo *dynamic_buffer : usi->dynamic_buffers) {
DEV_ASSERT(dynamic_buffer->frame_idx < 16u);
mask |= dynamic_buffer->frame_idx << shift;
shift += 4u;
}
#ifdef DEV_ENABLED
curr_dynamic_offset += usi->dynamic_buffers.size();
#endif
}
return mask;
}
void RenderingDeviceDriverVulkan::linear_uniform_set_pools_reset(int p_linear_pool_index) {
if (linear_descriptor_pools_enabled) {
DescriptorSetPools &pools_to_reset = linear_descriptor_set_pools[p_linear_pool_index];
@ -4844,14 +5027,7 @@ void RenderingDeviceDriverVulkan::command_bind_render_pipeline(CommandBufferID p
vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id);
}
void RenderingDeviceDriverVulkan::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id;
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr);
}
void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
if (p_set_count == 0) {
return;
}
@ -4860,13 +5036,29 @@ void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBuffer
sets.clear();
sets.resize(p_set_count);
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
uint32_t shift = 0u;
uint32_t curr_dynamic_offset = 0u;
for (uint32_t i = 0; i < p_set_count; i++) {
sets[i] = ((const UniformSetInfo *)p_uniform_sets[i].id)->vk_descriptor_set;
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
sets[i] = usi->vk_descriptor_set;
// At this point this assert should already have been validated.
DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);
const uint32_t dynamic_offset_count = usi->dynamic_buffers.size();
for (uint32_t j = 0u; j < dynamic_offset_count; ++j) {
const uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
shift += 4u;
dynamic_offsets[curr_dynamic_offset++] = uint32_t(frame_idx * usi->dynamic_buffers[j]->size);
}
}
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr);
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], curr_dynamic_offset, dynamic_offsets);
}
void RenderingDeviceDriverVulkan::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
@ -5290,14 +5482,7 @@ void RenderingDeviceDriverVulkan::command_bind_compute_pipeline(CommandBufferID
vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id);
}
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id;
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr);
}
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
if (p_set_count == 0) {
return;
}
@ -5306,13 +5491,29 @@ void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBuffe
sets.clear();
sets.resize(p_set_count);
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
uint32_t shift = 0u;
uint32_t curr_dynamic_offset = 0u;
for (uint32_t i = 0; i < p_set_count; i++) {
sets[i] = ((const UniformSetInfo *)p_uniform_sets[i].id)->vk_descriptor_set;
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
sets[i] = usi->vk_descriptor_set;
// At this point this assert should already have been validated.
DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);
const uint32_t dynamic_offset_count = usi->dynamic_buffers.size();
for (uint32_t j = 0u; j < dynamic_offset_count; ++j) {
const uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
shift += 4u;
dynamic_offsets[curr_dynamic_offset++] = uint32_t(frame_idx * usi->dynamic_buffers[j]->size);
}
}
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr);
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], curr_dynamic_offset, dynamic_offsets);
}
void RenderingDeviceDriverVulkan::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {