Add Persistent Buffers
This work is heavily refactored and rewritten from TheForge's initial code. TheForge's original code had too many race conditions and was fundamentally flawed, as it was too easy to incur those data races by accident. However, they identified the proper places that needed changes, and the idea was sound. I used their work as a blueprint to design this work. This PR implements: - Introduction of UMA buffers used by a few buffers (most notably the ones filled by _fill_instance_data). Ironically, this change seems to positively affect PC more than it does Mobile. Updates D3D12 Memory Allocator to get GPU_UPLOAD heap support. Metal implementation by Stuart Carnie. Co-authored-by: Stuart Carnie <stuart.carnie@gmail.com> Co-authored-by: TheForge team
This commit is contained in:
@ -99,6 +99,8 @@ GODOT_MSVC_WARNING_POP
|
||||
|
||||
static const D3D12_RANGE VOID_RANGE = {};
|
||||
|
||||
static const uint32_t MAX_DYNAMIC_BUFFERS = 8u; // Minimum guaranteed by Vulkan.
|
||||
|
||||
/*****************/
|
||||
/**** GENERIC ****/
|
||||
/*****************/
|
||||
@ -1012,12 +1014,22 @@ void RenderingDeviceDriverD3D12::_resource_transitions_flush(CommandBufferInfo *
|
||||
/**** BUFFERS ****/
|
||||
/*****************/
|
||||
|
||||
RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) {
|
||||
// D3D12 debug layers complain at CBV creation time if the size is not multiple of the value per the spec
|
||||
// but also if you give a rounded size at that point because it will extend beyond the
|
||||
// memory of the resource. Therefore, it seems the only way is to create it with a
|
||||
// rounded size.
|
||||
CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
|
||||
RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
|
||||
uint32_t alignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; // 16 bytes is reasonable.
|
||||
if (p_usage.has_flag(BUFFER_USAGE_UNIFORM_BIT)) {
|
||||
// 256 bytes is absurd. Only use it when required.
|
||||
alignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT;
|
||||
}
|
||||
|
||||
// We don't have VMA like in Vulkan, that takes care of the details. We must align the size.
|
||||
p_size = STEPIFY(p_size, alignment);
|
||||
|
||||
const size_t original_size = p_size;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
p_size = p_size * frames.size();
|
||||
}
|
||||
|
||||
CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(p_size);
|
||||
if (p_usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)) {
|
||||
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
} else {
|
||||
@ -1044,6 +1056,12 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
|
||||
} break;
|
||||
case MEMORY_ALLOCATION_TYPE_GPU: {
|
||||
// Use default parameters.
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
allocation_desc.HeapType = dynamic_persistent_upload_heap;
|
||||
|
||||
// We can't use STORAGE for write access, just for read.
|
||||
resource_desc.Flags = resource_desc.Flags & ~D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
}
|
||||
} break;
|
||||
}
|
||||
|
||||
@ -1074,14 +1092,30 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
|
||||
|
||||
// Bookkeep.
|
||||
|
||||
BufferInfo *buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
|
||||
BufferInfo *buf_info;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
void *persistent_ptr = nullptr;
|
||||
res = buffer->Map(0, &VOID_RANGE, &persistent_ptr);
|
||||
ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), BufferID(), "Map failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
|
||||
|
||||
BufferDynamicInfo *dyn_buffer = VersatileResource::allocate<BufferDynamicInfo>(resources_allocator);
|
||||
buf_info = dyn_buffer;
|
||||
#ifdef DEBUG_ENABLED
|
||||
dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;
|
||||
#endif
|
||||
dyn_buffer->frame_idx = 0u;
|
||||
dyn_buffer->persistent_ptr = (uint8_t *)persistent_ptr;
|
||||
} else {
|
||||
buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
|
||||
}
|
||||
buf_info->resource = buffer.Get();
|
||||
buf_info->owner_info.resource = buffer;
|
||||
buf_info->owner_info.allocation = allocation;
|
||||
buf_info->owner_info.states.subresource_states.push_back(initial_state);
|
||||
buf_info->states_ptr = &buf_info->owner_info.states;
|
||||
buf_info->size = p_size;
|
||||
buf_info->size = original_size;
|
||||
buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
buf_info->flags.is_dynamic = p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT);
|
||||
|
||||
return BufferID(buf_info);
|
||||
}
|
||||
@ -1094,7 +1128,12 @@ bool RenderingDeviceDriverD3D12::buffer_set_texel_format(BufferID p_buffer, Data
|
||||
|
||||
void RenderingDeviceDriverD3D12::buffer_free(BufferID p_buffer) {
	BufferInfo *buf_info = (BufferInfo *)p_buffer.id;
	if (buf_info->is_dynamic()) {
		// Dynamic/persistent buffers were mapped once at creation (see buffer_create());
		// unmap before releasing, and free with the derived type so the right pool is used.
		buf_info->resource->Unmap(0, &VOID_RANGE);
		VersatileResource::free(resources_allocator, (BufferDynamicInfo *)buf_info);
	} else {
		VersatileResource::free(resources_allocator, buf_info);
	}
}
|
||||
|
||||
uint64_t RenderingDeviceDriverD3D12::buffer_get_allocation_size(BufferID p_buffer) {
|
||||
@ -1115,6 +1154,17 @@ void RenderingDeviceDriverD3D12::buffer_unmap(BufferID p_buffer) {
|
||||
buf_info->resource->Unmap(0, &VOID_RANGE);
|
||||
}
|
||||
|
||||
uint8_t *RenderingDeviceDriverD3D12::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {
	// Advance the persistently-mapped buffer to its next per-frame subregion and
	// return the CPU pointer to that subregion. Must be called at most once per frame.
	BufferDynamicInfo *dyn_info = (BufferDynamicInfo *)p_buffer.id;
	ERR_FAIL_COND_V_MSG(!dyn_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_V_MSG(dyn_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
	dyn_info->last_frame_mapped = p_frames_drawn;
#endif
	// Wrap around to the first subregion once we run past the last in-flight frame.
	uint32_t next_frame = dyn_info->frame_idx + 1u;
	if (next_frame >= frames.size()) {
		next_frame = 0u;
	}
	dyn_info->frame_idx = next_frame;
	return dyn_info->persistent_ptr + next_frame * dyn_info->size;
}
|
||||
|
||||
uint64_t RenderingDeviceDriverD3D12::buffer_get_device_address(BufferID p_buffer) {
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
|
||||
return buf_info->resource->GetGPUVirtualAddress();
|
||||
@ -3420,7 +3470,7 @@ void RenderingDeviceDriverD3D12::shader_destroy_modules(ShaderID p_shader) {
|
||||
/**** UNIFORM SET ****/
|
||||
/*********************/
|
||||
|
||||
static void _add_descriptor_count_for_uniform(RenderingDevice::UniformType p_type, uint32_t p_binding_length, bool p_double_srv_uav_ambiguous, uint32_t &r_num_resources, uint32_t &r_num_samplers, bool &r_srv_uav_ambiguity) {
|
||||
static void _add_descriptor_count_for_uniform(RenderingDevice::UniformType p_type, uint32_t p_binding_length, bool p_double_srv_uav_ambiguous, uint32_t &r_num_resources, uint32_t &r_num_samplers, bool &r_srv_uav_ambiguity, uint32_t p_frame_count) {
|
||||
r_srv_uav_ambiguity = false;
|
||||
|
||||
// Some resource types can be SRV or UAV, depending on what NIR-DXIL decided for a specific shader variant.
|
||||
@ -3440,10 +3490,18 @@ static void _add_descriptor_count_for_uniform(RenderingDevice::UniformType p_typ
|
||||
case RenderingDevice::UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
r_num_resources += 1;
|
||||
} break;
|
||||
case RenderingDevice::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
|
||||
r_num_resources += p_frame_count;
|
||||
} break;
|
||||
case RenderingDevice::UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
r_num_resources += p_double_srv_uav_ambiguous ? 2 : 1;
|
||||
r_srv_uav_ambiguity = true;
|
||||
} break;
|
||||
case RenderingDevice::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
// Dynamic storage buffers can only be SRV (we can't guarantee they get placed in
|
||||
// D3D12_HEAP_TYPE_GPU_UPLOAD heap and D3D12_HEAP_TYPE_GPU doesn't support UAV).
|
||||
r_num_resources += p_frame_count;
|
||||
} break;
|
||||
case RenderingDevice::UNIFORM_TYPE_IMAGE: {
|
||||
r_num_resources += p_binding_length * (p_double_srv_uav_ambiguous ? 2 : 1);
|
||||
r_srv_uav_ambiguity = true;
|
||||
@ -3460,6 +3518,11 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun
|
||||
// Pre-bookkeep.
|
||||
UniformSetInfo *uniform_set_info = VersatileResource::allocate<UniformSetInfo>(resources_allocator);
|
||||
|
||||
// We first gather dynamic arrays in a local array because TightLocalVector's
|
||||
// growth is not efficient when the number of elements is unknown.
|
||||
const BufferDynamicInfo *dynamic_buffers[MAX_DYNAMIC_BUFFERS];
|
||||
uint32_t num_dynamic_buffers = 0u;
|
||||
|
||||
// Do a first pass to count resources and samplers.
|
||||
uint32_t num_resource_descs = 0;
|
||||
uint32_t num_sampler_descs = 0;
|
||||
@ -3476,7 +3539,7 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun
|
||||
if (uniform.type == UNIFORM_TYPE_SAMPLER_WITH_TEXTURE || uniform.type == UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER) {
|
||||
binding_length /= 2;
|
||||
}
|
||||
_add_descriptor_count_for_uniform(uniform.type, binding_length, true, num_resource_descs, num_sampler_descs, srv_uav_ambiguity);
|
||||
_add_descriptor_count_for_uniform(uniform.type, binding_length, true, num_resource_descs, num_sampler_descs, srv_uav_ambiguity, frames.size());
|
||||
}
|
||||
#ifdef DEV_ENABLED
|
||||
uniform_set_info->resources_desc_info.reserve(num_resource_descs);
|
||||
@ -3599,64 +3662,94 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun
|
||||
case UNIFORM_TYPE_IMAGE_BUFFER: {
|
||||
CRASH_NOW_MSG("Unimplemented!");
|
||||
} break;
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER:
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
|
||||
BufferInfo *buf_info = (BufferInfo *)uniform.ids[0].id;
|
||||
|
||||
if (uniform.type == UNIFORM_TYPE_UNIFORM_BUFFER) {
|
||||
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER instead of UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC.");
|
||||
} else {
|
||||
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC instead of UNIFORM_TYPE_UNIFORM_BUFFER.");
|
||||
ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
|
||||
"Uniform set exceeded the limit of dynamic/persistent buffers. (" + itos(MAX_DYNAMIC_BUFFERS) + ").");
|
||||
|
||||
dynamic_buffers[num_dynamic_buffers++] = (const BufferDynamicInfo *)buf_info;
|
||||
}
|
||||
|
||||
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {};
|
||||
cbv_desc.BufferLocation = buf_info->resource->GetGPUVirtualAddress();
|
||||
cbv_desc.SizeInBytes = STEPIFY(buf_info->size, 256);
|
||||
device->CreateConstantBufferView(&cbv_desc, desc_heap_walkers.resources.get_curr_cpu_handle());
|
||||
desc_heap_walkers.resources.advance();
|
||||
cbv_desc.SizeInBytes = STEPIFY(buf_info->size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
|
||||
|
||||
const uint32_t subregion_count = uniform.type == UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC ? frames.size() : 1u;
|
||||
for (uint32_t j = 0u; j < subregion_count; ++j) {
|
||||
device->CreateConstantBufferView(&cbv_desc, desc_heap_walkers.resources.get_curr_cpu_handle());
|
||||
desc_heap_walkers.resources.advance();
|
||||
#ifdef DEV_ENABLED
|
||||
uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_CBV, {} });
|
||||
uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_CBV, {} });
|
||||
#endif
|
||||
cbv_desc.BufferLocation += cbv_desc.SizeInBytes;
|
||||
}
|
||||
|
||||
NeededState &ns = resource_states[buf_info];
|
||||
ns.is_buffer = true;
|
||||
ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);
|
||||
ns.states |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER;
|
||||
} break;
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER:
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
BufferInfo *buf_info = (BufferInfo *)uniform.ids[0].id;
|
||||
|
||||
// SRV first. [[SRV_UAV_AMBIGUITY]]
|
||||
{
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
|
||||
srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
srv_desc.Buffer.FirstElement = 0;
|
||||
srv_desc.Buffer.NumElements = (buf_info->size + 3) / 4;
|
||||
srv_desc.Buffer.StructureByteStride = 0;
|
||||
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
|
||||
if (uniform.type == UNIFORM_TYPE_STORAGE_BUFFER) {
|
||||
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER instead of UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC.");
|
||||
} else {
|
||||
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC instead of UNIFORM_TYPE_STORAGE_BUFFER.");
|
||||
ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
|
||||
"Uniform set exceeded the limit of dynamic/persistent buffers. (" + itos(MAX_DYNAMIC_BUFFERS) + ").");
|
||||
|
||||
dynamic_buffers[num_dynamic_buffers++] = (const BufferDynamicInfo *)buf_info;
|
||||
}
|
||||
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
|
||||
srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
srv_desc.Buffer.FirstElement = 0;
|
||||
srv_desc.Buffer.NumElements = (buf_info->size + 3u) / 4u;
|
||||
srv_desc.Buffer.StructureByteStride = 0;
|
||||
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
|
||||
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
|
||||
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
uav_desc.Buffer.FirstElement = 0;
|
||||
uav_desc.Buffer.NumElements = (buf_info->size + 3u) / 4u;
|
||||
uav_desc.Buffer.StructureByteStride = 0;
|
||||
uav_desc.Buffer.CounterOffsetInBytes = 0;
|
||||
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
|
||||
const uint32_t subregion_count = uniform.type == UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC ? frames.size() : 1u;
|
||||
for (uint32_t j = 0u; j < subregion_count; ++j) {
|
||||
// SRV first. [[SRV_UAV_AMBIGUITY]]
|
||||
device->CreateShaderResourceView(buf_info->resource, &srv_desc, desc_heap_walkers.resources.get_curr_cpu_handle());
|
||||
#ifdef DEV_ENABLED
|
||||
uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_SRV, srv_desc.ViewDimension });
|
||||
#endif
|
||||
desc_heap_walkers.resources.advance();
|
||||
}
|
||||
srv_desc.Buffer.FirstElement += srv_desc.Buffer.NumElements;
|
||||
|
||||
// UAV then. [[SRV_UAV_AMBIGUITY]]
|
||||
{
|
||||
// UAV then. [[SRV_UAV_AMBIGUITY]]
|
||||
if (buf_info->flags.usable_as_uav) {
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
|
||||
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
uav_desc.Buffer.FirstElement = 0;
|
||||
uav_desc.Buffer.NumElements = (buf_info->size + 3) / 4;
|
||||
uav_desc.Buffer.StructureByteStride = 0;
|
||||
uav_desc.Buffer.CounterOffsetInBytes = 0;
|
||||
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
device->CreateUnorderedAccessView(buf_info->resource, nullptr, &uav_desc, desc_heap_walkers.resources.get_curr_cpu_handle());
|
||||
#ifdef DEV_ENABLED
|
||||
uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_UAV, {} });
|
||||
#endif
|
||||
} else {
|
||||
// If can't transition to UAV, leave this one empty since it won't be
|
||||
// used, and trying to create an UAV view would trigger a validation error.
|
||||
uav_desc.Buffer.FirstElement += uav_desc.Buffer.NumElements;
|
||||
desc_heap_walkers.resources.advance();
|
||||
}
|
||||
|
||||
desc_heap_walkers.resources.advance();
|
||||
}
|
||||
|
||||
NeededState &ns = resource_states[buf_info];
|
||||
@ -3685,6 +3778,11 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun
|
||||
}
|
||||
}
|
||||
|
||||
uniform_set_info->dynamic_buffers.resize(num_dynamic_buffers);
|
||||
for (size_t i = 0u; i < num_dynamic_buffers; ++i) {
|
||||
uniform_set_info->dynamic_buffers[i] = dynamic_buffers[i];
|
||||
}
|
||||
|
||||
DEV_ASSERT(desc_heap_walkers.resources.is_at_eof());
|
||||
DEV_ASSERT(desc_heap_walkers.samplers.is_at_eof());
|
||||
|
||||
@ -3708,6 +3806,31 @@ void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) {
|
||||
VersatileResource::free(resources_allocator, uniform_set_info);
|
||||
}
|
||||
|
||||
uint32_t RenderingDeviceDriverD3D12::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {
	// Pack the current frame_idx of every dynamic buffer across all sets into a
	// bitmask, 4 bits per buffer, in set order then buffer order within each set.
	uint32_t packed_mask = 0u;
	uint32_t bit_pos = 0u;
#ifdef DEV_ENABLED
	uint32_t total_dynamic = 0u;
#endif

	for (uint32_t set = 0u; set < p_set_count; ++set) {
		const UniformSetInfo *set_info = (const UniformSetInfo *)p_uniform_sets[set].id;
		// At this point this assert should already have been validated.
		DEV_ASSERT(total_dynamic + set_info->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);

		for (const BufferDynamicInfo *dyn_buf : set_info->dynamic_buffers) {
			// frame_idx must fit in the 4 bits reserved per buffer.
			DEV_ASSERT(dyn_buf->frame_idx < 16u);
			packed_mask |= dyn_buf->frame_idx << bit_pos;
			bit_pos += 4u;
		}
#ifdef DEV_ENABLED
		total_dynamic += set_info->dynamic_buffers.size();
#endif
	}

	return packed_mask;
}
|
||||
|
||||
// ----- COMMANDS -----
|
||||
|
||||
void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
@ -3885,14 +4008,23 @@ void RenderingDeviceDriverD3D12::_command_check_descriptor_sets(CommandBufferID
|
||||
}
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute) {
|
||||
void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets, bool p_for_compute) {
|
||||
_command_check_descriptor_sets(p_cmd_buffer);
|
||||
|
||||
uint32_t shift = 0u;
|
||||
|
||||
UniformSetInfo *uniform_set_info = (UniformSetInfo *)p_uniform_set.id;
|
||||
const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;
|
||||
const ShaderInfo::UniformSet &shader_set = shader_info_in->sets[p_set_index];
|
||||
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
|
||||
|
||||
// The value of p_dynamic_offsets depends on all the other UniformSets bound after us
|
||||
// (caller already filtered out bits that came before us).
|
||||
// Turn that mask into something that is unique to us, *so that we don't create unnecessary entries in the cache*.
|
||||
// We may not even have dynamic buffers at all in this set. In that case p_dynamic_offsets becomes 0.
|
||||
const uint32_t used_dynamic_buffers_mask = (1u << (uniform_set_info->dynamic_buffers.size() * 4u)) - 1u;
|
||||
p_dynamic_offsets = p_dynamic_offsets & used_dynamic_buffers_mask;
|
||||
|
||||
using SetRootDescriptorTableFn = void (STDMETHODCALLTYPE ID3D12GraphicsCommandList::*)(UINT, D3D12_GPU_DESCRIPTOR_HANDLE);
|
||||
SetRootDescriptorTableFn set_root_desc_table_fn = p_for_compute ? &ID3D12GraphicsCommandList::SetComputeRootDescriptorTable : &ID3D12GraphicsCommandList1::SetGraphicsRootDescriptorTable;
|
||||
|
||||
@ -3901,7 +4033,8 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
UniformSetInfo::RecentBind *last_bind = nullptr;
|
||||
for (int i = 0; i < (int)ARRAY_SIZE(uniform_set_info->recent_binds); i++) {
|
||||
if (uniform_set_info->recent_binds[i].segment_serial == frames[frame_idx].segment_serial) {
|
||||
if (uniform_set_info->recent_binds[i].root_signature_crc == root_sig_crc) {
|
||||
if (uniform_set_info->recent_binds[i].root_signature_crc == root_sig_crc &&
|
||||
uniform_set_info->recent_binds[i].dynamic_state_mask == p_dynamic_offsets) {
|
||||
for (const RootDescriptorTable &table : uniform_set_info->recent_binds[i].root_tables.resources) {
|
||||
(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(table.root_param_idx, table.start_gpu_handle);
|
||||
}
|
||||
@ -3940,10 +4073,11 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
set_heap_walkers.resources = uniform_set_info->desc_heaps.resources.make_walker();
|
||||
set_heap_walkers.samplers = uniform_set_info->desc_heaps.samplers.make_walker();
|
||||
|
||||
const uint32_t binding_count = shader_set.bindings.size();
|
||||
#ifdef DEV_ENABLED
|
||||
// Whether we have stages where the uniform is actually used should match
|
||||
// whether we have any root signature locations for it.
|
||||
for (uint32_t i = 0; i < shader_set.bindings.size(); i++) {
|
||||
for (uint32_t i = 0; i < binding_count; i++) {
|
||||
bool has_rs_locations = false;
|
||||
if (shader_set.bindings[i].root_sig_locations.resource.root_param_idx != UINT32_MAX ||
|
||||
shader_set.bindings[i].root_sig_locations.sampler.root_param_idx != UINT32_MAX) {
|
||||
@ -3967,21 +4101,25 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
RootDescriptorTable *resources = nullptr;
|
||||
RootDescriptorTable *samplers = nullptr;
|
||||
} tables;
|
||||
for (uint32_t i = 0; i < shader_set.bindings.size(); i++) {
|
||||
for (uint32_t i = 0; i < binding_count; i++) {
|
||||
const ShaderInfo::UniformBindingInfo &binding = shader_set.bindings[i];
|
||||
|
||||
uint32_t num_resource_descs = 0;
|
||||
uint32_t num_sampler_descs = 0;
|
||||
bool srv_uav_ambiguity = false;
|
||||
_add_descriptor_count_for_uniform(binding.type, binding.length, false, num_resource_descs, num_sampler_descs, srv_uav_ambiguity);
|
||||
const uint32_t frame_count_for_binding = 1u; // _add_descriptor_count_for_uniform wants frames.size() so we can create N entries.
|
||||
// However we are binding now, and we must bind only one (not N of them), so set 1u.
|
||||
_add_descriptor_count_for_uniform(binding.type, binding.length, false, num_resource_descs, num_sampler_descs, srv_uav_ambiguity, frame_count_for_binding);
|
||||
|
||||
uint32_t dynamic_resources_to_skip = 0u;
|
||||
|
||||
bool resource_used = false;
|
||||
if (shader_set.bindings[i].stages) {
|
||||
if (binding.stages) {
|
||||
{
|
||||
const ShaderInfo::UniformBindingInfo::RootSignatureLocation &rs_loc_resource = shader_set.bindings[i].root_sig_locations.resource;
|
||||
const ShaderInfo::UniformBindingInfo::RootSignatureLocation &rs_loc_resource = binding.root_sig_locations.resource;
|
||||
if (rs_loc_resource.root_param_idx != UINT32_MAX) { // Location used?
|
||||
DEV_ASSERT(num_resource_descs);
|
||||
DEV_ASSERT(!(srv_uav_ambiguity && (shader_set.bindings[i].res_class != RES_CLASS_SRV && shader_set.bindings[i].res_class != RES_CLASS_UAV))); // [[SRV_UAV_AMBIGUITY]]
|
||||
DEV_ASSERT(!(srv_uav_ambiguity && (binding.res_class != RES_CLASS_SRV && binding.res_class != RES_CLASS_UAV))); // [[SRV_UAV_AMBIGUITY]]
|
||||
|
||||
bool must_flush_table = tables.resources && rs_loc_resource.root_param_idx != tables.resources->root_param_idx;
|
||||
if (must_flush_table) {
|
||||
@ -4010,8 +4148,16 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
tables.resources->start_gpu_handle = frame_heap_walkers.resources->get_curr_gpu_handle();
|
||||
}
|
||||
|
||||
// For dynamic buffers, jump to the last written offset.
|
||||
if (binding.type == UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC || binding.type == UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC) {
|
||||
const uint32_t dyn_frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
|
||||
shift += 4u;
|
||||
set_heap_walkers.resources.advance(num_resource_descs * dyn_frame_idx);
|
||||
dynamic_resources_to_skip = num_resource_descs * (frames.size() - dyn_frame_idx - 1u);
|
||||
}
|
||||
|
||||
// If there is ambiguity and it didn't clarify as SRVs, skip them, which come first. [[SRV_UAV_AMBIGUITY]]
|
||||
if (srv_uav_ambiguity && shader_set.bindings[i].res_class != RES_CLASS_SRV) {
|
||||
if (srv_uav_ambiguity && binding.res_class != RES_CLASS_SRV) {
|
||||
set_heap_walkers.resources.advance(num_resource_descs);
|
||||
}
|
||||
|
||||
@ -4024,7 +4170,7 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
frame_heap_walkers.resources->advance(num_resource_descs);
|
||||
|
||||
// If there is ambiguity and it didn't clarify as UAVs, skip them, which come later. [[SRV_UAV_AMBIGUITY]]
|
||||
if (srv_uav_ambiguity && shader_set.bindings[i].res_class != RES_CLASS_UAV) {
|
||||
if (srv_uav_ambiguity && binding.res_class != RES_CLASS_UAV) {
|
||||
set_heap_walkers.resources.advance(num_resource_descs);
|
||||
}
|
||||
|
||||
@ -4033,7 +4179,7 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
}
|
||||
|
||||
{
|
||||
const ShaderInfo::UniformBindingInfo::RootSignatureLocation &rs_loc_sampler = shader_set.bindings[i].root_sig_locations.sampler;
|
||||
const ShaderInfo::UniformBindingInfo::RootSignatureLocation &rs_loc_sampler = binding.root_sig_locations.sampler;
|
||||
if (rs_loc_sampler.root_param_idx != UINT32_MAX) { // Location used?
|
||||
DEV_ASSERT(num_sampler_descs);
|
||||
DEV_ASSERT(!srv_uav_ambiguity); // [[SRV_UAV_AMBIGUITY]]
|
||||
@ -4080,7 +4226,7 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
// the shader variant a given set is created upon may not need all of them due to DXC optimizations.
|
||||
// Therefore, at this point we have to advance through the descriptor set descriptor's heap unconditionally.
|
||||
|
||||
set_heap_walkers.resources.advance(num_resource_descs);
|
||||
set_heap_walkers.resources.advance(num_resource_descs + dynamic_resources_to_skip);
|
||||
if (srv_uav_ambiguity) {
|
||||
DEV_ASSERT(num_resource_descs);
|
||||
if (!resource_used) {
|
||||
@ -4109,6 +4255,7 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd
|
||||
|
||||
last_bind->root_signature_crc = root_sig_crc;
|
||||
last_bind->segment_serial = frames[frame_idx].segment_serial;
|
||||
last_bind->dynamic_state_mask = p_dynamic_offsets;
|
||||
}
|
||||
|
||||
/******************/
|
||||
@ -4983,14 +5130,16 @@ void RenderingDeviceDriverD3D12::command_bind_render_pipeline(CommandBufferID p_
|
||||
cmd_buf_info->compute_pso = nullptr;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverD3D12::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
_command_bind_uniform_set(p_cmd_buffer, p_uniform_set, p_shader, p_set_index, false);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverD3D12::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	// Bind each set for graphics, shifting p_dynamic_offsets so each set sees its own
	// 4-bits-per-dynamic-buffer frame indices first (layout produced by uniform_sets_get_dynamic_offsets()).
	uint32_t shift = 0u;
	for (uint32_t i = 0u; i < p_set_count; ++i) {
		// TODO: _command_bind_uniform_set() does WAAAAY too much stuff. A lot of it should be already cached in UniformSetID when uniform_set_create() was called. Binding is supposed to be a cheap operation, ideally a memcpy.
		_command_bind_uniform_set(p_cmd_buffer, p_uniform_sets[i], p_shader, p_first_set_index + i, p_dynamic_offsets >> shift, false);
		const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
		shift += usi->dynamic_buffers.size() * 4u;

		// At this point this assert should already have been validated.
		DEV_ASSERT((shift / 4u) <= MAX_DYNAMIC_BUFFERS);
	}
}
|
||||
|
||||
@ -5503,14 +5652,16 @@ void RenderingDeviceDriverD3D12::command_bind_compute_pipeline(CommandBufferID p
|
||||
cmd_buf_info->graphics_pso = nullptr;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
_command_bind_uniform_set(p_cmd_buffer, p_uniform_set, p_shader, p_set_index, true);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverD3D12::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	// Bind each set for compute, shifting p_dynamic_offsets so each set sees its own
	// 4-bits-per-dynamic-buffer frame indices first (layout produced by uniform_sets_get_dynamic_offsets()).
	uint32_t shift = 0u;
	for (uint32_t i = 0u; i < p_set_count; ++i) {
		// TODO: _command_bind_uniform_set() does WAAAAY too much stuff. A lot of it should be already cached in UniformSetID when uniform_set_create() was called. Binding is supposed to be a cheap operation, ideally a memcpy.
		_command_bind_uniform_set(p_cmd_buffer, p_uniform_sets[i], p_shader, p_first_set_index + i, p_dynamic_offsets >> shift, true);
		const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
		shift += usi->dynamic_buffers.size() * 4u;

		// At this point this assert should already have been validated.
		DEV_ASSERT((shift / 4u) <= MAX_DYNAMIC_BUFFERS);
	}
}
|
||||
|
||||
@ -6300,6 +6451,16 @@ Error RenderingDeviceDriverD3D12::_initialize_allocator() {
|
||||
HRESULT res = D3D12MA::CreateAllocator(&allocator_desc, &allocator);
|
||||
ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateAllocator failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
|
||||
|
||||
if (allocator->IsGPUUploadHeapSupported()) {
|
||||
dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_GPU_UPLOAD;
|
||||
print_verbose("D3D12: Device supports GPU UPLOAD heap.");
|
||||
} else {
|
||||
dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_UPLOAD;
|
||||
// Print it as a warning (instead of verbose) because in the rare chance this lesser-used code path
|
||||
// causes bugs, we get an inkling of what's going on (i.e. in order to repro bugs locally).
|
||||
WARN_PRINT("D3D12: Device does NOT support GPU UPLOAD heap. ReBAR must be enabled for this feature. Regular UPLOAD heaps will be used as fallback.");
|
||||
}
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
|
||||
@ -144,6 +144,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
|
||||
MiscFeaturesSupport misc_features_support;
|
||||
RenderingShaderContainerFormatD3D12 shader_container_format;
|
||||
String pipeline_cache_id;
|
||||
D3D12_HEAP_TYPE dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_UPLOAD;
|
||||
|
||||
class CPUDescriptorsHeapPool;
|
||||
|
||||
@ -323,16 +324,29 @@ private:
|
||||
uint64_t size = 0;
|
||||
struct {
|
||||
bool usable_as_uav : 1;
|
||||
bool is_dynamic : 1; // Only used for tracking (e.g. Vulkan needs these checks).
|
||||
} flags = {};
|
||||
|
||||
bool is_dynamic() const { return flags.is_dynamic; }
|
||||
};
|
||||
|
||||
struct BufferDynamicInfo : BufferInfo {
|
||||
uint32_t frame_idx = UINT32_MAX;
|
||||
uint8_t *persistent_ptr = nullptr;
|
||||
#ifdef DEBUG_ENABLED
|
||||
// For tracking that a persistent buffer isn't mapped twice in the same frame.
|
||||
uint64_t last_frame_mapped = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
public:
|
||||
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final;
|
||||
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) override final;
|
||||
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final;
|
||||
virtual void buffer_free(BufferID p_buffer) override final;
|
||||
virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final;
|
||||
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
|
||||
virtual void buffer_unmap(BufferID p_buffer) override final;
|
||||
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final;
|
||||
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
|
||||
|
||||
/*****************/
|
||||
@ -705,6 +719,7 @@ private:
|
||||
|
||||
struct RecentBind {
|
||||
uint64_t segment_serial = 0;
|
||||
uint32_t dynamic_state_mask = 0;
|
||||
uint32_t root_signature_crc = 0;
|
||||
struct {
|
||||
TightLocalVector<RootDescriptorTable> resources;
|
||||
@ -713,6 +728,8 @@ private:
|
||||
int uses = 0;
|
||||
} recent_binds[4]; // A better amount may be empirically found.
|
||||
|
||||
TightLocalVector<BufferDynamicInfo const *, uint32_t> dynamic_buffers;
|
||||
|
||||
#ifdef DEV_ENABLED
|
||||
// Filthy, but useful for dev.
|
||||
struct ResourceDescInfo {
|
||||
@ -726,6 +743,7 @@ private:
|
||||
public:
|
||||
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) override final;
|
||||
virtual void uniform_set_free(UniformSetID p_uniform_set) override final;
|
||||
virtual uint32_t uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const override final;
|
||||
|
||||
// ----- COMMANDS -----
|
||||
|
||||
@ -733,8 +751,7 @@ public:
|
||||
|
||||
private:
|
||||
void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer);
|
||||
void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute);
|
||||
void _command_bind_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, bool p_for_compute);
|
||||
void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets, bool p_for_compute);
|
||||
|
||||
public:
|
||||
/******************/
|
||||
@ -823,8 +840,7 @@ public:
|
||||
|
||||
// Binding.
|
||||
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
|
||||
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override final;
|
||||
|
||||
// Drawing.
|
||||
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
|
||||
@ -871,8 +887,7 @@ public:
|
||||
|
||||
// Binding.
|
||||
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
|
||||
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override final;
|
||||
|
||||
// Dispatching.
|
||||
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
|
||||
|
||||
@ -135,6 +135,8 @@ class RenderingDeviceDriverMetal;
|
||||
class MDUniformSet;
|
||||
class MDShader;
|
||||
|
||||
struct MetalBufferDynamicInfo;
|
||||
|
||||
#pragma mark - Resource Factory
|
||||
|
||||
struct ClearAttKey {
|
||||
@ -385,11 +387,12 @@ public:
|
||||
BitField<DirtyFlag> dirty = DIRTY_NONE;
|
||||
|
||||
LocalVector<MDUniformSet *> uniform_sets;
|
||||
uint32_t dynamic_offsets = 0;
|
||||
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
|
||||
uint64_t uniform_set_mask = 0;
|
||||
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
|
||||
uint32_t push_constant_data_len = 0;
|
||||
uint32_t push_constant_bindings[2] = { 0 };
|
||||
uint32_t push_constant_bindings[2] = { ~0U, ~0U };
|
||||
|
||||
_FORCE_INLINE_ void reset();
|
||||
void end_encoding();
|
||||
@ -505,11 +508,12 @@ public:
|
||||
BitField<DirtyFlag> dirty = DIRTY_NONE;
|
||||
|
||||
LocalVector<MDUniformSet *> uniform_sets;
|
||||
uint32_t dynamic_offsets = 0;
|
||||
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
|
||||
uint64_t uniform_set_mask = 0;
|
||||
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
|
||||
uint32_t push_constant_data_len = 0;
|
||||
uint32_t push_constant_bindings[1] = { 0 };
|
||||
uint32_t push_constant_bindings[1] = { ~0U };
|
||||
|
||||
_FORCE_INLINE_ void reset();
|
||||
void end_encoding();
|
||||
@ -559,8 +563,7 @@ public:
|
||||
|
||||
#pragma mark - Render Commands
|
||||
|
||||
void render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
|
||||
void render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
|
||||
void render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets);
|
||||
void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects);
|
||||
void render_set_viewport(VectorView<Rect2i> p_viewports);
|
||||
void render_set_scissor(VectorView<Rect2i> p_scissors);
|
||||
@ -593,8 +596,7 @@ public:
|
||||
|
||||
#pragma mark - Compute Commands
|
||||
|
||||
void compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
|
||||
void compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
|
||||
void compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets);
|
||||
void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
|
||||
void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);
|
||||
|
||||
@ -647,6 +649,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo {
|
||||
|
||||
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet {
|
||||
LocalVector<UniformInfo> uniforms;
|
||||
LocalVector<uint32_t> dynamic_uniforms;
|
||||
uint32_t buffer_size = 0;
|
||||
HashMap<RDC::ShaderStage, uint32_t> offsets;
|
||||
HashMap<RDC::ShaderStage, id<MTLArgumentEncoder>> encoders;
|
||||
@ -715,10 +718,62 @@ struct ShaderCacheEntry {
|
||||
~ShaderCacheEntry() = default;
|
||||
};
|
||||
|
||||
/// Godot limits the number of dynamic buffers to 8.
|
||||
///
|
||||
/// This is a minimum guarantee for Vulkan.
|
||||
constexpr uint32_t MAX_DYNAMIC_BUFFERS = 8;
|
||||
|
||||
/// Maximum number of queued frames.
|
||||
///
|
||||
/// See setting: rendering/rendering_device/vsync/frame_queue_size
|
||||
constexpr uint32_t MAX_FRAME_COUNT = 4;
|
||||
|
||||
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsetLayout {
|
||||
struct Data {
|
||||
uint8_t offset : 4;
|
||||
uint8_t count : 4;
|
||||
};
|
||||
|
||||
union {
|
||||
Data data[MAX_DYNAMIC_BUFFERS];
|
||||
uint64_t _val = 0;
|
||||
};
|
||||
|
||||
public:
|
||||
_FORCE_INLINE_ bool is_empty() const { return _val == 0; }
|
||||
|
||||
_FORCE_INLINE_ uint32_t get_count(uint32_t p_set_index) const {
|
||||
return data[p_set_index].count;
|
||||
}
|
||||
|
||||
_FORCE_INLINE_ uint32_t get_offset(uint32_t p_set_index) const {
|
||||
return data[p_set_index].offset;
|
||||
}
|
||||
|
||||
_FORCE_INLINE_ void set_offset_count(uint32_t p_set_index, uint8_t p_offset, uint8_t p_count) {
|
||||
data[p_set_index].offset = p_offset;
|
||||
data[p_set_index].count = p_count;
|
||||
}
|
||||
|
||||
_FORCE_INLINE_ uint32_t get_offset_index_shift(uint32_t p_set_index, uint32_t p_dynamic_index = 0) const {
|
||||
return (data[p_set_index].offset + p_dynamic_index) * 4u;
|
||||
}
|
||||
};
|
||||
|
||||
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsets {
|
||||
uint32_t data;
|
||||
|
||||
public:
|
||||
_FORCE_INLINE_ uint32_t get_frame_index(const DynamicOffsetLayout &p_layout) const {
|
||||
return data;
|
||||
}
|
||||
};
|
||||
|
||||
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader {
|
||||
public:
|
||||
CharString name;
|
||||
Vector<UniformSet> sets;
|
||||
DynamicOffsetLayout dynamic_offset_layout;
|
||||
bool uses_argument_buffers = true;
|
||||
|
||||
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
|
||||
@ -786,30 +841,49 @@ struct HashMapComparatorDefault<RDD::ShaderID> {
|
||||
struct BoundUniformSet {
|
||||
id<MTLBuffer> buffer;
|
||||
ResourceUsageMap usage_to_resources;
|
||||
/// Size of the per-frame buffer, which is 0 when there are no dynamic uniforms.
|
||||
uint32_t frame_size = 0;
|
||||
|
||||
/// Perform a 2-way merge each key of `ResourceVector` resources from this set into the
|
||||
/// destination set.
|
||||
///
|
||||
/// Assumes the vectors of resources are sorted.
|
||||
void merge_into(ResourceUsageMap &p_dst) const;
|
||||
|
||||
/// Returns true if this bound uniform set contains dynamic uniforms.
|
||||
_FORCE_INLINE_ bool is_dynamic() const { return frame_size > 0; }
|
||||
|
||||
/// Calculate the offset in the Metal buffer for the current frame.
|
||||
_FORCE_INLINE_ uint32_t frame_offset(uint32_t p_frame_index) const { return p_frame_index * frame_size; }
|
||||
|
||||
/// Calculate the offset in the buffer for the given frame index and base offset.
|
||||
_FORCE_INLINE_ uint32_t make_offset(uint32_t p_frame_index, uint32_t p_base_offset) const {
|
||||
return frame_offset(p_frame_index) + p_base_offset;
|
||||
}
|
||||
|
||||
BoundUniformSet() = default;
|
||||
BoundUniformSet(id<MTLBuffer> p_buffer, ResourceUsageMap &&p_usage_to_resources, uint32_t p_frame_size) :
|
||||
buffer(p_buffer), usage_to_resources(std::move(p_usage_to_resources)), frame_size(p_frame_size) {}
|
||||
};
|
||||
|
||||
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet {
|
||||
private:
|
||||
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
|
||||
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
|
||||
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
|
||||
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
|
||||
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
|
||||
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets);
|
||||
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
|
||||
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets);
|
||||
|
||||
void update_dynamic_uniforms(MDShader *p_shader, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, BoundUniformSet &p_bound_set, uint32_t p_dynamic_offsets, uint32_t p_frame_idx);
|
||||
|
||||
public:
|
||||
uint32_t index;
|
||||
uint32_t index = 0;
|
||||
LocalVector<RDD::BoundUniform> uniforms;
|
||||
HashMap<MDShader *, BoundUniformSet> bound_uniforms;
|
||||
|
||||
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
|
||||
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
|
||||
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
|
||||
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
|
||||
|
||||
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index);
|
||||
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
|
||||
};
|
||||
|
||||
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline {
|
||||
|
||||
@ -247,39 +247,26 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc
|
||||
|
||||
#pragma mark - Render Commands
|
||||
|
||||
void MDCommandBuffer::render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
|
||||
void MDCommandBuffer::render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
|
||||
DEV_ASSERT(type == MDCommandBufferStateType::Render);
|
||||
|
||||
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
|
||||
if (render.uniform_sets.size() <= p_set_index) {
|
||||
render.dynamic_offsets |= p_dynamic_offsets;
|
||||
|
||||
if (uint32_t new_size = p_first_set_index + p_set_count; render.uniform_sets.size() < new_size) {
|
||||
uint32_t s = render.uniform_sets.size();
|
||||
render.uniform_sets.resize(p_set_index + 1);
|
||||
render.uniform_sets.resize(new_size);
|
||||
// Set intermediate values to null.
|
||||
std::fill(&render.uniform_sets[s], &render.uniform_sets[p_set_index] + 1, nullptr);
|
||||
std::fill(&render.uniform_sets[s], render.uniform_sets.end().operator->(), nullptr);
|
||||
}
|
||||
|
||||
if (render.uniform_sets[p_set_index] != set) {
|
||||
render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
|
||||
render.uniform_set_mask |= 1ULL << p_set_index;
|
||||
render.uniform_sets[p_set_index] = set;
|
||||
}
|
||||
}
|
||||
|
||||
void MDCommandBuffer::render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
|
||||
DEV_ASSERT(type == MDCommandBufferStateType::Render);
|
||||
const MDShader *shader = (const MDShader *)p_shader.id;
|
||||
DynamicOffsetLayout layout = shader->dynamic_offset_layout;
|
||||
|
||||
for (size_t i = 0; i < p_set_count; ++i) {
|
||||
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
|
||||
|
||||
uint32_t index = p_first_set_index + i;
|
||||
if (render.uniform_sets.size() <= index) {
|
||||
uint32_t s = render.uniform_sets.size();
|
||||
render.uniform_sets.resize(index + 1);
|
||||
// Set intermediate values to null.
|
||||
std::fill(&render.uniform_sets[s], &render.uniform_sets[index] + 1, nullptr);
|
||||
}
|
||||
|
||||
if (render.uniform_sets[index] != set) {
|
||||
if (render.uniform_sets[index] != set || layout.get_count(index) > 0) {
|
||||
render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
|
||||
render.uniform_set_mask |= 1ULL << index;
|
||||
render.uniform_sets[index] = set;
|
||||
@ -524,6 +511,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() {
|
||||
render.uniform_set_mask = 0;
|
||||
|
||||
MDRenderShader *shader = render.pipeline->shader;
|
||||
const uint32_t dynamic_offsets = render.dynamic_offsets;
|
||||
|
||||
while (set_uniforms != 0) {
|
||||
// Find the index of the next set bit.
|
||||
@ -534,7 +522,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() {
|
||||
if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
|
||||
continue;
|
||||
}
|
||||
set->bind_uniforms(shader, render, index);
|
||||
set->bind_uniforms(shader, render, index, dynamic_offsets, device_driver->frame_index, device_driver->frame_count);
|
||||
}
|
||||
}
|
||||
|
||||
@ -819,7 +807,8 @@ void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const
|
||||
|
||||
// Reverse the buffers, as their bindings are assigned in descending order.
|
||||
for (uint32_t i = 0; i < p_binding_count; i += 1) {
|
||||
render.vertex_buffers[i] = rid::get(p_buffers[p_binding_count - i - 1]);
|
||||
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id;
|
||||
render.vertex_buffers[i] = buf_info->metal_buffer;
|
||||
render.vertex_offsets[i] = p_offsets[p_binding_count - i - 1];
|
||||
}
|
||||
|
||||
@ -837,7 +826,9 @@ void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const
|
||||
void MDCommandBuffer::render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) {
|
||||
DEV_ASSERT(type == MDCommandBufferStateType::Render);
|
||||
|
||||
render.index_buffer = rid::get(p_buffer);
|
||||
const RenderingDeviceDriverMetal::BufferInfo *buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffer.id;
|
||||
|
||||
render.index_buffer = buffer->metal_buffer;
|
||||
render.index_type = p_format == RDD::IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32;
|
||||
render.index_offset = p_offset;
|
||||
}
|
||||
@ -880,7 +871,7 @@ void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buff
|
||||
|
||||
id<MTLRenderCommandEncoder> enc = render.encoder;
|
||||
|
||||
id<MTLBuffer> indirect_buffer = rid::get(p_indirect_buffer);
|
||||
const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;
|
||||
NSUInteger indirect_offset = p_offset;
|
||||
|
||||
for (uint32_t i = 0; i < p_draw_count; i++) {
|
||||
@ -888,7 +879,7 @@ void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buff
|
||||
indexType:render.index_type
|
||||
indexBuffer:render.index_buffer
|
||||
indexBufferOffset:0
|
||||
indirectBuffer:indirect_buffer
|
||||
indirectBuffer:indirect_buffer->metal_buffer
|
||||
indirectBufferOffset:indirect_offset];
|
||||
indirect_offset += p_stride;
|
||||
}
|
||||
@ -906,12 +897,12 @@ void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint
|
||||
|
||||
id<MTLRenderCommandEncoder> enc = render.encoder;
|
||||
|
||||
id<MTLBuffer> indirect_buffer = rid::get(p_indirect_buffer);
|
||||
const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;
|
||||
NSUInteger indirect_offset = p_offset;
|
||||
|
||||
for (uint32_t i = 0; i < p_draw_count; i++) {
|
||||
[enc drawPrimitives:render.pipeline->raster_state.render_primitive
|
||||
indirectBuffer:indirect_buffer
|
||||
indirectBuffer:indirect_buffer->metal_buffer
|
||||
indirectBufferOffset:indirect_offset];
|
||||
indirect_offset += p_stride;
|
||||
}
|
||||
@ -944,7 +935,10 @@ void MDCommandBuffer::RenderState::reset() {
|
||||
index_type = MTLIndexTypeUInt16;
|
||||
dirty = DIRTY_NONE;
|
||||
uniform_sets.clear();
|
||||
dynamic_offsets = 0;
|
||||
uniform_set_mask = 0;
|
||||
push_constant_bindings[0] = ~0U;
|
||||
push_constant_bindings[1] = ~0U;
|
||||
push_constant_data_len = 0;
|
||||
clear_values.clear();
|
||||
viewports.clear();
|
||||
@ -1041,6 +1035,7 @@ void MDCommandBuffer::_compute_bind_uniform_sets() {
|
||||
compute.uniform_set_mask = 0;
|
||||
|
||||
MDComputeShader *shader = compute.pipeline->shader;
|
||||
const uint32_t dynamic_offsets = compute.dynamic_offsets;
|
||||
|
||||
while (set_uniforms != 0) {
|
||||
// Find the index of the next set bit.
|
||||
@ -1051,7 +1046,7 @@ void MDCommandBuffer::_compute_bind_uniform_sets() {
|
||||
if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
|
||||
continue;
|
||||
}
|
||||
set->bind_uniforms(shader, compute, index);
|
||||
set->bind_uniforms(shader, compute, index, dynamic_offsets, device_driver->frame_index, device_driver->frame_count);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1060,7 +1055,9 @@ void MDCommandBuffer::ComputeState::reset() {
|
||||
encoder = nil;
|
||||
dirty = DIRTY_NONE;
|
||||
uniform_sets.clear();
|
||||
dynamic_offsets = 0;
|
||||
uniform_set_mask = 0;
|
||||
push_constant_bindings[0] = ~0U;
|
||||
push_constant_data_len = 0;
|
||||
// Keep the keys, as they are likely to be used again.
|
||||
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
|
||||
@ -1068,39 +1065,26 @@ void MDCommandBuffer::ComputeState::reset() {
|
||||
}
|
||||
}
|
||||
|
||||
void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
|
||||
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
|
||||
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
|
||||
|
||||
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
|
||||
if (compute.uniform_sets.size() <= p_set_index) {
|
||||
uint32_t s = render.uniform_sets.size();
|
||||
compute.uniform_sets.resize(p_set_index + 1);
|
||||
compute.dynamic_offsets |= p_dynamic_offsets;
|
||||
|
||||
if (uint32_t new_size = p_first_set_index + p_set_count; compute.uniform_sets.size() < new_size) {
|
||||
uint32_t s = compute.uniform_sets.size();
|
||||
compute.uniform_sets.resize(new_size);
|
||||
// Set intermediate values to null.
|
||||
std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr);
|
||||
std::fill(&compute.uniform_sets[s], compute.uniform_sets.end().operator->(), nullptr);
|
||||
}
|
||||
|
||||
if (compute.uniform_sets[p_set_index] != set) {
|
||||
compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
|
||||
compute.uniform_set_mask |= 1ULL << p_set_index;
|
||||
compute.uniform_sets[p_set_index] = set;
|
||||
}
|
||||
}
|
||||
|
||||
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
|
||||
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
|
||||
const MDShader *shader = (const MDShader *)p_shader.id;
|
||||
DynamicOffsetLayout layout = shader->dynamic_offset_layout;
|
||||
|
||||
for (size_t i = 0; i < p_set_count; ++i) {
|
||||
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
|
||||
|
||||
uint32_t index = p_first_set_index + i;
|
||||
if (compute.uniform_sets.size() <= index) {
|
||||
uint32_t s = compute.uniform_sets.size();
|
||||
compute.uniform_sets.resize(index + 1);
|
||||
// Set intermediate values to null.
|
||||
std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr);
|
||||
}
|
||||
|
||||
if (compute.uniform_sets[index] != set) {
|
||||
if (compute.uniform_sets[index] != set || layout.get_count(index) > 0) {
|
||||
compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
|
||||
compute.uniform_set_mask |= 1ULL << index;
|
||||
compute.uniform_sets[index] = set;
|
||||
@ -1124,10 +1108,10 @@ void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer,
|
||||
|
||||
_compute_set_dirty_state();
|
||||
|
||||
id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer);
|
||||
const RenderingDeviceDriverMetal::BufferInfo *indirectBuffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;
|
||||
|
||||
id<MTLComputeCommandEncoder> enc = compute.encoder;
|
||||
[enc dispatchThreadgroupsWithIndirectBuffer:indirectBuffer indirectBufferOffset:p_offset threadsPerThreadgroup:compute.pipeline->compute_state.local];
|
||||
[enc dispatchThreadgroupsWithIndirectBuffer:indirectBuffer->metal_buffer indirectBufferOffset:p_offset threadsPerThreadgroup:compute.pipeline->compute_state.local];
|
||||
}
|
||||
|
||||
void MDCommandBuffer::_end_compute_dispatch() {
|
||||
@ -1164,7 +1148,7 @@ MDRenderShader::MDRenderShader(CharString p_name,
|
||||
frag(p_frag) {
|
||||
}
|
||||
|
||||
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
|
||||
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) {
|
||||
DEV_ASSERT(p_shader->uses_argument_buffers);
|
||||
DEV_ASSERT(p_state.encoder != nil);
|
||||
|
||||
@ -1173,48 +1157,54 @@ void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandB
|
||||
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
|
||||
id<MTLDevice> __unsafe_unretained device = enc.device;
|
||||
|
||||
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index);
|
||||
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count);
|
||||
|
||||
// Set the buffer for the vertex stage.
|
||||
{
|
||||
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
|
||||
if (offset) {
|
||||
[enc setVertexBuffer:bus.buffer offset:*offset atIndex:p_set_index];
|
||||
[enc setVertexBuffer:bus.buffer offset:bus.make_offset(p_frame_idx, *offset) atIndex:p_set_index];
|
||||
}
|
||||
}
|
||||
// Set the buffer for the fragment stage.
|
||||
{
|
||||
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
|
||||
if (offset) {
|
||||
[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:p_set_index];
|
||||
[enc setFragmentBuffer:bus.buffer offset:bus.make_offset(p_frame_idx, *offset) atIndex:p_set_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
|
||||
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets) {
|
||||
DEV_ASSERT(!p_shader->uses_argument_buffers);
|
||||
DEV_ASSERT(p_state.encoder != nil);
|
||||
|
||||
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
|
||||
|
||||
UniformSet const &set = p_shader->sets[p_set_index];
|
||||
DynamicOffsetLayout layout = p_shader->dynamic_offset_layout;
|
||||
uint32_t dynamic_index = 0;
|
||||
|
||||
for (uint32_t i = 0; i < MIN(uniforms.size(), set.uniforms.size()); i++) {
|
||||
RDD::BoundUniform const &uniform = uniforms[i];
|
||||
const UniformInfo &ui = set.uniforms[i];
|
||||
|
||||
uint32_t frame_idx;
|
||||
if (uniform.is_dynamic()) {
|
||||
uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index);
|
||||
dynamic_index++;
|
||||
frame_idx = (p_dynamic_offsets >> shift) & 0xf;
|
||||
} else {
|
||||
frame_idx = 0;
|
||||
}
|
||||
|
||||
static const RDC::ShaderStage stage_usages[2] = { RDC::ShaderStage::SHADER_STAGE_VERTEX, RDC::ShaderStage::SHADER_STAGE_FRAGMENT };
|
||||
for (const RDC::ShaderStage stage : stage_usages) {
|
||||
ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
|
||||
|
||||
const BindingInfo *bi = ui.bindings.getptr(stage);
|
||||
if (bi == nullptr) {
|
||||
// No binding for this stage.
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((ui.active_stages & stage_usage) == 0) {
|
||||
// Not active for this state, so don't bind anything.
|
||||
if (bi == nullptr || (ui.active_stages & stage_usage) == 0) {
|
||||
// No binding for this stage or it is not active
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1321,20 +1311,22 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Ren
|
||||
case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
|
||||
CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER:
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)uniform.ids[0].id;
|
||||
if (stage == RDD::SHADER_STAGE_VERTEX) {
|
||||
[enc setVertexBuffer:buffer offset:0 atIndex:bi->index];
|
||||
[enc setVertexBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index];
|
||||
} else {
|
||||
[enc setFragmentBuffer:buffer offset:0 atIndex:bi->index];
|
||||
[enc setFragmentBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index];
|
||||
}
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
|
||||
if (stage == RDD::SHADER_STAGE_VERTEX) {
|
||||
[enc setVertexBuffer:buffer offset:0 atIndex:bi->index];
|
||||
[enc setVertexBuffer:buf_info->metal_buffer offset:frame_idx * buf_info->size_bytes atIndex:bi->index];
|
||||
} else {
|
||||
[enc setFragmentBuffer:buffer offset:0 atIndex:bi->index];
|
||||
[enc setFragmentBuffer:buf_info->metal_buffer offset:frame_idx * buf_info->size_bytes atIndex:bi->index];
|
||||
}
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
|
||||
@ -1368,15 +1360,15 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Ren
|
||||
}
|
||||
}
|
||||
|
||||
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
|
||||
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) {
|
||||
if (p_shader->uses_argument_buffers) {
|
||||
bind_uniforms_argument_buffers(p_shader, p_state, p_set_index);
|
||||
bind_uniforms_argument_buffers(p_shader, p_state, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count);
|
||||
} else {
|
||||
bind_uniforms_direct(p_shader, p_state, p_set_index);
|
||||
bind_uniforms_direct(p_shader, p_state, p_set_index, p_dynamic_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index) {
|
||||
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) {
|
||||
DEV_ASSERT(p_shader->uses_argument_buffers);
|
||||
DEV_ASSERT(p_state.encoder != nil);
|
||||
|
||||
@ -1385,40 +1377,46 @@ void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandB
|
||||
id<MTLComputeCommandEncoder> enc = p_state.encoder;
|
||||
id<MTLDevice> device = enc.device;
|
||||
|
||||
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index);
|
||||
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count);
|
||||
|
||||
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
|
||||
if (offset) {
|
||||
[enc setBuffer:bus.buffer offset:*offset atIndex:p_set_index];
|
||||
[enc setBuffer:bus.buffer offset:bus.make_offset(p_frame_idx, *offset) atIndex:p_set_index];
|
||||
}
|
||||
}
|
||||
|
||||
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index) {
|
||||
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets) {
|
||||
DEV_ASSERT(!p_shader->uses_argument_buffers);
|
||||
DEV_ASSERT(p_state.encoder != nil);
|
||||
|
||||
id<MTLComputeCommandEncoder> __unsafe_unretained enc = p_state.encoder;
|
||||
|
||||
UniformSet const &set = p_shader->sets[p_set_index];
|
||||
DynamicOffsetLayout layout = p_shader->dynamic_offset_layout;
|
||||
uint32_t dynamic_index = 0;
|
||||
|
||||
for (uint32_t i = 0; i < uniforms.size(); i++) {
|
||||
RDD::BoundUniform const &uniform = uniforms[i];
|
||||
const UniformInfo &ui = set.uniforms[i];
|
||||
|
||||
uint32_t frame_idx;
|
||||
if (uniform.is_dynamic()) {
|
||||
uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index);
|
||||
dynamic_index++;
|
||||
frame_idx = (p_dynamic_offsets >> shift) & 0xf;
|
||||
} else {
|
||||
frame_idx = 0;
|
||||
}
|
||||
|
||||
const RDC::ShaderStage stage = RDC::ShaderStage::SHADER_STAGE_COMPUTE;
|
||||
const ShaderStageUsage stage_usage = ShaderStageUsage(1 << stage);
|
||||
|
||||
const BindingInfo *bi = ui.bindings.getptr(stage);
|
||||
if (bi == nullptr) {
|
||||
if (bi == nullptr || (ui.active_stages & stage_usage) == 0) {
|
||||
// No binding for this stage.
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((ui.active_stages & stage_usage) == 0) {
|
||||
// Not active for this state, so don't bind anything.
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (uniform.type) {
|
||||
case RDD::UNIFORM_TYPE_SAMPLER: {
|
||||
size_t count = uniform.ids.size();
|
||||
@ -1490,13 +1488,15 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Com
|
||||
case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
|
||||
CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
|
||||
[enc setBuffer:buffer offset:0 atIndex:bi->index];
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER:
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
|
||||
[enc setBuffer:buffer offset:0 atIndex:bi->index];
|
||||
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)uniform.ids[0].id;
|
||||
[enc setBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index];
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
|
||||
[enc setBuffer:buf_info->metal_buffer offset:frame_idx * buf_info->size_bytes atIndex:bi->index];
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
|
||||
size_t count = uniform.ids.size();
|
||||
@ -1519,19 +1519,23 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Com
|
||||
}
|
||||
}
|
||||
|
||||
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index) {
|
||||
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) {
|
||||
if (p_shader->uses_argument_buffers) {
|
||||
bind_uniforms_argument_buffers(p_shader, p_state, p_set_index);
|
||||
bind_uniforms_argument_buffers(p_shader, p_state, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count);
|
||||
} else {
|
||||
bind_uniforms_direct(p_shader, p_state, p_set_index);
|
||||
bind_uniforms_direct(p_shader, p_state, p_set_index, p_dynamic_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index) {
|
||||
BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) {
|
||||
BoundUniformSet *sus = bound_uniforms.getptr(p_shader);
|
||||
if (sus != nullptr) {
|
||||
sus->merge_into(p_resource_usage);
|
||||
return *sus;
|
||||
BoundUniformSet &bs = *sus;
|
||||
if (bs.is_dynamic()) {
|
||||
update_dynamic_uniforms(p_shader, p_resource_usage, p_set_index, bs, p_dynamic_offsets, p_frame_idx);
|
||||
}
|
||||
bs.merge_into(p_resource_usage);
|
||||
return bs;
|
||||
}
|
||||
|
||||
UniformSet const &set = p_shader->sets[p_set_index];
|
||||
@ -1546,9 +1550,18 @@ BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevic
|
||||
}
|
||||
};
|
||||
id<MTLBuffer> enc_buffer = nil;
|
||||
uint32_t frame_size = set.buffer_size;
|
||||
uint32_t buffer_size = frame_size;
|
||||
if (!set.dynamic_uniforms.is_empty()) {
|
||||
// We need to store a copy of the argument buffer for each frame that could be in flight, just
|
||||
// like the dynamic buffers themselves.
|
||||
buffer_size *= p_frame_count;
|
||||
} else {
|
||||
frame_size = 0;
|
||||
}
|
||||
if (set.buffer_size > 0) {
|
||||
MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceHazardTrackingModeTracked;
|
||||
enc_buffer = [p_device newBufferWithLength:set.buffer_size options:options];
|
||||
MTLResourceOptions options = MTLResourceHazardTrackingModeUntracked | MTLResourceStorageModeShared;
|
||||
enc_buffer = [p_device newBufferWithLength:buffer_size options:options];
|
||||
for (KeyValue<RDC::ShaderStage, id<MTLArgumentEncoder>> const &kv : set.encoders) {
|
||||
RDD::ShaderStage const stage = kv.key;
|
||||
ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
|
||||
@ -1647,16 +1660,18 @@ BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevic
|
||||
case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
|
||||
CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
|
||||
[enc setBuffer:buffer offset:0 atIndex:bi->index];
|
||||
add_usage(buffer, stage, bi->usage);
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER:
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
|
||||
[enc setBuffer:buffer offset:0 atIndex:bi->index];
|
||||
add_usage(buffer, stage, bi->usage);
|
||||
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)uniform.ids[0].id;
|
||||
[enc setBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index];
|
||||
add_usage(buf_info->metal_buffer, stage, bi->usage);
|
||||
} break;
|
||||
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
|
||||
add_usage(buf_info->metal_buffer, stage, bi->usage);
|
||||
} break;
|
||||
|
||||
case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
|
||||
size_t count = uniform.ids.size();
|
||||
if (count == 1) {
|
||||
@ -1679,6 +1694,16 @@ BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevic
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Duplicate the argument buffer data for each frame, if needed.
|
||||
// The dynamic uniforms will be updated each frame.
|
||||
if (frame_size > 0) {
|
||||
void *ptr = enc_buffer.contents;
|
||||
for (uint32_t i = 1; i < p_frame_count; i++) {
|
||||
void *dst = (void *)((uintptr_t)ptr + i * frame_size);
|
||||
memcpy(dst, ptr, frame_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ResourceUsageMap usage_to_resources;
|
||||
@ -1693,10 +1718,57 @@ BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevic
|
||||
}
|
||||
}
|
||||
|
||||
BoundUniformSet bs = { .buffer = enc_buffer, .usage_to_resources = usage_to_resources };
|
||||
bound_uniforms.insert(p_shader, bs);
|
||||
BoundUniformSet &bs = bound_uniforms.insert(p_shader, BoundUniformSet(enc_buffer, std::move(usage_to_resources), frame_size))->value;
|
||||
if (bs.is_dynamic()) {
|
||||
update_dynamic_uniforms(p_shader, p_resource_usage, p_set_index, bs, p_dynamic_offsets, p_frame_idx);
|
||||
}
|
||||
bs.merge_into(p_resource_usage);
|
||||
return bound_uniforms.get(p_shader);
|
||||
return bs;
|
||||
}
|
||||
|
||||
void MDUniformSet::update_dynamic_uniforms(MDShader *p_shader, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, BoundUniformSet &p_bound_set, uint32_t p_dynamic_offsets, uint32_t p_frame_idx) {
|
||||
// This shouldn't be called if the set doesn't have dynamic uniforms.
|
||||
DEV_ASSERT(p_bound_set.is_dynamic());
|
||||
|
||||
UniformSet const &set = p_shader->sets[p_set_index];
|
||||
DEV_ASSERT(!set.dynamic_uniforms.is_empty()); // Programming error if this is empty.
|
||||
|
||||
DynamicOffsetLayout layout = p_shader->dynamic_offset_layout;
|
||||
|
||||
for (KeyValue<RDC::ShaderStage, id<MTLArgumentEncoder>> const &kv : set.encoders) {
|
||||
RDD::ShaderStage const stage = kv.key;
|
||||
ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
|
||||
id<MTLArgumentEncoder> const __unsafe_unretained enc = kv.value;
|
||||
|
||||
[enc setArgumentBuffer:p_bound_set.buffer offset:p_bound_set.make_offset(p_frame_idx, set.offsets[stage])];
|
||||
|
||||
uint32_t dynamic_index = 0;
|
||||
|
||||
for (uint32_t i : set.dynamic_uniforms) {
|
||||
RDD::BoundUniform const &uniform = uniforms[i];
|
||||
const UniformInfo &ui = set.uniforms[i];
|
||||
|
||||
const BindingInfo *bi = ui.bindings.getptr(stage);
|
||||
if (bi == nullptr) {
|
||||
// No binding for this stage.
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((ui.active_stages & stage_usage) == None) {
|
||||
// Not active for this state, so don't bind anything.
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index);
|
||||
dynamic_index++;
|
||||
uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xf;
|
||||
|
||||
const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
|
||||
[enc setBuffer:buf_info->metal_buffer
|
||||
offset:frame_idx * buf_info->size_bytes
|
||||
atIndex:bi->index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const {
|
||||
|
||||
@ -48,6 +48,7 @@ class RenderingContextDriverMetal;
|
||||
|
||||
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMetal : public RenderingDeviceDriver {
|
||||
friend struct ShaderCacheEntry;
|
||||
friend class MDCommandBuffer;
|
||||
|
||||
template <typename T>
|
||||
using Result = std::variant<T, Error>;
|
||||
@ -58,6 +59,12 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
|
||||
RenderingContextDriver::Device context_device;
|
||||
id<MTLDevice> device = nil;
|
||||
|
||||
uint32_t frame_count = 1;
|
||||
/// frame_index is a cyclic counter derived from the current frame number modulo frame_count,
|
||||
/// cycling through values from 0 to frame_count - 1
|
||||
uint32_t frame_index = 0;
|
||||
uint32_t frames_drawn = 0;
|
||||
|
||||
MetalDeviceProperties *device_properties = nullptr;
|
||||
MetalDeviceProfile device_profile;
|
||||
RenderingShaderContainerFormatMetal *shader_container_format = nullptr;
|
||||
@ -101,12 +108,27 @@ public:
|
||||
#pragma mark - Buffers
|
||||
|
||||
public:
|
||||
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final;
|
||||
struct BufferInfo {
|
||||
id<MTLBuffer> metal_buffer;
|
||||
|
||||
_FORCE_INLINE_ bool is_dynamic() const { return _frame_idx != UINT32_MAX; }
|
||||
_FORCE_INLINE_ uint32_t frame_index() const { return _frame_idx; }
|
||||
_FORCE_INLINE_ void set_frame_index(uint32_t p_frame_index) { _frame_idx = p_frame_index; }
|
||||
|
||||
protected:
|
||||
// If dynamic buffer, then its range is [0; RenderingDeviceDriverMetal::frame_count)
|
||||
// else it's UINT32_MAX.
|
||||
uint32_t _frame_idx = UINT32_MAX;
|
||||
};
|
||||
|
||||
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) override final;
|
||||
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final;
|
||||
virtual void buffer_free(BufferID p_buffer) override final;
|
||||
virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final;
|
||||
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
|
||||
virtual void buffer_unmap(BufferID p_buffer) override final;
|
||||
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final;
|
||||
virtual void buffer_flush(BufferID p_buffer) override final;
|
||||
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
|
||||
|
||||
#pragma mark - Texture
|
||||
@ -253,6 +275,7 @@ public:
|
||||
public:
|
||||
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) override final;
|
||||
virtual void uniform_set_free(UniformSetID p_uniform_set) override final;
|
||||
virtual uint32_t uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const override final;
|
||||
|
||||
#pragma mark - Commands
|
||||
|
||||
@ -323,8 +346,7 @@ public:
|
||||
|
||||
// Binding.
|
||||
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
|
||||
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override final;
|
||||
|
||||
// Drawing.
|
||||
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
|
||||
@ -364,8 +386,7 @@ public:
|
||||
|
||||
// Binding.
|
||||
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
|
||||
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override final;
|
||||
|
||||
// Dispatching.
|
||||
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
|
||||
@ -438,3 +459,17 @@ public:
|
||||
RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver);
|
||||
~RenderingDeviceDriverMetal();
|
||||
};
|
||||
|
||||
// Defined outside because we need to forward declare it in metal_objects.h
|
||||
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalBufferDynamicInfo : public RenderingDeviceDriverMetal::BufferInfo {
|
||||
uint64_t size_bytes; // Contains the real buffer size / frame_count.
|
||||
uint32_t next_frame_index(uint32_t p_frame_count) {
|
||||
// This is the next frame index to use for this buffer.
|
||||
_frame_idx = (_frame_idx + 1u) % p_frame_count;
|
||||
return _frame_idx;
|
||||
}
|
||||
#ifdef DEBUG_ENABLED
|
||||
// For tracking that a persistent buffer isn't mapped twice in the same frame.
|
||||
uint64_t last_frame_mapped = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -64,9 +64,7 @@
|
||||
#import <Metal/Metal.h>
|
||||
#import <os/log.h>
|
||||
#import <os/signpost.h>
|
||||
#import <spirv.hpp>
|
||||
#import <spirv_msl.hpp>
|
||||
#import <spirv_parser.hpp>
|
||||
#include <algorithm>
|
||||
|
||||
#pragma mark - Logging
|
||||
|
||||
@ -121,20 +119,44 @@ _FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) {
|
||||
/**** BUFFERS ****/
|
||||
/*****************/
|
||||
|
||||
RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) {
|
||||
MTLResourceOptions options = MTLResourceHazardTrackingModeTracked;
|
||||
RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
|
||||
const uint64_t original_size = p_size;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
p_size = round_up_to_alignment(p_size, 16u) * frame_count;
|
||||
}
|
||||
|
||||
MTLResourceOptions options = 0;
|
||||
switch (p_allocation_type) {
|
||||
case MEMORY_ALLOCATION_TYPE_CPU:
|
||||
options |= MTLResourceStorageModeShared;
|
||||
options = MTLResourceHazardTrackingModeTracked | MTLResourceStorageModeShared;
|
||||
break;
|
||||
case MEMORY_ALLOCATION_TYPE_GPU:
|
||||
options |= MTLResourceStorageModePrivate;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
options = MTLResourceHazardTrackingModeUntracked | MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
|
||||
} else {
|
||||
options = MTLResourceHazardTrackingModeTracked | MTLResourceStorageModePrivate;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
id<MTLBuffer> obj = [device newBufferWithLength:p_size options:options];
|
||||
ERR_FAIL_NULL_V_MSG(obj, BufferID(), "Can't create buffer of size: " + itos(p_size));
|
||||
return rid::make(obj);
|
||||
|
||||
BufferInfo *buf_info;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
MetalBufferDynamicInfo *dyn_buffer = memnew(MetalBufferDynamicInfo);
|
||||
buf_info = dyn_buffer;
|
||||
#ifdef DEBUG_ENABLED
|
||||
dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;
|
||||
#endif
|
||||
dyn_buffer->set_frame_index(0u);
|
||||
dyn_buffer->size_bytes = round_up_to_alignment(original_size, 16u);
|
||||
} else {
|
||||
buf_info = memnew(BufferInfo);
|
||||
}
|
||||
buf_info->metal_buffer = obj;
|
||||
|
||||
return BufferID(buf_info);
|
||||
}
|
||||
|
||||
bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) {
|
||||
@ -143,28 +165,49 @@ bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, Data
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) {
|
||||
rid::release(p_buffer);
|
||||
BufferInfo *buf_info = (BufferInfo *)p_buffer.id;
|
||||
buf_info->metal_buffer = nil; // Tell ARC to release.
|
||||
|
||||
if (buf_info->is_dynamic()) {
|
||||
memdelete((MetalBufferDynamicInfo *)buf_info);
|
||||
} else {
|
||||
memdelete(buf_info);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t RenderingDeviceDriverMetal::buffer_get_allocation_size(BufferID p_buffer) {
|
||||
id<MTLBuffer> obj = rid::get(p_buffer);
|
||||
return obj.allocatedSize;
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
|
||||
return buf_info->metal_buffer.allocatedSize;
|
||||
}
|
||||
|
||||
uint8_t *RenderingDeviceDriverMetal::buffer_map(BufferID p_buffer) {
|
||||
id<MTLBuffer> obj = rid::get(p_buffer);
|
||||
ERR_FAIL_COND_V_MSG(obj.storageMode != MTLStorageModeShared, nullptr, "Unable to map private buffers");
|
||||
return (uint8_t *)obj.contents;
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
|
||||
ERR_FAIL_COND_V_MSG(buf_info->metal_buffer.storageMode != MTLStorageModeShared, nullptr, "Unable to map private buffers");
|
||||
return (uint8_t *)buf_info->metal_buffer.contents;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::buffer_unmap(BufferID p_buffer) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {
|
||||
MetalBufferDynamicInfo *buf_info = (MetalBufferDynamicInfo *)p_buffer.id;
|
||||
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");
|
||||
#ifdef DEBUG_ENABLED
|
||||
ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
|
||||
buf_info->last_frame_mapped = p_frames_drawn;
|
||||
#endif
|
||||
return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(frame_count) * buf_info->size_bytes;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::buffer_flush(BufferID p_buffer) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
uint64_t RenderingDeviceDriverMetal::buffer_get_device_address(BufferID p_buffer) {
|
||||
if (@available(iOS 16.0, macOS 13.0, *)) {
|
||||
id<MTLBuffer> obj = rid::get(p_buffer);
|
||||
return obj.gpuAddress;
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
|
||||
return buf_info->metal_buffer.gpuAddress;
|
||||
} else {
|
||||
#if DEV_ENABLED
|
||||
WARN_PRINT_ONCE("buffer_get_device_address is not supported on this OS version.");
|
||||
@ -1202,6 +1245,10 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
|
||||
uint32_t uniform_sets_count = mtl_refl.uniform_sets.size();
|
||||
uniform_sets.resize(uniform_sets_count);
|
||||
|
||||
DynamicOffsetLayout dynamic_offset_layout;
|
||||
uint8_t dynamic_offset = 0;
|
||||
uint8_t dynamic_count = 0;
|
||||
|
||||
// Create sets.
|
||||
for (uint32_t i = 0; i < uniform_sets_count; i++) {
|
||||
UniformSet &set = uniform_sets.write[i];
|
||||
@ -1215,6 +1262,16 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
|
||||
const ShaderUniform &uniform = refl_set.ptr()[j];
|
||||
const RSCM::UniformData &bind = mtl_set.ptr()[j];
|
||||
|
||||
switch (uniform.type) {
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
|
||||
set.dynamic_uniforms.push_back(j);
|
||||
dynamic_count++;
|
||||
} break;
|
||||
default: {
|
||||
} break;
|
||||
}
|
||||
|
||||
UniformInfo &ui = *iter;
|
||||
++iter;
|
||||
ui.binding = uniform.binding;
|
||||
@ -1235,6 +1292,11 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
|
||||
ui.bindings_secondary.insert((RDC::ShaderStage)info.shader_stage, bi);
|
||||
}
|
||||
}
|
||||
if (dynamic_count > 0) {
|
||||
dynamic_offset_layout.set_offset_count(i, dynamic_offset, dynamic_count);
|
||||
dynamic_offset += dynamic_count;
|
||||
dynamic_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < uniform_sets_count; i++) {
|
||||
@ -1329,6 +1391,8 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
|
||||
shader = rs;
|
||||
}
|
||||
|
||||
shader->dynamic_offset_layout = dynamic_offset_layout;
|
||||
|
||||
return RDD::ShaderID(shader);
|
||||
}
|
||||
|
||||
@ -1365,6 +1429,38 @@ void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) {
|
||||
memdelete(obj);
|
||||
}
|
||||
|
||||
uint32_t RenderingDeviceDriverMetal::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {
|
||||
const MDShader *shader = (const MDShader *)p_shader.id;
|
||||
const DynamicOffsetLayout layout = shader->dynamic_offset_layout;
|
||||
|
||||
if (layout.is_empty()) {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
uint32_t mask = 0u;
|
||||
|
||||
for (uint32_t i = 0; i < p_set_count; i++) {
|
||||
const uint32_t index = p_first_set_index + i;
|
||||
uint32_t shift = layout.get_offset_index_shift(index);
|
||||
const uint32_t count = layout.get_count(index);
|
||||
DEV_ASSERT(shader->sets[index].dynamic_uniforms.size() == count);
|
||||
if (count == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const MDUniformSet *usi = (const MDUniformSet *)p_uniform_sets[i].id;
|
||||
for (uint32_t uniform_index : shader->sets[index].dynamic_uniforms) {
|
||||
const RDD::BoundUniform &uniform = usi->uniforms[uniform_index];
|
||||
DEV_ASSERT(uniform.is_dynamic());
|
||||
const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
|
||||
mask |= buf_info->frame_index() << shift;
|
||||
shift += 4u;
|
||||
}
|
||||
}
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
}
|
||||
|
||||
@ -1372,26 +1468,25 @@ void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBuff
|
||||
|
||||
void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {
|
||||
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
id<MTLBuffer> buffer = rid::get(p_buffer);
|
||||
|
||||
id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder();
|
||||
[blit fillBuffer:buffer
|
||||
[blit fillBuffer:((const BufferInfo *)p_buffer.id)->metal_buffer
|
||||
range:NSMakeRange(p_offset, p_size)
|
||||
value:0];
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) {
|
||||
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
id<MTLBuffer> src = rid::get(p_src_buffer);
|
||||
id<MTLBuffer> dst = rid::get(p_dst_buffer);
|
||||
const BufferInfo *src = (const BufferInfo *)p_src_buffer.id;
|
||||
const BufferInfo *dst = (const BufferInfo *)p_dst_buffer.id;
|
||||
|
||||
id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder();
|
||||
|
||||
for (uint32_t i = 0; i < p_regions.size(); i++) {
|
||||
BufferCopyRegion region = p_regions[i];
|
||||
[blit copyFromBuffer:src
|
||||
[blit copyFromBuffer:src->metal_buffer
|
||||
sourceOffset:region.src_offset
|
||||
toBuffer:dst
|
||||
toBuffer:dst->metal_buffer
|
||||
destinationOffset:region.dst_offset
|
||||
size:region.size];
|
||||
}
|
||||
@ -1627,7 +1722,7 @@ void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buff
|
||||
BufferID p_buffer,
|
||||
VectorView<BufferTextureCopyRegion> p_regions) {
|
||||
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
id<MTLBuffer> buffer = rid::get(p_buffer);
|
||||
const BufferInfo *buffer = (const BufferInfo *)p_buffer.id;
|
||||
id<MTLTexture> texture = rid::get(p_texture);
|
||||
|
||||
id<MTLBlitCommandEncoder> enc = cmd->blit_command_encoder();
|
||||
@ -1683,7 +1778,7 @@ void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buff
|
||||
|
||||
if (p_source == CopySource::Buffer) {
|
||||
for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) {
|
||||
[enc copyFromBuffer:buffer
|
||||
[enc copyFromBuffer:buffer->metal_buffer
|
||||
sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx)
|
||||
sourceBytesPerRow:bytesPerRow
|
||||
sourceBytesPerImage:bytesPerImg
|
||||
@ -1701,7 +1796,7 @@ void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buff
|
||||
sourceLevel:mip_level
|
||||
sourceOrigin:txt_origin
|
||||
sourceSize:txt_size
|
||||
toBuffer:buffer
|
||||
toBuffer:buffer->metal_buffer
|
||||
destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx)
|
||||
destinationBytesPerRow:bytesPerRow
|
||||
destinationBytesPerImage:bytesPerImg
|
||||
@ -1896,14 +1991,9 @@ void RenderingDeviceDriverMetal::command_bind_render_pipeline(CommandBufferID p_
|
||||
cb->bind_pipeline(p_pipeline);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
void RenderingDeviceDriverMetal::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
|
||||
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
cb->render_bind_uniform_set(p_uniform_set, p_shader, p_set_index);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
|
||||
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
cb->render_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count);
|
||||
cb->render_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
|
||||
@ -2377,14 +2467,9 @@ void RenderingDeviceDriverMetal::command_bind_compute_pipeline(CommandBufferID p
|
||||
cb->bind_pipeline(p_pipeline);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
void RenderingDeviceDriverMetal::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
|
||||
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
cb->compute_bind_uniform_set(p_uniform_set, p_shader, p_set_index);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
|
||||
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
|
||||
cb->compute_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count);
|
||||
cb->compute_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
|
||||
@ -2491,6 +2576,8 @@ void RenderingDeviceDriverMetal::command_insert_breadcrumb(CommandBufferID p_cmd
|
||||
#pragma mark - Submission
|
||||
|
||||
void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) {
|
||||
frame_index = p_frame_index;
|
||||
frames_drawn = p_frames_drawn;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverMetal::end_segment() {
|
||||
@ -2508,8 +2595,8 @@ void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_
|
||||
// Can't set label after creation.
|
||||
} break;
|
||||
case OBJECT_TYPE_BUFFER: {
|
||||
id<MTLBuffer> buffer = rid::get(p_driver_id);
|
||||
buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id;
|
||||
buf_info->metal_buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
|
||||
} break;
|
||||
case OBJECT_TYPE_SHADER: {
|
||||
NSString *label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
|
||||
@ -2898,6 +2985,8 @@ static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *
|
||||
} break;
|
||||
}
|
||||
|
||||
res.update_options();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -2912,6 +3001,8 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
|
||||
|
||||
_check_capabilities();
|
||||
|
||||
frame_count = p_frame_count;
|
||||
|
||||
// Set the pipeline cache ID based on the Metal version.
|
||||
pipeline_cache_id = "metal-driver-" + get_api_version();
|
||||
|
||||
|
||||
@ -100,12 +100,26 @@ struct MetalDeviceProfile {
|
||||
bool simdPermute = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Options to configure the Metal device profile.
|
||||
*
|
||||
* This structure allows customization of the Metal device profile,
|
||||
* such as the argument buffers tier, which can affect how shaders are compiled.
|
||||
*/
|
||||
struct Options {
|
||||
ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1;
|
||||
};
|
||||
|
||||
Platform platform = Platform::macOS;
|
||||
GPU gpu = GPU::Apple4;
|
||||
Features features;
|
||||
Options options;
|
||||
|
||||
static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu);
|
||||
|
||||
// Configure any options for the device profile, which may include overrides from the environment.
|
||||
void update_options();
|
||||
|
||||
MetalDeviceProfile() = default;
|
||||
|
||||
private:
|
||||
|
||||
@ -84,9 +84,35 @@ const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Pl
|
||||
res.features.mslVersionMinor = 1;
|
||||
}
|
||||
|
||||
res.update_options();
|
||||
|
||||
return &profiles.insert(key, res)->value;
|
||||
}
|
||||
|
||||
void MetalDeviceProfile::update_options() {
|
||||
options.argument_buffers_tier = features.argument_buffers_tier;
|
||||
|
||||
if (OS::get_singleton()->has_environment(U"GODOT_MTL_ARGUMENT_BUFFERS_TIER")) {
|
||||
uint64_t tier = OS::get_singleton()->get_environment(U"GODOT_MTL_ARGUMENT_BUFFERS_TIER").to_int();
|
||||
switch (tier) {
|
||||
case 1:
|
||||
// All devices support tier 1 argument buffers.
|
||||
options.argument_buffers_tier = ArgumentBuffersTier::Tier1;
|
||||
break;
|
||||
case 2:
|
||||
if (features.argument_buffers_tier >= ArgumentBuffersTier::Tier2) {
|
||||
options.argument_buffers_tier = ArgumentBuffersTier::Tier2;
|
||||
} else {
|
||||
WARN_PRINT("Current device does not support tier 2 argument buffers, leaving as default.");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
WARN_PRINT(vformat("Invalid value for GODOT_MTL_ARGUMENT_BUFFER_TIER: %d. Falling back to device default.", tier));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RenderingShaderContainerMetal::_initialize_toolchain_properties() {
|
||||
if (compiler_props.is_valid()) {
|
||||
return;
|
||||
@ -313,12 +339,9 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
|
||||
msl_options.ios_support_base_vertex_instance = true;
|
||||
}
|
||||
|
||||
bool disable_argument_buffers = false;
|
||||
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
|
||||
disable_argument_buffers = true;
|
||||
}
|
||||
bool argument_buffers_allowed = get_shader_reflection().has_dynamic_buffers == false;
|
||||
|
||||
if (device_profile->features.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && !disable_argument_buffers) {
|
||||
if (device_profile->options.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && argument_buffers_allowed) {
|
||||
msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2;
|
||||
msl_options.argument_buffers = true;
|
||||
mtl_reflection_data.set_uses_argument_buffers(true);
|
||||
|
||||
@ -58,6 +58,8 @@
|
||||
static const uint32_t BREADCRUMB_BUFFER_ENTRIES = 512u;
|
||||
#endif
|
||||
|
||||
static const uint32_t MAX_DYNAMIC_BUFFERS = 8u; // Minimum guaranteed by Vulkan.
|
||||
|
||||
static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {
|
||||
VK_FORMAT_R4G4_UNORM_PACK8,
|
||||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||
@ -1569,7 +1571,7 @@ Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t
|
||||
max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");
|
||||
|
||||
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
|
||||
breadcrumb_buffer = buffer_create(2u * sizeof(uint32_t) * BREADCRUMB_BUFFER_ENTRIES, BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU);
|
||||
breadcrumb_buffer = buffer_create(2u * sizeof(uint32_t) * BREADCRUMB_BUFFER_ENTRIES, BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU, UINT64_MAX);
|
||||
#endif
|
||||
|
||||
#if defined(SWAPPY_FRAME_PACING_ENABLED)
|
||||
@ -1634,11 +1636,28 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_VERTEX_BIT, VK_BUFFER_USAGE_V
|
||||
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_INDIRECT_BIT, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT));
|
||||
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
|
||||
|
||||
RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) {
|
||||
RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
|
||||
uint32_t alignment = 16u; // 16 bytes is reasonable.
|
||||
if (p_usage.has_flag(BUFFER_USAGE_UNIFORM_BIT)) {
|
||||
// Some GPUs (e.g. NVIDIA) have absurdly high alignments, like 256 bytes.
|
||||
alignment = MAX(alignment, physical_device_properties.limits.minUniformBufferOffsetAlignment);
|
||||
}
|
||||
if (p_usage.has_flag(BUFFER_USAGE_STORAGE_BIT)) {
|
||||
// This shouldn't be a problem since it's often <= 16 bytes. But do it just in case.
|
||||
alignment = MAX(alignment, physical_device_properties.limits.minStorageBufferOffsetAlignment);
|
||||
}
|
||||
// Align the size. This is specially important for BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT buffers.
|
||||
// For the rest, it should work thanks to VMA taking care of the details. But still align just in case.
|
||||
p_size = STEPIFY(p_size, alignment);
|
||||
|
||||
const size_t original_size = p_size;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
p_size = p_size * frame_count;
|
||||
}
|
||||
VkBufferCreateInfo create_info = {};
|
||||
create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
create_info.size = p_size;
|
||||
create_info.usage = p_usage;
|
||||
create_info.usage = p_usage & ~BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT;
|
||||
create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
|
||||
VmaMemoryUsage vma_usage = VMA_MEMORY_USAGE_UNKNOWN;
|
||||
@ -1670,6 +1689,9 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie
|
||||
// We must set it right now or else vmaFindMemoryTypeIndexForBufferInfo will use wrong parameters.
|
||||
alloc_create_info.usage = vma_usage;
|
||||
}
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
|
||||
}
|
||||
alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
if (p_size <= SMALL_ALLOCATION_MAX_SIZE) {
|
||||
uint32_t mem_type_index = 0;
|
||||
@ -1698,11 +1720,26 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie
|
||||
}
|
||||
|
||||
// Bookkeep.
|
||||
BufferInfo *buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
|
||||
BufferInfo *buf_info;
|
||||
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
|
||||
void *persistent_ptr = nullptr;
|
||||
VkResult err = vmaMapMemory(allocator, allocation, &persistent_ptr);
|
||||
ERR_FAIL_COND_V_MSG(err, BufferID(), "vmaMapMemory failed with error " + itos(err) + ".");
|
||||
|
||||
BufferDynamicInfo *dyn_buffer = VersatileResource::allocate<BufferDynamicInfo>(resources_allocator);
|
||||
buf_info = dyn_buffer;
|
||||
#ifdef DEBUG_ENABLED
|
||||
dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;
|
||||
#endif
|
||||
dyn_buffer->frame_idx = 0u;
|
||||
dyn_buffer->persistent_ptr = (uint8_t *)persistent_ptr;
|
||||
} else {
|
||||
buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
|
||||
}
|
||||
buf_info->vk_buffer = vk_buffer;
|
||||
buf_info->allocation.handle = allocation;
|
||||
buf_info->allocation.size = alloc_info.size;
|
||||
buf_info->size = p_size;
|
||||
buf_info->size = original_size;
|
||||
|
||||
return BufferID(buf_info);
|
||||
}
|
||||
@ -1730,6 +1767,10 @@ void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) {
|
||||
vkDestroyBufferView(vk_device, buf_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW));
|
||||
}
|
||||
|
||||
if (buf_info->is_dynamic()) {
|
||||
vmaUnmapMemory(allocator, buf_info->allocation.handle);
|
||||
}
|
||||
|
||||
if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
|
||||
vmaDestroyBuffer(allocator, buf_info->vk_buffer, buf_info->allocation.handle);
|
||||
} else {
|
||||
@ -1737,7 +1778,11 @@ void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) {
|
||||
vmaFreeMemory(allocator, buf_info->allocation.handle);
|
||||
}
|
||||
|
||||
VersatileResource::free(resources_allocator, buf_info);
|
||||
if (buf_info->is_dynamic()) {
|
||||
VersatileResource::free(resources_allocator, (BufferDynamicInfo *)buf_info);
|
||||
} else {
|
||||
VersatileResource::free(resources_allocator, buf_info);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t RenderingDeviceDriverVulkan::buffer_get_allocation_size(BufferID p_buffer) {
|
||||
@ -1747,6 +1792,7 @@ uint64_t RenderingDeviceDriverVulkan::buffer_get_allocation_size(BufferID p_buff
|
||||
|
||||
uint8_t *RenderingDeviceDriverVulkan::buffer_map(BufferID p_buffer) {
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
|
||||
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), nullptr, "Buffer must NOT have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_persistent_map_advance() instead.");
|
||||
void *data_ptr = nullptr;
|
||||
VkResult err = vmaMapMemory(allocator, buf_info->allocation.handle, &data_ptr);
|
||||
ERR_FAIL_COND_V_MSG(err, nullptr, "vmaMapMemory failed with error " + itos(err) + ".");
|
||||
@ -1758,6 +1804,38 @@ void RenderingDeviceDriverVulkan::buffer_unmap(BufferID p_buffer) {
|
||||
vmaUnmapMemory(allocator, buf_info->allocation.handle);
|
||||
}
|
||||
|
||||
uint8_t *RenderingDeviceDriverVulkan::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {
|
||||
BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id;
|
||||
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");
|
||||
#ifdef DEBUG_ENABLED
|
||||
ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
|
||||
buf_info->last_frame_mapped = p_frames_drawn;
|
||||
#endif
|
||||
buf_info->frame_idx = (buf_info->frame_idx + 1u) % frame_count;
|
||||
return buf_info->persistent_ptr + buf_info->frame_idx * buf_info->size;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::buffer_flush(BufferID p_buffer) {
|
||||
BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id;
|
||||
|
||||
VkMemoryPropertyFlags mem_props_flags;
|
||||
vmaGetAllocationMemoryProperties(allocator, buf_info->allocation.handle, &mem_props_flags);
|
||||
|
||||
const bool needs_flushing = !(mem_props_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
||||
|
||||
if (needs_flushing) {
|
||||
if (buf_info->is_dynamic()) {
|
||||
pending_flushes.allocations.push_back(buf_info->allocation.handle);
|
||||
pending_flushes.offsets.push_back(buf_info->frame_idx * buf_info->size);
|
||||
pending_flushes.sizes.push_back(buf_info->size);
|
||||
} else {
|
||||
pending_flushes.allocations.push_back(buf_info->allocation.handle);
|
||||
pending_flushes.offsets.push_back(0u);
|
||||
pending_flushes.sizes.push_back(VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t RenderingDeviceDriverVulkan::buffer_get_device_address(BufferID p_buffer) {
|
||||
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
|
||||
VkBufferDeviceAddressInfo address_info = {};
|
||||
@ -2780,6 +2858,18 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu
|
||||
wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
|
||||
}
|
||||
|
||||
if (!pending_flushes.allocations.is_empty()) {
|
||||
// We must do this now, even if p_cmd_buffers is empty; because afterwards pending_flushes.allocations
|
||||
// could become dangling. We cannot delay this call for the next frame(s).
|
||||
err = vmaFlushAllocations(allocator, pending_flushes.allocations.size(),
|
||||
pending_flushes.allocations.ptr(), pending_flushes.offsets.ptr(),
|
||||
pending_flushes.sizes.ptr());
|
||||
pending_flushes.allocations.clear();
|
||||
pending_flushes.offsets.clear();
|
||||
pending_flushes.sizes.clear();
|
||||
ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
|
||||
}
|
||||
|
||||
if (p_cmd_buffers.size() > 0) {
|
||||
thread_local LocalVector<VkCommandBuffer> command_buffers;
|
||||
thread_local LocalVector<VkSemaphore> present_semaphores;
|
||||
@ -3713,9 +3803,15 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Re
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
} break;
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
|
||||
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
} break;
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
} break;
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
|
||||
} break;
|
||||
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
|
||||
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
|
||||
} break;
|
||||
@ -3942,6 +4038,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat
|
||||
curr_vk_size++;
|
||||
vk_sizes_count++;
|
||||
}
|
||||
if (p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC]) {
|
||||
*curr_vk_size = {};
|
||||
curr_vk_size->type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC] * max_descriptor_sets_per_pool;
|
||||
curr_vk_size++;
|
||||
vk_sizes_count++;
|
||||
}
|
||||
if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER]) {
|
||||
*curr_vk_size = {};
|
||||
curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
@ -3949,6 +4052,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat
|
||||
curr_vk_size++;
|
||||
vk_sizes_count++;
|
||||
}
|
||||
if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC]) {
|
||||
*curr_vk_size = {};
|
||||
curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
|
||||
curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC] * max_descriptor_sets_per_pool;
|
||||
curr_vk_size++;
|
||||
vk_sizes_count++;
|
||||
}
|
||||
if (p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT]) {
|
||||
*curr_vk_size = {};
|
||||
curr_vk_size->type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
|
||||
@ -4012,6 +4122,12 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
|
||||
p_linear_pool_index = -1;
|
||||
}
|
||||
DescriptorSetPoolKey pool_key;
|
||||
|
||||
// We first gather dynamic arrays in a local array because TightLocalVector's
|
||||
// growth is not efficient when the number of elements is unknown.
|
||||
const BufferInfo *dynamic_buffers[MAX_DYNAMIC_BUFFERS];
|
||||
uint32_t num_dynamic_buffers = 0u;
|
||||
|
||||
// Immutable samplers will be skipped so we need to track the number of vk_writes used.
|
||||
VkWriteDescriptorSet *vk_writes = ALLOCA_ARRAY(VkWriteDescriptorSet, p_uniforms.size());
|
||||
uint32_t writes_amount = 0;
|
||||
@ -4147,9 +4263,28 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
|
||||
vk_buf_info->buffer = buf_info->vk_buffer;
|
||||
vk_buf_info->range = buf_info->size;
|
||||
|
||||
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER instead of UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC.");
|
||||
|
||||
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
|
||||
} break;
|
||||
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
|
||||
const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
|
||||
VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
|
||||
*vk_buf_info = {};
|
||||
vk_buf_info->buffer = buf_info->vk_buffer;
|
||||
vk_buf_info->range = buf_info->size;
|
||||
|
||||
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC instead of UNIFORM_TYPE_UNIFORM_BUFFER.");
|
||||
ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
|
||||
"Uniform set exceeded the limit of dynamic/persistent buffers. (" + itos(MAX_DYNAMIC_BUFFERS) + ").");
|
||||
|
||||
dynamic_buffers[num_dynamic_buffers++] = buf_info;
|
||||
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
|
||||
} break;
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
|
||||
VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
|
||||
@ -4157,9 +4292,28 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
|
||||
vk_buf_info->buffer = buf_info->vk_buffer;
|
||||
vk_buf_info->range = buf_info->size;
|
||||
|
||||
ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER instead of UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC.");
|
||||
|
||||
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
|
||||
} break;
|
||||
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
|
||||
VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
|
||||
*vk_buf_info = {};
|
||||
vk_buf_info->buffer = buf_info->vk_buffer;
|
||||
vk_buf_info->range = buf_info->size;
|
||||
|
||||
ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
|
||||
"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC instead of UNIFORM_TYPE_STORAGE_BUFFER.");
|
||||
ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
|
||||
"Uniform set exceeded the limit of dynamic/persistent buffers. (" + itos(MAX_DYNAMIC_BUFFERS) + ").");
|
||||
|
||||
dynamic_buffers[num_dynamic_buffers++] = buf_info;
|
||||
vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
|
||||
vk_writes[writes_amount].pBufferInfo = vk_buf_info;
|
||||
} break;
|
||||
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
|
||||
num_descriptors = uniform.ids.size();
|
||||
VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);
|
||||
@ -4223,6 +4377,10 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<Bou
|
||||
usi->vk_descriptor_pool = vk_pool;
|
||||
}
|
||||
usi->pool_sets_it = pool_sets_it;
|
||||
usi->dynamic_buffers.resize(num_dynamic_buffers);
|
||||
for (uint32_t i = 0u; i < num_dynamic_buffers; ++i) {
|
||||
usi->dynamic_buffers[i] = dynamic_buffers[i];
|
||||
}
|
||||
|
||||
return UniformSetID(usi);
|
||||
}
|
||||
@ -4249,6 +4407,31 @@ bool RenderingDeviceDriverVulkan::uniform_sets_have_linear_pools() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t RenderingDeviceDriverVulkan::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {
|
||||
uint32_t mask = 0u;
|
||||
uint32_t shift = 0u;
|
||||
#ifdef DEV_ENABLED
|
||||
uint32_t curr_dynamic_offset = 0u;
|
||||
#endif
|
||||
|
||||
for (uint32_t i = 0; i < p_set_count; i++) {
|
||||
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
|
||||
// At this point this assert should already have been validated.
|
||||
DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);
|
||||
|
||||
for (const BufferInfo *dynamic_buffer : usi->dynamic_buffers) {
|
||||
DEV_ASSERT(dynamic_buffer->frame_idx < 16u);
|
||||
mask |= dynamic_buffer->frame_idx << shift;
|
||||
shift += 4u;
|
||||
}
|
||||
#ifdef DEV_ENABLED
|
||||
curr_dynamic_offset += usi->dynamic_buffers.size();
|
||||
#endif
|
||||
}
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::linear_uniform_set_pools_reset(int p_linear_pool_index) {
|
||||
if (linear_descriptor_pools_enabled) {
|
||||
DescriptorSetPools &pools_to_reset = linear_descriptor_set_pools[p_linear_pool_index];
|
||||
@ -4844,14 +5027,7 @@ void RenderingDeviceDriverVulkan::command_bind_render_pipeline(CommandBufferID p
|
||||
vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
|
||||
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
|
||||
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id;
|
||||
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
|
||||
void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
|
||||
if (p_set_count == 0) {
|
||||
return;
|
||||
}
|
||||
@ -4860,13 +5036,29 @@ void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBuffer
|
||||
sets.clear();
|
||||
sets.resize(p_set_count);
|
||||
|
||||
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
|
||||
uint32_t shift = 0u;
|
||||
uint32_t curr_dynamic_offset = 0u;
|
||||
|
||||
for (uint32_t i = 0; i < p_set_count; i++) {
|
||||
sets[i] = ((const UniformSetInfo *)p_uniform_sets[i].id)->vk_descriptor_set;
|
||||
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
|
||||
|
||||
sets[i] = usi->vk_descriptor_set;
|
||||
|
||||
// At this point this assert should already have been validated.
|
||||
DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);
|
||||
|
||||
const uint32_t dynamic_offset_count = usi->dynamic_buffers.size();
|
||||
for (uint32_t j = 0u; j < dynamic_offset_count; ++j) {
|
||||
const uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
|
||||
shift += 4u;
|
||||
dynamic_offsets[curr_dynamic_offset++] = uint32_t(frame_idx * usi->dynamic_buffers[j]->size);
|
||||
}
|
||||
}
|
||||
|
||||
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
|
||||
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
|
||||
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr);
|
||||
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], curr_dynamic_offset, dynamic_offsets);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
|
||||
@ -5290,14 +5482,7 @@ void RenderingDeviceDriverVulkan::command_bind_compute_pipeline(CommandBufferID
|
||||
vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
|
||||
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
|
||||
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
|
||||
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id;
|
||||
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
|
||||
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
|
||||
if (p_set_count == 0) {
|
||||
return;
|
||||
}
|
||||
@ -5306,13 +5491,29 @@ void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBuffe
|
||||
sets.clear();
|
||||
sets.resize(p_set_count);
|
||||
|
||||
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
|
||||
uint32_t shift = 0u;
|
||||
uint32_t curr_dynamic_offset = 0u;
|
||||
|
||||
for (uint32_t i = 0; i < p_set_count; i++) {
|
||||
sets[i] = ((const UniformSetInfo *)p_uniform_sets[i].id)->vk_descriptor_set;
|
||||
const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
|
||||
|
||||
sets[i] = usi->vk_descriptor_set;
|
||||
|
||||
// At this point this assert should already have been validated.
|
||||
DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);
|
||||
|
||||
const uint32_t dynamic_offset_count = usi->dynamic_buffers.size();
|
||||
for (uint32_t j = 0u; j < dynamic_offset_count; ++j) {
|
||||
const uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
|
||||
shift += 4u;
|
||||
dynamic_offsets[curr_dynamic_offset++] = uint32_t(frame_idx * usi->dynamic_buffers[j]->size);
|
||||
}
|
||||
}
|
||||
|
||||
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
|
||||
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
|
||||
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr);
|
||||
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], curr_dynamic_offset, dynamic_offsets);
|
||||
}
|
||||
|
||||
void RenderingDeviceDriverVulkan::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
|
||||
|
||||
@ -147,6 +147,14 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
|
||||
#endif
|
||||
DeviceFunctions device_functions;
|
||||
|
||||
struct PendingFlushes {
|
||||
LocalVector<VmaAllocation> allocations;
|
||||
LocalVector<VkDeviceSize> offsets;
|
||||
LocalVector<VkDeviceSize> sizes;
|
||||
};
|
||||
|
||||
PendingFlushes pending_flushes;
|
||||
|
||||
void _register_requested_device_extension(const CharString &p_extension_name, bool p_required);
|
||||
Error _initialize_device_extensions();
|
||||
Error _check_device_features();
|
||||
@ -194,14 +202,29 @@ public:
|
||||
} allocation;
|
||||
uint64_t size = 0;
|
||||
VkBufferView vk_view = VK_NULL_HANDLE; // For texel buffers.
|
||||
// If dynamic buffer, then its range is [0; RenderingDeviceDriverVulkan::frame_count)
|
||||
// else it's UINT32_MAX.
|
||||
uint32_t frame_idx = UINT32_MAX;
|
||||
|
||||
bool is_dynamic() const { return frame_idx != UINT32_MAX; }
|
||||
};
|
||||
|
||||
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final;
|
||||
struct BufferDynamicInfo : BufferInfo {
|
||||
uint8_t *persistent_ptr = nullptr;
|
||||
#ifdef DEBUG_ENABLED
|
||||
// For tracking that a persistent buffer isn't mapped twice in the same frame.
|
||||
uint64_t last_frame_mapped = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) override final;
|
||||
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final;
|
||||
virtual void buffer_free(BufferID p_buffer) override final;
|
||||
virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final;
|
||||
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
|
||||
virtual void buffer_unmap(BufferID p_buffer) override final;
|
||||
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final;
|
||||
virtual void buffer_flush(BufferID p_buffer) override final;
|
||||
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
|
||||
|
||||
/*****************/
|
||||
@ -473,6 +496,7 @@ private:
|
||||
VkDescriptorPool vk_descriptor_pool = VK_NULL_HANDLE;
|
||||
VkDescriptorPool vk_linear_descriptor_pool = VK_NULL_HANDLE;
|
||||
DescriptorSetPools::Iterator pool_sets_it;
|
||||
TightLocalVector<BufferInfo const *, uint32_t> dynamic_buffers;
|
||||
};
|
||||
|
||||
public:
|
||||
@ -480,6 +504,7 @@ public:
|
||||
virtual void linear_uniform_set_pools_reset(int p_linear_pool_index) override final;
|
||||
virtual void uniform_set_free(UniformSetID p_uniform_set) override final;
|
||||
virtual bool uniform_sets_have_linear_pools() const override final;
|
||||
virtual uint32_t uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const override final;
|
||||
|
||||
// ----- COMMANDS -----
|
||||
|
||||
@ -567,8 +592,7 @@ public:
|
||||
|
||||
// Binding.
|
||||
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
|
||||
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override final;
|
||||
|
||||
// Drawing.
|
||||
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
|
||||
@ -610,8 +634,7 @@ public:
|
||||
|
||||
// Binding.
|
||||
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
|
||||
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override final;
|
||||
|
||||
// Dispatching.
|
||||
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
|
||||
|
||||
Reference in New Issue
Block a user