diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 29b36f14055..03157c3f5b1 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -6218,6 +6218,8 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { return D3D12_CS_THREAD_GROUP_MAX_Y; case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: return D3D12_CS_THREAD_GROUP_MAX_Z; + case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE: + return D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float); case LIMIT_SUBGROUP_SIZE: // Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html), // but at this time I don't know the implications on the transpilation to DXIL, etc. diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index 2f87f10ea4b..db890f63953 100644 --- a/drivers/metal/metal_device_properties.h +++ b/drivers/metal/metal_device_properties.h @@ -125,6 +125,7 @@ struct MetalLimits { uint32_t maxVertexInputBindingStride; uint32_t maxDrawIndexedIndexValue; uint32_t maxShaderVaryings; + uint32_t maxThreadGroupMemoryAllocation; double temporalScalerInputContentMinScale; double temporalScalerInputContentMaxScale; diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm index 2c7ff576f4b..713f69e41df 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.mm @@ -305,6 +305,14 @@ void MetalDeviceProperties::init_limits(id p_device) { limits.maxVertexInputBindingStride = (2 * KIBI); limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf + if ([p_device supportsFamily:MTLGPUFamilyApple4]) { + limits.maxThreadGroupMemoryAllocation = 32768; + } else if ([p_device supportsFamily:MTLGPUFamilyApple3]) { + limits.maxThreadGroupMemoryAllocation = 16384; + } else { + limits.maxThreadGroupMemoryAllocation = 16352; + } + #if TARGET_OS_IOS && !TARGET_OS_MACCATALYST limits.minUniformBufferOffsetAlignment = 64; #endif diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 6c170034d90..89161578884 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -3899,16 +3899,16 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() { uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { MetalDeviceProperties const &props = (*device_properties); MetalLimits const &limits = props.limits; - + uint64_t safe_unbounded = ((uint64_t)1 << 30); #if defined(DEV_ENABLED) #define UNKNOWN(NAME) \ case NAME: \ WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \ - return (uint64_t)1 << 30; + return safe_unbounded; #else #define UNKNOWN(NAME) \ case NAME: \ - return (uint64_t)1 << 30 + return safe_unbounded #endif // clang-format off @@ -3981,6 +3981,8 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { return limits.maxThreadsPerThreadGroup.height; case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: return limits.maxThreadsPerThreadGroup.depth; + case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE: + return limits.maxThreadGroupMemoryAllocation; case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: return limits.maxViewportDimensionX; case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: @@ -4006,8 +4008,12 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT); UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH); UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT); - default: - ERR_FAIL_V(0); + default: { +#ifdef DEV_ENABLED + WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + "."); +#endif + return safe_unbounded; + } } // clang-format on return 0; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index ae15191dd42..ca75c8c9750 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -5828,6 +5828,7 @@ uint64_t RenderingDeviceDriverVulkan::get_lazily_memory_used() { uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { const VkPhysicalDeviceLimits &limits = physical_device_properties.limits; + uint64_t safe_unbounded = ((uint64_t)1 << 30); switch (p_limit) { case LIMIT_MAX_BOUND_UNIFORM_SETS: return limits.maxBoundDescriptorSets; @@ -5897,6 +5898,8 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { return limits.maxComputeWorkGroupSize[1]; case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: return limits.maxComputeWorkGroupSize[2]; + case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE: + return limits.maxComputeSharedMemorySize; case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: return limits.maxViewportDimensions[0]; case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: @@ -5923,8 +5926,12 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { // The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in // practice, that doesn't seem to be the case. The validation layers don't even complain. return MIN(limits.maxVertexOutputComponents / 4, limits.maxFragmentInputComponents / 4); - default: - ERR_FAIL_V(0); + default: { +#ifdef DEV_ENABLED + WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + "."); +#endif + return safe_unbounded; + } } }