Implement LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE in limit_get for all rendering backends.

Also add a more helpful warning, displayed only in dev builds, to
match the D3D12 backend.
This commit is contained in:
clayjohn
2024-01-19 14:29:45 -08:00
parent 1753893c60
commit 338c12fc9a
5 changed files with 31 additions and 7 deletions

View File

@ -6218,6 +6218,8 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
return D3D12_CS_THREAD_GROUP_MAX_Y; return D3D12_CS_THREAD_GROUP_MAX_Y;
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return D3D12_CS_THREAD_GROUP_MAX_Z; return D3D12_CS_THREAD_GROUP_MAX_Z;
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float);
case LIMIT_SUBGROUP_SIZE: case LIMIT_SUBGROUP_SIZE:
// Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html), // Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html),
// but at this time I don't know the implications on the transpilation to DXIL, etc. // but at this time I don't know the implications on the transpilation to DXIL, etc.

View File

@ -125,6 +125,7 @@ struct MetalLimits {
uint32_t maxVertexInputBindingStride; uint32_t maxVertexInputBindingStride;
uint32_t maxDrawIndexedIndexValue; uint32_t maxDrawIndexedIndexValue;
uint32_t maxShaderVaryings; uint32_t maxShaderVaryings;
uint32_t maxThreadGroupMemoryAllocation;
double temporalScalerInputContentMinScale; double temporalScalerInputContentMinScale;
double temporalScalerInputContentMaxScale; double temporalScalerInputContentMaxScale;

View File

@ -305,6 +305,14 @@ void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
limits.maxVertexInputBindingStride = (2 * KIBI); limits.maxVertexInputBindingStride = (2 * KIBI);
limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxThreadGroupMemoryAllocation = 32768;
} else if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
limits.maxThreadGroupMemoryAllocation = 16384;
} else {
limits.maxThreadGroupMemoryAllocation = 16352;
}
#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST #if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
limits.minUniformBufferOffsetAlignment = 64; limits.minUniformBufferOffsetAlignment = 64;
#endif #endif

View File

@ -3899,16 +3899,16 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
MetalDeviceProperties const &props = (*device_properties); MetalDeviceProperties const &props = (*device_properties);
MetalLimits const &limits = props.limits; MetalLimits const &limits = props.limits;
uint64_t safe_unbounded = ((uint64_t)1 << 30);
#if defined(DEV_ENABLED) #if defined(DEV_ENABLED)
#define UNKNOWN(NAME) \ #define UNKNOWN(NAME) \
case NAME: \ case NAME: \
WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \ WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \
return (uint64_t)1 << 30; return safe_unbounded;
#else #else
#define UNKNOWN(NAME) \ #define UNKNOWN(NAME) \
case NAME: \ case NAME: \
return (uint64_t)1 << 30 return safe_unbounded
#endif #endif
// clang-format off // clang-format off
@ -3981,6 +3981,8 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
return limits.maxThreadsPerThreadGroup.height; return limits.maxThreadsPerThreadGroup.height;
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return limits.maxThreadsPerThreadGroup.depth; return limits.maxThreadsPerThreadGroup.depth;
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return limits.maxThreadGroupMemoryAllocation;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
return limits.maxViewportDimensionX; return limits.maxViewportDimensionX;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@ -4006,8 +4008,12 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT); UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH); UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT); UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT);
default: default: {
ERR_FAIL_V(0); #ifdef DEV_ENABLED
WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
return safe_unbounded;
}
} }
// clang-format on // clang-format on
return 0; return 0;

View File

@ -5828,6 +5828,7 @@ uint64_t RenderingDeviceDriverVulkan::get_lazily_memory_used() {
uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
const VkPhysicalDeviceLimits &limits = physical_device_properties.limits; const VkPhysicalDeviceLimits &limits = physical_device_properties.limits;
uint64_t safe_unbounded = ((uint64_t)1 << 30);
switch (p_limit) { switch (p_limit) {
case LIMIT_MAX_BOUND_UNIFORM_SETS: case LIMIT_MAX_BOUND_UNIFORM_SETS:
return limits.maxBoundDescriptorSets; return limits.maxBoundDescriptorSets;
@ -5897,6 +5898,8 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
return limits.maxComputeWorkGroupSize[1]; return limits.maxComputeWorkGroupSize[1];
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return limits.maxComputeWorkGroupSize[2]; return limits.maxComputeWorkGroupSize[2];
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return limits.maxComputeSharedMemorySize;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
return limits.maxViewportDimensions[0]; return limits.maxViewportDimensions[0];
case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@ -5923,8 +5926,12 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
// The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in // The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in
// practice, that doesn't seem to be the case. The validation layers don't even complain. // practice, that doesn't seem to be the case. The validation layers don't even complain.
return MIN(limits.maxVertexOutputComponents / 4, limits.maxFragmentInputComponents / 4); return MIN(limits.maxVertexOutputComponents / 4, limits.maxFragmentInputComponents / 4);
default: default: {
ERR_FAIL_V(0); #ifdef DEV_ENABLED
WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
return safe_unbounded;
}
} }
} }