From 46277836a60545e729ea2b7c4dc24bfc9565e67c Mon Sep 17 00:00:00 2001 From: Dario Date: Wed, 28 May 2025 11:58:07 -0300 Subject: [PATCH] Optimize Mobile renderer by using FP16 explicitly. --- .../d3d12/rendering_device_driver_d3d12.cpp | 2 +- .../metal/rendering_device_driver_metal.mm | 2 +- .../vulkan/rendering_device_driver_vulkan.cpp | 2 +- editor/plugins/shader_baker_export_plugin.cpp | 4 + servers/rendering/renderer_rd/effects/fsr.cpp | 2 +- .../rendering/renderer_rd/effects/fsr2.cpp | 2 +- .../scene_shader_forward_clustered.cpp | 14 +- .../forward_mobile/render_forward_mobile.cpp | 8 + .../scene_shader_forward_mobile.cpp | 150 ++-- .../scene_shader_forward_mobile.h | 9 +- .../renderer_rd/shaders/decal_data_inc.glsl | 28 +- .../environment/volumetric_fog_process.glsl | 2 +- .../scene_forward_clustered.glsl | 42 +- .../forward_mobile/scene_forward_mobile.glsl | 697 ++++++++++-------- .../scene_forward_mobile_inc.glsl | 36 +- .../renderer_rd/shaders/half_inc.glsl | 43 ++ .../renderer_rd/shaders/light_data_inc.glsl | 102 +-- .../renderer_rd/shaders/scene_data_inc.glsl | 74 +- .../shaders/scene_forward_aa_inc.glsl | 16 +- .../shaders/scene_forward_lights_inc.glsl | 410 ++++++----- .../scene_forward_vertex_lights_inc.glsl | 75 +- servers/rendering/rendering_device_commons.h | 2 +- servers/rendering/rendering_shader_library.h | 2 + 23 files changed, 938 insertions(+), 786 deletions(-) create mode 100644 servers/rendering/renderer_rd/shaders/half_inc.glsl diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index ffba8cea6b5..a91eee30505 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -5580,7 +5580,7 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_FSR_HALF_FLOAT: + case SUPPORTS_HALF_FLOAT: return 
shader_capabilities.native_16bit_ops && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 1a2957cb12c..ed450b8bf80 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -2725,7 +2725,7 @@ uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverMetal::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_FSR_HALF_FLOAT: + case SUPPORTS_HALF_FLOAT: return true; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 95462acabea..e53f6bfbcf7 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -5888,7 +5888,7 @@ uint64_t RenderingDeviceDriverVulkan::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_FSR_HALF_FLOAT: + case SUPPORTS_HALF_FLOAT: return shader_capabilities.shader_float16_is_supported && physical_device_features.shaderInt16 && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; diff --git a/editor/plugins/shader_baker_export_plugin.cpp b/editor/plugins/shader_baker_export_plugin.cpp index 95f99590960..ae26aa4b1cc 100644 --- a/editor/plugins/shader_baker_export_plugin.cpp +++ b/editor/plugins/shader_baker_export_plugin.cpp @@ -130,6 +130,10 @@ bool ShaderBakerExportPlugin::_begin_customize_resources(const Refenable_features(renderer_features); // Included all shaders created by renderers and effects. 
diff --git a/servers/rendering/renderer_rd/effects/fsr.cpp b/servers/rendering/renderer_rd/effects/fsr.cpp index 8368513beeb..103a2835479 100644 --- a/servers/rendering/renderer_rd/effects/fsr.cpp +++ b/servers/rendering/renderer_rd/effects/fsr.cpp @@ -41,7 +41,7 @@ FSR::FSR() { fsr_shader.initialize(fsr_upscale_modes); FSRShaderVariant variant; - if (RD::get_singleton()->has_feature(RD::SUPPORTS_FSR_HALF_FLOAT)) { + if (RD::get_singleton()->has_feature(RD::SUPPORTS_HALF_FLOAT)) { variant = FSR_SHADER_VARIANT_NORMAL; } else { variant = FSR_SHADER_VARIANT_FALLBACK; diff --git a/servers/rendering/renderer_rd/effects/fsr2.cpp b/servers/rendering/renderer_rd/effects/fsr2.cpp index 32d208090da..d6c77497c38 100644 --- a/servers/rendering/renderer_rd/effects/fsr2.cpp +++ b/servers/rendering/renderer_rd/effects/fsr2.cpp @@ -518,7 +518,7 @@ FSR2Effect::FSR2Effect() { capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1; capabilities.waveLaneCountMin = 32; capabilities.waveLaneCountMax = 32; - capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_FSR_HALF_FLOAT); + capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_HALF_FLOAT); capabilities.raytracingSupported = false; String general_defines = diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index dfb1634f1f3..04595ea9e50 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -694,14 +694,14 @@ void SceneShaderForwardClustered::init(const String p_defines) { actions.renames["EYE_OFFSET"] = "eye_offset"; //for light - actions.renames["VIEW"] = "view"; - actions.renames["SPECULAR_AMOUNT"] = "specular_amount"; - actions.renames["LIGHT_COLOR"] = "light_color"; + actions.renames["VIEW"] = 
"view_highp"; + actions.renames["SPECULAR_AMOUNT"] = "specular_amount_highp"; + actions.renames["LIGHT_COLOR"] = "light_color_highp"; actions.renames["LIGHT_IS_DIRECTIONAL"] = "is_directional"; - actions.renames["LIGHT"] = "light"; - actions.renames["ATTENUATION"] = "attenuation"; - actions.renames["DIFFUSE_LIGHT"] = "diffuse_light"; - actions.renames["SPECULAR_LIGHT"] = "specular_light"; + actions.renames["LIGHT"] = "light_highp"; + actions.renames["ATTENUATION"] = "attenuation_highp"; + actions.renames["DIFFUSE_LIGHT"] = "diffuse_light_highp"; + actions.renames["SPECULAR_LIGHT"] = "specular_light_highp"; actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; actions.usage_defines["TANGENT"] = "#define TANGENT_USED\n"; diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index c9d0d7b63c9..6c1a58b5e53 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -380,6 +380,14 @@ uint32_t RenderForwardMobile::get_pipeline_compilations(RS::PipelineSource p_sou } void RenderForwardMobile::enable_features(BitField p_feature_bits) { + if (p_feature_bits.has_flag(FEATURE_FP32_BIT)) { + scene_shader.enable_fp32_shader_group(); + } + + if (p_feature_bits.has_flag(FEATURE_FP16_BIT)) { + scene_shader.enable_fp16_shader_group(); + } + if (p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT)) { scene_shader.enable_multiview_shader_group(); } diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp index 732d4305316..72eca489fe5 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp @@ -381,7 +381,7 @@ RID 
SceneShaderForwardMobile::ShaderData::get_shader_variant(ShaderVersion p_sha if (version.is_valid()) { MutexLock lock(SceneShaderForwardMobile::singleton_mutex); ERR_FAIL_NULL_V(SceneShaderForwardMobile::singleton, RID()); - return SceneShaderForwardMobile::singleton->shader.version_get_shader(version, p_shader_version + (p_ubershader ? SHADER_VERSION_MAX : 0)); + return SceneShaderForwardMobile::singleton->shader.version_get_shader(version, p_shader_version + (SceneShaderForwardMobile::singleton->use_fp16 ? SHADER_VERSION_MAX * 2 : 0) + (p_ubershader ? SHADER_VERSION_MAX : 0)); } else { return RID(); } @@ -389,6 +389,7 @@ RID SceneShaderForwardMobile::ShaderData::get_shader_variant(ShaderVersion p_sha uint64_t SceneShaderForwardMobile::ShaderData::get_vertex_input_mask(ShaderVersion p_shader_version, bool p_ubershader) { // Vertex input masks require knowledge of the shader. Since querying the shader can be expensive due to high contention and the necessary mutex, we cache the result instead. + // It is intentional for the range of the input masks to be different than the versions available in the shaders as it'll only ever use the regular variants or the FP16 ones. uint32_t input_mask_index = p_shader_version + (p_ubershader ? 
SHADER_VERSION_MAX : 0); uint64_t input_mask = vertex_input_masks[input_mask_index].load(std::memory_order_relaxed); if (input_mask == 0) { @@ -446,7 +447,8 @@ void SceneShaderForwardMobile::MaterialData::set_next_pass(RID p_pass) { bool SceneShaderForwardMobile::MaterialData::update_parameters(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { if (shader_data->version.is_valid()) { MutexLock lock(SceneShaderForwardMobile::singleton_mutex); - return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, SceneShaderForwardMobile::singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardMobile::MATERIAL_UNIFORM_SET, true, true); + RID base_shader = SceneShaderForwardMobile::singleton->shader.version_get_shader(shader_data->version, (SceneShaderForwardMobile::singleton->use_fp16 ? SHADER_VERSION_MAX * 2 : 0)); + return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, base_shader, RenderForwardMobile::MATERIAL_UNIFORM_SET, true, true); } else { return false; } @@ -476,6 +478,9 @@ SceneShaderForwardMobile::SceneShaderForwardMobile() { void SceneShaderForwardMobile::init(const String p_defines) { RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + // Store whether the shader will prefer using the FP16 variant. + use_fp16 = RD::get_singleton()->has_feature(RD::SUPPORTS_HALF_FLOAT); + // Immutable samplers : create the shadow sampler to be passed when creating the pipeline. 
{ RD::SamplerState sampler; @@ -490,19 +495,26 @@ void SceneShaderForwardMobile::init(const String p_defines) { { Vector shader_versions; - for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { - const String base_define = ubershader ? "\n#define UBERSHADER\n" : ""; - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "", true)); // SHADER_VERSION_COLOR_PASS - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define USE_LIGHTMAP\n", true)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", true)); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here... - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n#define SHADOW_PASS\n", true)); // SHADER_VERSION_SHADOW_PASS_DP - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n", true)); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + for (uint32_t fp16 = 0; fp16 < 2; fp16++) { + for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { + String base_define = fp16 ? "\n#define EXPLICIT_FP16\n" : ""; + int shader_group = fp16 ? SHADER_GROUP_FP16 : SHADER_GROUP_FP32; + int shader_group_multiview = fp16 ? SHADER_GROUP_FP16_MULTIVIEW : SHADER_GROUP_FP32_MULTIVIEW; + base_define += ubershader ? "\n#define UBERSHADER\n" : ""; - // Multiview versions of our shaders. 
- shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n", false)); // SHADER_VERSION_COLOR_PASS_MULTIVIEW - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n", false)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", false)); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_MOTION_VECTORS\n", false)); // SHADER_VERSION_MOTION_VECTORS_MULTIVIEW + bool default_enabled = (uint32_t(use_fp16) == fp16); + shader_versions.push_back(ShaderRD::VariantDefine(shader_group, base_define + "", default_enabled)); // SHADER_VERSION_COLOR_PASS + shader_versions.push_back(ShaderRD::VariantDefine(shader_group, base_define + "\n#define USE_LIGHTMAP\n", default_enabled)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS + shader_versions.push_back(ShaderRD::VariantDefine(shader_group, base_define + "\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", default_enabled)); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here... + shader_versions.push_back(ShaderRD::VariantDefine(shader_group, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n#define SHADOW_PASS\n", default_enabled)); // SHADER_VERSION_SHADOW_PASS_DP + shader_versions.push_back(ShaderRD::VariantDefine(shader_group, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n", default_enabled)); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + + // Multiview versions of our shaders. 
+ shader_versions.push_back(ShaderRD::VariantDefine(shader_group_multiview, base_define + "\n#define USE_MULTIVIEW\n", false)); // SHADER_VERSION_COLOR_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(shader_group_multiview, base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n", false)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(shader_group_multiview, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", false)); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(shader_group_multiview, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_MOTION_VECTORS\n", false)); // SHADER_VERSION_MOTION_VECTORS_MULTIVIEW + } } Vector immutable_samplers; @@ -514,7 +526,7 @@ void SceneShaderForwardMobile::init(const String p_defines) { shader.initialize(shader_versions, p_defines, immutable_samplers); if (RendererCompositorRD::get_singleton()->is_xr_enabled()) { - shader.enable_group(SHADER_GROUP_MULTIVIEW); + enable_multiview_shader_group(); } } @@ -536,21 +548,21 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.renames["MAIN_CAM_INV_VIEW_MATRIX"] = "scene_data.main_cam_inv_view_matrix"; actions.renames["VERTEX"] = "vertex"; - actions.renames["NORMAL"] = "normal"; - actions.renames["TANGENT"] = "tangent"; - actions.renames["BINORMAL"] = "binormal"; + actions.renames["NORMAL"] = "normal_highp"; + actions.renames["TANGENT"] = "tangent_highp"; + actions.renames["BINORMAL"] = "binormal_highp"; actions.renames["POSITION"] = "position"; actions.renames["UV"] = "uv_interp"; actions.renames["UV2"] = "uv2_interp"; - actions.renames["COLOR"] = "color_interp"; + actions.renames["COLOR"] = "color_highp"; actions.renames["POINT_SIZE"] = "gl_PointSize"; actions.renames["INSTANCE_ID"] = "gl_InstanceIndex"; actions.renames["VERTEX_ID"] = "gl_VertexIndex"; actions.renames["Z_CLIP_SCALE"] = 
"z_clip_scale"; - actions.renames["ALPHA_SCISSOR_THRESHOLD"] = "alpha_scissor_threshold"; - actions.renames["ALPHA_HASH_SCALE"] = "alpha_hash_scale"; - actions.renames["ALPHA_ANTIALIASING_EDGE"] = "alpha_antialiasing_edge"; + actions.renames["ALPHA_SCISSOR_THRESHOLD"] = "alpha_scissor_threshold_highp"; + actions.renames["ALPHA_HASH_SCALE"] = "alpha_hash_scale_highp"; + actions.renames["ALPHA_ANTIALIASING_EDGE"] = "alpha_antialiasing_edge_highp"; actions.renames["ALPHA_TEXTURE_COORDINATE"] = "alpha_texture_coordinate"; //builtins @@ -567,36 +579,36 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.renames["FRAGCOORD"] = "gl_FragCoord"; actions.renames["FRONT_FACING"] = "gl_FrontFacing"; - actions.renames["NORMAL_MAP"] = "normal_map"; - actions.renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; - actions.renames["BENT_NORMAL_MAP"] = "bent_normal_map"; - actions.renames["ALBEDO"] = "albedo"; - actions.renames["ALPHA"] = "alpha"; - actions.renames["PREMUL_ALPHA_FACTOR"] = "premul_alpha"; - actions.renames["METALLIC"] = "metallic"; - actions.renames["SPECULAR"] = "specular"; - actions.renames["ROUGHNESS"] = "roughness"; - actions.renames["RIM"] = "rim"; - actions.renames["RIM_TINT"] = "rim_tint"; - actions.renames["CLEARCOAT"] = "clearcoat"; - actions.renames["CLEARCOAT_ROUGHNESS"] = "clearcoat_roughness"; - actions.renames["ANISOTROPY"] = "anisotropy"; - actions.renames["ANISOTROPY_FLOW"] = "anisotropy_flow"; - actions.renames["SSS_STRENGTH"] = "sss_strength"; - actions.renames["SSS_TRANSMITTANCE_COLOR"] = "transmittance_color"; - actions.renames["SSS_TRANSMITTANCE_DEPTH"] = "transmittance_depth"; - actions.renames["SSS_TRANSMITTANCE_BOOST"] = "transmittance_boost"; - actions.renames["BACKLIGHT"] = "backlight"; - actions.renames["AO"] = "ao"; - actions.renames["AO_LIGHT_AFFECT"] = "ao_light_affect"; - actions.renames["EMISSION"] = "emission"; + actions.renames["NORMAL_MAP"] = "normal_map_highp"; + actions.renames["NORMAL_MAP_DEPTH"] = 
"normal_map_depth_highp"; + actions.renames["BENT_NORMAL_MAP"] = "bent_normal_map_highp"; + actions.renames["ALBEDO"] = "albedo_highp"; + actions.renames["ALPHA"] = "alpha_highp"; + actions.renames["PREMUL_ALPHA_FACTOR"] = "premul_alpha_highp"; + actions.renames["METALLIC"] = "metallic_highp"; + actions.renames["SPECULAR"] = "specular_highp"; + actions.renames["ROUGHNESS"] = "roughness_highp"; + actions.renames["RIM"] = "rim_highp"; + actions.renames["RIM_TINT"] = "rim_tint_highp"; + actions.renames["CLEARCOAT"] = "clearcoat_highp"; + actions.renames["CLEARCOAT_ROUGHNESS"] = "clearcoat_roughness_highp"; + actions.renames["ANISOTROPY"] = "anisotropy_highp"; + actions.renames["ANISOTROPY_FLOW"] = "anisotropy_flow_highp"; + actions.renames["SSS_STRENGTH"] = "sss_strength_highp"; + actions.renames["SSS_TRANSMITTANCE_COLOR"] = "transmittance_color_highp"; + actions.renames["SSS_TRANSMITTANCE_DEPTH"] = "transmittance_depth_highp"; + actions.renames["SSS_TRANSMITTANCE_BOOST"] = "transmittance_boost_highp"; + actions.renames["BACKLIGHT"] = "backlight_highp"; + actions.renames["AO"] = "ao_highp"; + actions.renames["AO_LIGHT_AFFECT"] = "ao_light_affect_highp"; + actions.renames["EMISSION"] = "emission_highp"; actions.renames["POINT_COORD"] = "gl_PointCoord"; actions.renames["INSTANCE_CUSTOM"] = "instance_custom"; actions.renames["SCREEN_UV"] = "screen_uv"; actions.renames["DEPTH"] = "gl_FragDepth"; - actions.renames["FOG"] = "fog"; - actions.renames["RADIANCE"] = "custom_radiance"; - actions.renames["IRRADIANCE"] = "custom_irradiance"; + actions.renames["FOG"] = "fog_highp"; + actions.renames["RADIANCE"] = "custom_radiance_highp"; + actions.renames["IRRADIANCE"] = "custom_irradiance_highp"; actions.renames["BONE_INDICES"] = "bone_attrib"; actions.renames["BONE_WEIGHTS"] = "weight_attrib"; actions.renames["CUSTOM0"] = "custom0_attrib"; @@ -617,14 +629,14 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.renames["EYE_OFFSET"] = "eye_offset"; //for light 
- actions.renames["VIEW"] = "view"; - actions.renames["SPECULAR_AMOUNT"] = "specular_amount"; - actions.renames["LIGHT_COLOR"] = "light_color"; + actions.renames["VIEW"] = "view_highp"; + actions.renames["SPECULAR_AMOUNT"] = "specular_amount_highp"; + actions.renames["LIGHT_COLOR"] = "light_color_highp"; actions.renames["LIGHT_IS_DIRECTIONAL"] = "is_directional"; - actions.renames["LIGHT"] = "light"; - actions.renames["ATTENUATION"] = "attenuation"; - actions.renames["DIFFUSE_LIGHT"] = "diffuse_light"; - actions.renames["SPECULAR_LIGHT"] = "specular_light"; + actions.renames["LIGHT"] = "light_highp"; + actions.renames["ATTENUATION"] = "attenuation_highp"; + actions.renames["DIFFUSE_LIGHT"] = "diffuse_light_highp"; + actions.renames["SPECULAR_LIGHT"] = "specular_light_highp"; actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; actions.usage_defines["TANGENT"] = "#define TANGENT_USED\n"; @@ -749,7 +761,7 @@ void fragment() { material_storage->material_set_shader(default_material, default_shader); MaterialData *md = static_cast(material_storage->material_get_data(default_material, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - default_shader_rd = shader.version_get_shader(md->shader_data->version, SHADER_VERSION_COLOR_PASS); + default_shader_rd = shader.version_get_shader(md->shader_data->version, (use_fp16 ? 
SHADER_VERSION_MAX * 2 : 0) + SHADER_VERSION_COLOR_PASS); default_material_shader_ptr = md->shader_data; default_material_uniform_set = md->uniform_set; @@ -830,12 +842,34 @@ uint32_t SceneShaderForwardMobile::get_pipeline_compilations(RS::PipelineSource return pipeline_compilations[p_source]; } +void SceneShaderForwardMobile::enable_fp32_shader_group() { + shader.enable_group(SHADER_GROUP_FP32); + + if (is_multiview_shader_group_enabled()) { + enable_multiview_shader_group(); + } +} + +void SceneShaderForwardMobile::enable_fp16_shader_group() { + shader.enable_group(SHADER_GROUP_FP16); + + if (is_multiview_shader_group_enabled()) { + enable_multiview_shader_group(); + } +} + void SceneShaderForwardMobile::enable_multiview_shader_group() { - shader.enable_group(SHADER_GROUP_MULTIVIEW); + if (shader.is_group_enabled(SHADER_GROUP_FP32)) { + shader.enable_group(SHADER_GROUP_FP32_MULTIVIEW); + } + + if (shader.is_group_enabled(SHADER_GROUP_FP16)) { + shader.enable_group(SHADER_GROUP_FP16_MULTIVIEW); + } } bool SceneShaderForwardMobile::is_multiview_shader_group_enabled() const { - return shader.is_group_enabled(SHADER_GROUP_MULTIVIEW); + return shader.is_group_enabled(SHADER_GROUP_FP32_MULTIVIEW) || shader.is_group_enabled(SHADER_GROUP_FP16_MULTIVIEW); } SceneShaderForwardMobile::~SceneShaderForwardMobile() { diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h index 29fc5bb2a52..bde585f534e 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h @@ -74,8 +74,10 @@ public: } enum ShaderGroup { - SHADER_GROUP_BASE, // Always compiled at the beginning. 
- SHADER_GROUP_MULTIVIEW, + SHADER_GROUP_FP32, + SHADER_GROUP_FP32_MULTIVIEW, + SHADER_GROUP_FP16, + SHADER_GROUP_FP16_MULTIVIEW, }; struct ShaderSpecialization { @@ -311,6 +313,7 @@ public: SceneForwardMobileShaderRD shader; ShaderCompiler compiler; + bool use_fp16 = false; RID default_shader; RID default_material; @@ -344,6 +347,8 @@ public: void init(const String p_defines); void set_default_specialization(const ShaderSpecialization &p_specialization); uint32_t get_pipeline_compilations(RS::PipelineSource p_source); + void enable_fp32_shader_group(); + void enable_fp16_shader_group(); void enable_multiview_shader_group(); bool is_multiview_shader_group_enabled() const; }; diff --git a/servers/rendering/renderer_rd/shaders/decal_data_inc.glsl b/servers/rendering/renderer_rd/shaders/decal_data_inc.glsl index 3d6eaab8e1a..b1b2e04dc1e 100644 --- a/servers/rendering/renderer_rd/shaders/decal_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/decal_data_inc.glsl @@ -1,17 +1,17 @@ struct DecalData { - highp mat4 xform; //to decal transform - highp vec3 inv_extents; - mediump float albedo_mix; - highp vec4 albedo_rect; - highp vec4 normal_rect; - highp vec4 orm_rect; - highp vec4 emission_rect; - highp vec4 modulate; - mediump float emission_energy; + mat4 xform; //to decal transform + vec3 inv_extents; + float albedo_mix; + vec4 albedo_rect; + vec4 normal_rect; + vec4 orm_rect; + vec4 emission_rect; + vec4 modulate; + float emission_energy; uint mask; - mediump float upper_fade; - mediump float lower_fade; - mediump mat3x4 normal_xform; - mediump vec3 normal; - mediump float normal_fade; + float upper_fade; + float lower_fade; + mat3x4 normal_xform; + vec3 normal; + float normal_fade; }; diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl index d7e422a74b0..832058553e1 100644 --- 
a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl @@ -541,7 +541,7 @@ void main() { float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation); vec3 spot_dir = spot_lights.data[light_index].direction; - highp float cone_angle = spot_lights.data[light_index].cone_angle; + float cone_angle = spot_lights.data[light_index].cone_angle; float scos = max(dot(-normalize(light_rel_vec), spot_dir), cone_angle); float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cone_angle)); attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); diff --git a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl index 6847d3f5b49..afe3452037a 100644 --- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl @@ -4,6 +4,9 @@ #VERSION_DEFINES +/* Include half precision types. */ +#include "../half_inc.glsl" + #include "scene_forward_clustered_inc.glsl" #define SHADER_IS_SRGB false @@ -155,8 +158,8 @@ ivec2 multiview_uv(ivec2 uv) { #endif //USE_MULTIVIEW #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) -layout(location = 12) highp out vec4 diffuse_light_interp; -layout(location = 13) highp out vec4 specular_light_interp; +layout(location = 12) out vec4 diffuse_light_interp; +layout(location = 13) out vec4 specular_light_interp; #include "../scene_forward_vertex_lights_inc.glsl" @@ -808,6 +811,9 @@ void main() { #define SHADER_IS_SRGB false #define SHADER_SPACE_FAR 0.0 +/* Include half precision types. 
*/ +#include "../half_inc.glsl" + #include "scene_forward_clustered_inc.glsl" /* Varyings */ @@ -929,8 +935,8 @@ ivec2 multiview_uv(ivec2 uv) { } #endif // !USE_MULTIVIEW #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) -layout(location = 12) highp in vec4 diffuse_light_interp; -layout(location = 13) highp in vec4 specular_light_interp; +layout(location = 12) in vec4 diffuse_light_interp; +layout(location = 13) in vec4 specular_light_interp; #endif //defines to keep compatibility with vertex @@ -1127,14 +1133,14 @@ void fragment_shader(in SceneData scene_data) { vec3 vertex = vertex_interp; #ifdef USE_MULTIVIEW vec3 eye_offset = scene_data.eye_offset[ViewIndex].xyz; - vec3 view = -normalize(vertex_interp - eye_offset); + vec3 view_highp = -normalize(vertex_interp - eye_offset); // UV in our combined frustum space is used for certain screen uv processes where it's // overkill to render separate left and right eye views vec2 combined_uv = (combined_projected.xy / combined_projected.w) * 0.5 + 0.5; #else vec3 eye_offset = vec3(0.0, 0.0, 0.0); - vec3 view = -normalize(vertex_interp); + vec3 view_highp = -normalize(vertex_interp); #endif vec3 albedo = vec3(1.0); vec3 backlight = vec3(0.0); @@ -1260,10 +1266,12 @@ void fragment_shader(in SceneData scene_data) { #ifdef LIGHT_VERTEX_USED vertex = light_vertex; #ifdef USE_MULTIVIEW - view = -normalize(vertex - eye_offset); + vec3 view = -normalize(vertex - eye_offset); #else - view = -normalize(vertex); + vec3 view = -normalize(vertex); #endif //USE_MULTIVIEW +#else + vec3 view = view_highp; #endif //LIGHT_VERTEX_USED #ifdef NORMAL_USED @@ -2063,11 +2071,6 @@ void fragment_shader(in SceneData scene_data) { #endif // !AMBIENT_LIGHT_DISABLED #endif //GI !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) -#if !defined(MODE_RENDER_DEPTH) - //this saves some VGPRs - uint orms = packUnorm4x8(vec4(ao, roughness, metallic, specular)); -#endif - // LIGHTING #if 
!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) @@ -2455,7 +2458,7 @@ void fragment_shader(in SceneData scene_data) { #else directional_lights.data[i].color * directional_lights.data[i].energy * tint, #endif - true, shadow, f0, orms, directional_lights.data[i].specular, albedo, alpha, screen_uv, energy_compensation, + true, shadow, f0, roughness, metallic, directional_lights.data[i].specular, albedo, alpha, screen_uv, energy_compensation, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -2519,7 +2522,7 @@ void fragment_shader(in SceneData scene_data) { continue; // Statically baked light and object uses lightmap, skip } - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, energy_compensation, + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, roughness, metallic, scene_data.taa_frame_count, albedo, alpha, screen_uv, energy_compensation, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -2536,7 +2539,7 @@ void fragment_shader(in SceneData scene_data) { clearcoat, clearcoat_roughness, geo_normal, #endif // LIGHT_CLEARCOAT_USED #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + binormal, tangent, anisotropy, #endif diffuse_light, direct_specular_light); } @@ -2580,7 +2583,7 @@ void fragment_shader(in SceneData scene_data) { continue; // Statically baked light and object uses lightmap, skip } - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, energy_compensation, + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, roughness, metallic, scene_data.taa_frame_count, albedo, alpha, screen_uv, energy_compensation, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -2597,8 +2600,7 @@ void fragment_shader(in SceneData scene_data) { clearcoat, clearcoat_roughness, geo_normal, #endif // LIGHT_CLEARCOAT_USED #ifdef 
LIGHT_ANISOTROPY_USED - tangent, - binormal, anisotropy, + binormal, tangent, anisotropy, #endif diffuse_light, direct_specular_light); } @@ -2775,12 +2777,10 @@ void fragment_shader(in SceneData scene_data) { diffuse_light *= albedo; // ambient must be multiplied by albedo at the end // apply direct light AO - ao = unpackUnorm4x8(orms).x; diffuse_light *= ao; direct_specular_light *= ao; // apply metallic - metallic = unpackUnorm4x8(orms).z; diffuse_light *= 1.0 - metallic; ambient_light *= 1.0 - metallic; diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl index 0f2cd5f982e..1af333b038d 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl @@ -4,6 +4,9 @@ #VERSION_DEFINES +/* Include half precision types. */ +#include "../half_inc.glsl" + /* Include our forward mobile UBOs definitions etc. 
*/ #include "scene_forward_mobile_inc.glsl" @@ -94,31 +97,31 @@ void axis_angle_to_tbn(vec3 axis, float angle, out vec3 tangent, out vec3 binorm /* Varyings */ -layout(location = 0) highp out vec3 vertex_interp; +layout(location = 0) out vec3 vertex_interp; #ifdef NORMAL_USED -layout(location = 1) mediump out vec3 normal_interp; +layout(location = 1) out hvec3 normal_interp; #endif #if defined(COLOR_USED) -layout(location = 2) mediump out vec4 color_interp; +layout(location = 2) out hvec4 color_interp; #endif #ifdef UV_USED -layout(location = 3) mediump out vec2 uv_interp; +layout(location = 3) out vec2 uv_interp; #endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) -layout(location = 4) mediump out vec2 uv2_interp; +layout(location = 4) out vec2 uv2_interp; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(BENT_NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 5) mediump out vec3 tangent_interp; -layout(location = 6) mediump out vec3 binormal_interp; +layout(location = 5) out hvec3 tangent_interp; +layout(location = 6) out hvec3 binormal_interp; #endif #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) -layout(location = 7) highp out vec4 diffuse_light_interp; -layout(location = 8) highp out vec4 specular_light_interp; +layout(location = 7) out hvec4 diffuse_light_interp; +layout(location = 8) out hvec4 specular_light_interp; #include "../scene_forward_vertex_lights_inc.glsl" #endif // !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) @@ -132,7 +135,7 @@ layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms #ifdef MODE_DUAL_PARABOLOID -layout(location = 9) out highp float dp_clip; +layout(location = 9) out float dp_clip; #endif @@ -242,11 +245,11 @@ void _unpack_vertex_attributes(vec4 p_vertex_in, vec3 p_compressed_aabb_position void vertex_shader(in vec3 vertex, #ifdef NORMAL_USED - in vec3 normal, + in vec3 
normal_highp, #endif #if defined(NORMAL_USED) || defined(TANGENT_USED) - in vec3 tangent, - in vec3 binormal, + in vec3 tangent_highp, + in vec3 binormal_highp, #endif in uint instance_index, in uint multimesh_offset, in mat4 model_matrix, #ifdef MODE_DUAL_PARABOLOID @@ -268,7 +271,7 @@ void vertex_shader(in vec3 vertex, out vec4 screen_position_output) { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) - color_interp = color_attrib; + vec4 color_highp = color_attrib; #endif #ifdef USE_DOUBLE_PRECISION @@ -336,7 +339,7 @@ void vertex_shader(in vec3 vertex, instance_custom = transforms.data[offset + 4]; #ifdef COLOR_USED - color_interp *= pcolor; + color_highp *= pcolor; #endif #else @@ -353,7 +356,7 @@ void vertex_shader(in vec3 vertex, if (sc_multimesh_has_color()) { #ifdef COLOR_USED - color_interp *= transforms.data[offset]; + color_highp *= transforms.data[offset]; #endif offset += 1; } @@ -377,6 +380,10 @@ void vertex_shader(in vec3 vertex, model_normal_matrix = model_normal_matrix * mat3(matrix); } +#if defined(COLOR_USED) + color_interp = hvec4(color_highp); +#endif + #ifdef UV_USED uv_interp = uv_attrib; #endif @@ -412,13 +419,13 @@ void vertex_shader(in vec3 vertex, vertex = (model_matrix * vec4(vertex, 1.0)).xyz; #ifdef NORMAL_USED - normal = model_normal_matrix * normal; + normal_highp = model_normal_matrix * normal_highp; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(BENT_NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - tangent = model_normal_matrix * tangent; - binormal = model_normal_matrix * binormal; + tangent_highp = model_normal_matrix * tangent_highp; + binormal_highp = model_normal_matrix * binormal_highp; #endif #endif @@ -427,7 +434,7 @@ void vertex_shader(in vec3 vertex, float z_clip_scale = 1.0; #endif - float roughness = 1.0; + float roughness_highp = 1.0; mat4 modelview = view_matrix * model_matrix; mat3 modelview_normal = mat3(view_matrix) * model_normal_matrix; @@ -438,6 +445,8 @@ void 
vertex_shader(in vec3 vertex, #CODE : VERTEX } + half roughness = half(roughness_highp); + // using local coordinates (default) #if !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) @@ -458,13 +467,13 @@ void vertex_shader(in vec3 vertex, vertex = (modelview * vec4(vertex, 1.0)).xyz; #endif #ifdef NORMAL_USED - normal = modelview_normal * normal; + normal_highp = modelview_normal * normal_highp; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(BENT_NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - binormal = modelview_normal * binormal; - tangent = modelview_normal * tangent; + binormal_highp = modelview_normal * binormal_highp; + tangent_highp = modelview_normal * tangent_highp; #endif #endif // !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) @@ -473,12 +482,12 @@ void vertex_shader(in vec3 vertex, vertex = (view_matrix * vec4(vertex, 1.0)).xyz; #ifdef NORMAL_USED - normal = (view_matrix * vec4(normal, 0.0)).xyz; + normal_highp = (view_matrix * vec4(normal_highp, 0.0)).xyz; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(BENT_NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - binormal = (view_matrix * vec4(binormal, 0.0)).xyz; - tangent = (view_matrix * vec4(tangent, 0.0)).xyz; + binormal_highp = (view_matrix * vec4(binormal_highp, 0.0)).xyz; + tangent_highp = (view_matrix * vec4(tangent_highp, 0.0)).xyz; #endif #endif @@ -487,24 +496,24 @@ void vertex_shader(in vec3 vertex, // Normalize TBN vectors before interpolation, per MikkTSpace. 
// See: http://www.mikktspace.com/ #ifdef NORMAL_USED - normal_interp = normalize(normal); + normal_interp = hvec3(normalize(normal_highp)); #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) || defined(BENT_NORMAL_MAP_USED) - tangent_interp = normalize(tangent); - binormal_interp = normalize(binormal); + tangent_interp = hvec3(normalize(tangent_highp)); + binormal_interp = hvec3(normalize(binormal_highp)); #endif // VERTEX LIGHTING #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) #ifdef USE_MULTIVIEW - vec3 view = -normalize(vertex_interp - eye_offset); + hvec3 view = hvec3(-normalize(vertex_interp - eye_offset)); #else - vec3 view = -normalize(vertex_interp); + hvec3 view = hvec3(-normalize(vertex_interp)); #endif - diffuse_light_interp = vec4(0.0); - specular_light_interp = vec4(0.0); + diffuse_light_interp = hvec4(0.0); + specular_light_interp = hvec4(0.0); uint omni_light_count = sc_omni_lights(8); uvec2 omni_light_indices = instances.data[instance_index].omni_lights; @@ -531,8 +540,8 @@ void vertex_shader(in vec3 vertex, uint directional_lights_count = sc_directional_lights(scene_directional_light_count); if (directional_lights_count > 0) { // We process the first directional light separately as it may have shadows. - vec3 directional_diffuse = vec3(0.0); - vec3 directional_specular = vec3(0.0); + hvec3 directional_diffuse = hvec3(0.0); + hvec3 directional_specular = hvec3(0.0); for (uint i = 0; i < directional_lights_count; i++) { if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { @@ -543,14 +552,14 @@ void vertex_shader(in vec3 vertex, continue; // Statically baked light and object uses lightmap, skip. 
} if (i == 0) { - light_compute_vertex(normal_interp, directional_lights.data[0].direction, view, - directional_lights.data[0].color * directional_lights.data[0].energy, + light_compute_vertex(normal_interp, hvec3(directional_lights.data[0].direction), view, + hvec3(directional_lights.data[0].color * directional_lights.data[0].energy), true, roughness, directional_diffuse, directional_specular); } else { - light_compute_vertex(normal_interp, directional_lights.data[i].direction, view, - directional_lights.data[i].color * directional_lights.data[i].energy, + light_compute_vertex(normal_interp, hvec3(directional_lights.data[i].direction), view, + hvec3(directional_lights.data[i].color * directional_lights.data[i].energy), true, roughness, diffuse_light_interp.rgb, specular_light_interp.rgb); @@ -558,22 +567,22 @@ void vertex_shader(in vec3 vertex, } // Calculate the contribution from the shadowed light so we can scale the shadows accordingly. - float diff_avg = dot(diffuse_light_interp.rgb, vec3(0.33333)); - float diff_dir_avg = dot(directional_diffuse, vec3(0.33333)); - if (diff_avg > 0.0) { + half diff_avg = dot(diffuse_light_interp.rgb, hvec3(0.33333)); + half diff_dir_avg = dot(directional_diffuse, hvec3(0.33333)); + if (diff_avg > half(0.0)) { diffuse_light_interp.a = diff_dir_avg / (diff_avg + diff_dir_avg); } else { - diffuse_light_interp.a = 1.0; + diffuse_light_interp.a = half(1.0); } diffuse_light_interp.rgb += directional_diffuse; - float spec_avg = dot(specular_light_interp.rgb, vec3(0.33333)); - float spec_dir_avg = dot(directional_specular, vec3(0.33333)); - if (spec_avg > 0.0) { + half spec_avg = dot(specular_light_interp.rgb, hvec3(0.33333)); + half spec_dir_avg = dot(directional_specular, hvec3(0.33333)); + if (spec_avg > half(0.0)) { specular_light_interp.a = spec_dir_avg / (spec_avg + spec_dir_avg); } else { - specular_light_interp.a = 1.0; + specular_light_interp.a = half(1.0); } specular_light_interp.rgb += directional_specular; @@ -768,42 
+777,45 @@ void main() { #define IN_SHADOW_PASS false #endif +/* Include half precision types. */ +#include "../half_inc.glsl" + /* Include our forward mobile UBOs definitions etc. */ #include "scene_forward_mobile_inc.glsl" /* Varyings */ -layout(location = 0) highp in vec3 vertex_interp; +layout(location = 0) in vec3 vertex_interp; #ifdef NORMAL_USED -layout(location = 1) mediump in vec3 normal_interp; +layout(location = 1) in hvec3 normal_interp; #endif #if defined(COLOR_USED) -layout(location = 2) mediump in vec4 color_interp; +layout(location = 2) in hvec4 color_interp; #endif #ifdef UV_USED -layout(location = 3) mediump in vec2 uv_interp; +layout(location = 3) in vec2 uv_interp; #endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) -layout(location = 4) mediump in vec2 uv2_interp; +layout(location = 4) in vec2 uv2_interp; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(BENT_NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 5) mediump in vec3 tangent_interp; -layout(location = 6) mediump in vec3 binormal_interp; +layout(location = 5) in hvec3 tangent_interp; +layout(location = 6) in hvec3 binormal_interp; #endif #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) -layout(location = 7) highp in vec4 diffuse_light_interp; -layout(location = 8) highp in vec4 specular_light_interp; +layout(location = 7) in hvec4 diffuse_light_interp; +layout(location = 8) in hvec4 specular_light_interp; #endif #ifdef MODE_DUAL_PARABOLOID -layout(location = 9) highp in float dp_clip; +layout(location = 9) in float dp_clip; #endif @@ -941,7 +953,7 @@ layout(location = 0) out vec4 diffuse_buffer; //diffuse (rgb) and roughness layout(location = 1) out vec4 specular_buffer; //specular and SSS (subsurface scatter) #else -layout(location = 0) out mediump vec4 frag_color; +layout(location = 0) out hvec4 frag_color; #endif // MODE_MULTIPLE_RENDER_TARGETS #endif // RENDER DEPTH @@ -965,7 +977,7 @@ 
layout(location = 0) out mediump vec4 frag_color; Only supporting normal fog here. */ -vec4 fog_process(vec3 vertex) { +hvec4 fog_process(vec3 vertex) { vec3 fog_color = scene_data_block.data.fog_light_color; if (sc_use_fog_aerial_perspective()) { @@ -1017,7 +1029,7 @@ vec4 fog_process(vec3 vertex) { fog_amount = max(vfog_amount, fog_amount); } - return vec4(fog_color, fog_amount); + return hvec4(fog_color, fog_amount); } #endif //!MODE_RENDER DEPTH @@ -1042,57 +1054,57 @@ void main() { vec3 vertex = vertex_interp; #ifdef USE_MULTIVIEW vec3 eye_offset = scene_data.eye_offset[ViewIndex].xyz; - vec3 view = -normalize(vertex_interp - eye_offset); + vec3 view_highp = -normalize(vertex_interp - eye_offset); #else vec3 eye_offset = vec3(0.0, 0.0, 0.0); - vec3 view = -normalize(vertex_interp); + vec3 view_highp = -normalize(vertex_interp); #endif - vec3 albedo = vec3(1.0); - vec3 backlight = vec3(0.0); - vec4 transmittance_color = vec4(0.0); - float transmittance_depth = 0.0; - float transmittance_boost = 0.0; - float metallic = 0.0; - float specular = 0.5; - vec3 emission = vec3(0.0); - float roughness = 1.0; - float rim = 0.0; - float rim_tint = 0.0; - float clearcoat = 0.0; - float clearcoat_roughness = 0.0; - float anisotropy = 0.0; - vec2 anisotropy_flow = vec2(1.0, 0.0); + vec3 albedo_highp = vec3(1.0); + vec3 backlight_highp = vec3(0.0); + vec4 transmittance_color_highp = vec4(0.0); + float transmittance_depth_highp = 0.0; + float transmittance_boost_highp = 0.0; + float metallic_highp = 0.0; + float specular_highp = 0.5; + vec3 emission_highp = vec3(0.0); + float roughness_highp = 1.0; + float rim_highp = 0.0; + float rim_tint_highp = 0.0; + float clearcoat_highp = 0.0; + float clearcoat_roughness_highp = 0.0; + float anisotropy_highp = 0.0; + vec2 anisotropy_flow_highp = vec2(1.0, 0.0); #ifdef PREMUL_ALPHA_USED - float premul_alpha = 1.0; + float premul_alpha_highp = 1.0; #endif #ifndef FOG_DISABLED - vec4 fog = vec4(0.0); + vec4 fog_highp = vec4(0.0); #endif // 
!FOG_DISABLED #if defined(CUSTOM_RADIANCE_USED) - vec4 custom_radiance = vec4(0.0); + vec4 custom_radiance_highp = vec4(0.0); #endif #if defined(CUSTOM_IRRADIANCE_USED) - vec4 custom_irradiance = vec4(0.0); + vec4 custom_irradiance_highp = vec4(0.0); #endif - float ao = 1.0; - float ao_light_affect = 0.0; + float ao_highp = 1.0; + float ao_light_affect_highp = 0.0; - float alpha = 1.0; + float alpha_highp = 1.0; #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) || defined(BENT_NORMAL_MAP_USED) - vec3 binormal = binormal_interp; - vec3 tangent = tangent_interp; + vec3 binormal_highp = vec3(binormal_interp); + vec3 tangent_highp = vec3(tangent_interp); #else // TANGENT_USED || NORMAL_MAP_USED || LIGHT_ANISOTROPY_USED || BENT_NORMAL_MAP_USED - vec3 binormal = vec3(0.0); - vec3 tangent = vec3(0.0); + vec3 binormal_highp = vec3(0.0); + vec3 tangent_highp = vec3(0.0); #endif #ifdef NORMAL_USED - vec3 normal = normal_interp; + vec3 normal_highp = vec3(normal_interp); #if defined(DO_SIDE_CHECK) if (!gl_FrontFacing) { - normal = -normal; + normal_highp = -normal_highp; } #endif // DO_SIDE_CHECK #endif // NORMAL_USED @@ -1106,35 +1118,35 @@ void main() { #endif #if defined(COLOR_USED) - vec4 color = color_interp; + vec4 color_highp = vec4(color_interp); #endif #if defined(NORMAL_MAP_USED) - vec3 normal_map = vec3(0.5); + vec3 normal_map_highp = vec3(0.5); #endif #if defined(BENT_NORMAL_MAP_USED) - vec3 bent_normal_vector; - vec3 bent_normal_map = vec3(0.5); + hvec3 bent_normal_vector; + vec3 bent_normal_map_highp = vec3(0.5); #endif - float normal_map_depth = 1.0; + float normal_map_depth_highp = 1.0; vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size; - float sss_strength = 0.0; + float sss_strength_highp = 0.0; #ifdef ALPHA_SCISSOR_USED - float alpha_scissor_threshold = 1.0; + float alpha_scissor_threshold_highp = 1.0; #endif // ALPHA_SCISSOR_USED #ifdef ALPHA_HASH_USED - float alpha_hash_scale = 1.0; + float 
alpha_hash_scale_highp = 1.0; #endif // ALPHA_HASH_USED #ifdef ALPHA_ANTIALIASING_EDGE_USED - float alpha_antialiasing_edge = 0.0; + float alpha_antialiasing_edge_highp = 0.0; vec2 alpha_texture_coordinate = vec2(0.0, 0.0); #endif // ALPHA_ANTIALIASING_EDGE_USED @@ -1167,17 +1179,80 @@ void main() { #CODE : FRAGMENT } + // Store variables in half precision after user shader code is run. + hvec3 view = hvec3(view_highp); + hvec3 albedo = hvec3(albedo_highp); + hvec3 backlight = hvec3(backlight_highp); + hvec4 transmittance_color = hvec4(transmittance_color_highp); + half transmittance_depth = half(transmittance_depth_highp); + half transmittance_boost = half(transmittance_boost_highp); + half metallic = half(metallic_highp); + half specular = half(specular_highp); + hvec3 emission = hvec3(emission_highp); + half roughness = half(roughness_highp); + half rim = half(rim_highp); + half rim_tint = half(rim_tint_highp); + half clearcoat = half(clearcoat_highp); + half clearcoat_roughness = half(clearcoat_roughness_highp); + half anisotropy = half(anisotropy_highp); + hvec2 anisotropy_flow = hvec2(anisotropy_flow_highp); + half ao = half(ao_highp); + half ao_light_affect = half(ao_light_affect_highp); + half alpha = half(alpha_highp); + half normal_map_depth = half(normal_map_depth_highp); + half sss_strength = half(sss_strength_highp); +#ifdef PREMUL_ALPHA_USED + half premul_alpha = half(premul_alpha_highp); +#endif +#ifndef FOG_DISABLED + hvec4 fog = hvec4(fog_highp); +#endif +#ifdef CUSTOM_RADIANCE_USED + hvec4 custom_radiance = hvec4(custom_radiance_highp); +#endif +#ifdef CUSTOM_IRRADIANCE_USED + hvec4 custom_irradiance = hvec4(custom_irradiance_highp); +#endif +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) || defined(BENT_NORMAL_MAP_USED) + hvec3 binormal = hvec3(binormal_highp); + hvec3 tangent = hvec3(tangent_highp); +#else + hvec3 binormal = hvec3(binormal_highp); + hvec3 tangent = hvec3(tangent_highp); +#endif +#ifdef 
NORMAL_USED + hvec3 normal = hvec3(normal_highp); +#endif +#if defined(COLOR_USED) + hvec4 color = hvec4(color_highp); +#endif +#if defined(NORMAL_MAP_USED) + hvec3 normal_map = hvec3(normal_map_highp); +#endif +#if defined(BENT_NORMAL_MAP_USED) + hvec3 bent_normal_map = hvec3(bent_normal_map_highp); +#endif +#ifdef ALPHA_SCISSOR_USED + half alpha_scissor_threshold = half(alpha_scissor_threshold_highp); +#endif +#ifdef ALPHA_HASH_USED + half alpha_hash_scale = half(alpha_hash_scale_highp); +#endif +#ifdef ALPHA_ANTIALIASING_EDGE_USED + half alpha_antialiasing_edge = half(alpha_antialiasing_edge_highp); +#endif + #ifdef LIGHT_VERTEX_USED vertex = light_vertex; #ifdef USE_MULTIVIEW - view = -normalize(vertex - eye_offset); + view = hvec3(-normalize(vertex - eye_offset)); #else - view = -normalize(vertex); + view = hvec3(-normalize(vertex)); #endif //USE_MULTIVIEW #endif //LIGHT_VERTEX_USED #ifdef NORMAL_USED - vec3 geo_normal = normalize(normal); + hvec3 geo_normal = normalize(normal); #endif // NORMAL_USED #ifdef LIGHT_TRANSMITTANCE_USED @@ -1193,9 +1268,9 @@ void main() { #ifdef ALPHA_SCISSOR_USED #ifdef MODE_RENDER_MATERIAL if (alpha < alpha_scissor_threshold) { - alpha = 0.0; + alpha = half(0.0); } else { - alpha = 1.0; + alpha = half(1.0); } #else if (alpha < alpha_scissor_threshold) { @@ -1209,9 +1284,9 @@ void main() { vec3 object_pos = (inverse(read_model_matrix) * inv_view_matrix * vec4(vertex, 1.0)).xyz; #ifdef MODE_RENDER_MATERIAL if (alpha < compute_alpha_hash_threshold(object_pos, alpha_hash_scale)) { - alpha = 0.0; + alpha = half(0.0); } else { - alpha = 1.0; + alpha = half(1.0); } #else if (alpha < compute_alpha_hash_threshold(object_pos, alpha_hash_scale)) { @@ -1222,20 +1297,20 @@ void main() { // If we are not edge antialiasing, we need to remove the output alpha channel from scissor and hash #if (defined(ALPHA_SCISSOR_USED) || defined(ALPHA_HASH_USED)) && !defined(ALPHA_ANTIALIASING_EDGE_USED) && !defined(MODE_RENDER_MATERIAL) - alpha = 1.0; + 
alpha = half(1.0); #endif #ifdef ALPHA_ANTIALIASING_EDGE_USED // If alpha scissor is used, we must further the edge threshold, otherwise we won't get any edge feather #ifdef ALPHA_SCISSOR_USED - alpha_antialiasing_edge = clamp(alpha_scissor_threshold + alpha_antialiasing_edge, 0.0, 1.0); + alpha_antialiasing_edge = clamp(alpha_scissor_threshold + alpha_antialiasing_edge, half(0.0), half(1.0)); #endif alpha = compute_alpha_antialiasing_edge(alpha, alpha_texture_coordinate, alpha_antialiasing_edge); #endif // ALPHA_ANTIALIASING_EDGE_USED #ifdef MODE_RENDER_DEPTH #if defined(USE_OPAQUE_PREPASS) || defined(ALPHA_ANTIALIASING_EDGE_USED) - if (alpha < scene_data.opaque_prepass_threshold) { + if (alpha < half(scene_data.opaque_prepass_threshold)) { discard; } #endif // USE_OPAQUE_PREPASS || ALPHA_ANTIALIASING_EDGE_USED @@ -1244,8 +1319,8 @@ void main() { #endif // !USE_SHADOW_TO_OPACITY #if defined(NORMAL_MAP_USED) - normal_map.xy = normal_map.xy * 2.0 - 1.0; - normal_map.z = sqrt(max(0.0, 1.0 - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. + normal_map.xy = normal_map.xy * half(2.0) - half(1.0); + normal_map.z = sqrt(max(half(0.0), half(1.0) - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. // Tangent-space transformation is performed using unnormalized TBN vectors, per MikkTSpace. 
// See: http://www.mikktspace.com/ @@ -1255,35 +1330,35 @@ void main() { #endif // NORMAL_MAP_USED #ifdef BENT_NORMAL_MAP_USED - bent_normal_map.xy = bent_normal_map.xy * 2.0 - 1.0; - bent_normal_map.z = sqrt(max(0.0, 1.0 - dot(bent_normal_map.xy, bent_normal_map.xy))); + bent_normal_map.xy = bent_normal_map.xy * half(2.0) - half(1.0); + bent_normal_map.z = sqrt(max(half(0.0), half(1.0) - dot(bent_normal_map.xy, bent_normal_map.xy))); bent_normal_vector = normalize(tangent * bent_normal_map.x + binormal * bent_normal_map.y + normal * bent_normal_map.z); #endif #ifdef LIGHT_ANISOTROPY_USED - if (anisotropy > 0.01) { - mat3 rot = mat3(normalize(tangent), normalize(binormal), normal); + if (anisotropy > half(0.01)) { + hmat3 rot = hmat3(tangent, binormal, normal); // Make local to space. - tangent = normalize(rot * vec3(anisotropy_flow.x, anisotropy_flow.y, 0.0)); - binormal = normalize(rot * vec3(-anisotropy_flow.y, anisotropy_flow.x, 0.0)); + tangent = normalize(rot * hvec3(anisotropy_flow.x, anisotropy_flow.y, 0.0)); + binormal = normalize(rot * hvec3(-anisotropy_flow.y, anisotropy_flow.x, 0.0)); } #endif #ifdef ENABLE_CLIP_ALPHA #ifdef MODE_RENDER_MATERIAL - if (albedo.a < 0.99) { + if (albedo.a < half(0.99)) { // Used for doublepass and shadowmapping. - albedo.a = 0.0; - alpha = 0.0; + albedo.a = half(0.0); + alpha = half(0.0); } else { - albedo.a = 1.0; - alpha = 1.0; + albedo.a = half(1.0); + alpha = half(1.0); } #else - if (albedo.a < 0.99) { + if (albedo.a < half(0.99)) { //used for doublepass and shadowmapping discard; } @@ -1295,8 +1370,6 @@ void main() { #ifndef FOG_DISABLED #ifndef CUSTOM_FOG_USED - // fog must be processed as early as possible and then packed. - // to maximize VGPR usage // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. 
if (!sc_disable_fog() && bool(scene_data.flags & SCENE_DATA_FLAGS_USE_FOG)) { @@ -1305,9 +1378,6 @@ void main() { #endif //!CUSTOM_FOG_USED - uint fog_rg = packHalf2x16(fog.rg); - uint fog_ba = packHalf2x16(fog.ba); - #endif //!FOG_DISABLED #endif //!MODE_RENDER_DEPTH @@ -1334,7 +1404,7 @@ void main() { float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); if (decals.data[decal_index].normal_fade > 0.0) { - fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(geo_normal, decals.data[decal_index].normal) * 0.5 + 0.5); + fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(vec3(geo_normal), decals.data[decal_index].normal) * 0.5 + 0.5); } //we need ddx/ddy for mipmaps, so simulate them @@ -1351,7 +1421,7 @@ void main() { } decal_albedo *= decals.data[decal_index].modulate; decal_albedo.a *= fade; - albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); + albedo = hvec3(mix(vec3(albedo), decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix)); if (decals.data[decal_index].normal_rect != vec4(0.0)) { vec3 decal_normal; @@ -1365,7 +1435,7 @@ void main() { //convert to view space, use xzy because y is up decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; - normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + normal = hvec3(normalize(mix(vec3(normal), decal_normal, decal_albedo.a))); } if (decals.data[decal_index].orm_rect != vec4(0.0)) { @@ -1375,18 +1445,18 @@ void main() { } else { decal_orm = textureLod(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, 0.0).xyz; } - ao = mix(ao, decal_orm.r, decal_albedo.a); - roughness = mix(roughness, decal_orm.g, decal_albedo.a); - metallic = mix(metallic, decal_orm.b, decal_albedo.a); + ao = half(mix(float(ao), decal_orm.r, 
decal_albedo.a)); + roughness = half(mix(float(roughness), decal_orm.g, decal_albedo.a)); + metallic = half(mix(float(metallic), decal_orm.b, decal_albedo.a)); } } if (decals.data[decal_index].emission_rect != vec4(0.0)) { //emission is additive, so its independent from albedo if (sc_decal_use_mipmaps()) { - emission += textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + emission += hvec3(textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade); } else { - emission += textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, 0.0).xyz * decals.data[decal_index].emission_energy * fade; + emission += hvec3(textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, 0.0).xyz * decals.data[decal_index].emission_energy * fade); } } } @@ -1397,65 +1467,68 @@ void main() { #ifdef NORMAL_USED if (sc_scene_roughness_limiter_enabled()) { //https://www.jp.square-enix.com/tech/library/pdf/ImprovedGeometricSpecularAA.pdf - float roughness2 = roughness * roughness; - vec3 dndu = dFdx(normal), dndv = dFdy(normal); - float variance = scene_data.roughness_limiter_amount * (dot(dndu, dndu) + dot(dndv, dndv)); - float kernelRoughness2 = min(2.0 * variance, scene_data.roughness_limiter_limit); //limit effect - float filteredRoughness2 = min(1.0, roughness2 + kernelRoughness2); + half 
roughness2 = roughness * roughness; + hvec3 dndu = dFdx(normal), dndv = dFdy(normal); + half variance = half(scene_data.roughness_limiter_amount) * (dot(dndu, dndu) + dot(dndv, dndv)); + half kernelRoughness2 = min(half(2.0) * variance, half(scene_data.roughness_limiter_limit)); + half filteredRoughness2 = min(half(1.0), roughness2 + kernelRoughness2); roughness = sqrt(filteredRoughness2); } #endif // NORMAL_USED //apply energy conservation - vec3 indirect_specular_light = vec3(0.0, 0.0, 0.0); - vec3 direct_specular_light = vec3(0.0, 0.0, 0.0); - vec3 diffuse_light = vec3(0.0, 0.0, 0.0); - vec3 ambient_light = vec3(0.0, 0.0, 0.0); + hvec3 indirect_specular_light = hvec3(0.0); + hvec3 direct_specular_light = hvec3(0.0); + hvec3 diffuse_light = hvec3(0.0); + hvec3 ambient_light = hvec3(0.0); #ifndef MODE_UNSHADED // Used in regular draw pass and when drawing SDFs for SDFGI and materials for VoxelGI. - emission *= scene_data.emissive_exposure_normalization; + emission *= half(scene_data.emissive_exposure_normalization); #endif #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) #ifndef AMBIENT_LIGHT_DISABLED #ifdef BENT_NORMAL_MAP_USED - vec3 indirect_normal = bent_normal_vector; + hvec3 indirect_normal = bent_normal_vector; #else - vec3 indirect_normal = normal; + hvec3 indirect_normal = normal; #endif if (sc_scene_use_reflection_cubemap()) { #ifdef LIGHT_ANISOTROPY_USED // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy - vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; - vec3 anisotropic_tangent = cross(anisotropic_direction, view); - vec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); - vec3 bent_normal = normalize(mix(indirect_normal, anisotropic_normal, abs(anisotropy) * clamp(5.0 * roughness, 0.0, 1.0))); - vec3 ref_vec = reflect(-view, bent_normal); + hvec3 anisotropic_direction = anisotropy >= 0.0 ? 
binormal : tangent; + hvec3 anisotropic_tangent = cross(anisotropic_direction, view); + hvec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); + hvec3 bent_normal = normalize(mix(indirect_normal, anisotropic_normal, abs(anisotropy) * clamp(half(5.0) * roughness, half(0.0), half(1.0)))); + hvec3 ref_vec = reflect(-view, bent_normal); ref_vec = mix(ref_vec, bent_normal, roughness * roughness); #else - vec3 ref_vec = reflect(-view, indirect_normal); + hvec3 ref_vec = reflect(-view, indirect_normal); ref_vec = mix(ref_vec, indirect_normal, roughness * roughness); #endif - float horizon = min(1.0 + dot(ref_vec, indirect_normal), 1.0); - ref_vec = scene_data.radiance_inverse_xform * ref_vec; + half horizon = min(half(1.0) + dot(ref_vec, indirect_normal), half(1.0)); + ref_vec = hvec3(scene_data.radiance_inverse_xform * vec3(ref_vec)); #ifdef USE_RADIANCE_CUBEMAP_ARRAY - float lod, blend; - blend = modf(sqrt(roughness) * MAX_ROUGHNESS_LOD, lod); - indirect_specular_light = texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ref_vec, lod)).rgb; - indirect_specular_light = mix(indirect_specular_light, texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ref_vec, lod + 1)).rgb, blend); + float lod; + half blend = half(modf(float(sqrt(roughness) * half(MAX_ROUGHNESS_LOD)), lod)); + + hvec3 indirect_sample_a = hvec3(texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(vec3(ref_vec), float(lod))).rgb); + hvec3 indirect_sample_b = hvec3(texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(vec3(ref_vec), float(lod) + 1.0)).rgb); + indirect_specular_light = mix(indirect_sample_a, indirect_sample_b, blend); #else // USE_RADIANCE_CUBEMAP_ARRAY - indirect_specular_light = textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), ref_vec, sqrt(roughness) * MAX_ROUGHNESS_LOD).rgb; +
float lod = sqrt(roughness) * half(MAX_ROUGHNESS_LOD); + indirect_specular_light = hvec3(textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec3(ref_vec), lod).rgb); #endif //USE_RADIANCE_CUBEMAP_ARRAY indirect_specular_light *= sc_luminance_multiplier(); - indirect_specular_light *= scene_data.IBL_exposure_normalization; + indirect_specular_light *= half(scene_data.IBL_exposure_normalization); indirect_specular_light *= horizon * horizon; - indirect_specular_light *= scene_data.ambient_light_color_energy.a; + indirect_specular_light *= half(scene_data.ambient_light_color_energy.a); } #if defined(CUSTOM_RADIANCE_USED) @@ -1465,18 +1538,18 @@ void main() { #ifndef USE_LIGHTMAP //lightmap overrides everything if (bool(scene_data.flags & SCENE_DATA_FLAGS_USE_AMBIENT_LIGHT)) { - ambient_light = scene_data.ambient_light_color_energy.rgb; + ambient_light = hvec3(scene_data.ambient_light_color_energy.rgb); if (sc_scene_use_ambient_cubemap()) { vec3 ambient_dir = scene_data.radiance_inverse_xform * indirect_normal; #ifdef USE_RADIANCE_CUBEMAP_ARRAY - vec3 cubemap_ambient = texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ambient_dir, MAX_ROUGHNESS_LOD)).rgb; + hvec3 cubemap_ambient = hvec3(texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ambient_dir, MAX_ROUGHNESS_LOD)).rgb); #else - vec3 cubemap_ambient = textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), ambient_dir, MAX_ROUGHNESS_LOD).rgb; + hvec3 cubemap_ambient = hvec3(textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), ambient_dir, MAX_ROUGHNESS_LOD).rgb); #endif //USE_RADIANCE_CUBEMAP_ARRAY cubemap_ambient *= sc_luminance_multiplier(); - cubemap_ambient *= scene_data.IBL_exposure_normalization; - ambient_light = mix(ambient_light, cubemap_ambient * scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix); + 
cubemap_ambient *= half(scene_data.IBL_exposure_normalization); + ambient_light = mix(ambient_light, cubemap_ambient * half(scene_data.ambient_light_color_energy.a), half(scene_data.ambient_color_sky_mix)); } } #endif // !USE_LIGHTMAP @@ -1487,30 +1560,31 @@ void main() { #ifdef LIGHT_CLEARCOAT_USED if (sc_scene_use_reflection_cubemap()) { - float NoV = max(dot(geo_normal, view), 0.0001); // We want to use geometric normal, not normal_map - vec3 ref_vec = reflect(-view, geo_normal); + half NoV = max(dot(geo_normal, view), half(0.0001)); + hvec3 ref_vec = reflect(-view, geo_normal); ref_vec = mix(ref_vec, geo_normal, clearcoat_roughness * clearcoat_roughness); // The clear coat layer assumes an IOR of 1.5 (4% reflectance) - float Fc = clearcoat * (0.04 + 0.96 * SchlickFresnel(NoV)); - float attenuation = 1.0 - Fc; + half Fc = clearcoat * (half(0.04) + half(0.96) * SchlickFresnel(NoV)); + half attenuation = half(1.0) - Fc; ambient_light *= attenuation; indirect_specular_light *= attenuation; - float horizon = min(1.0 + dot(ref_vec, indirect_normal), 1.0); - ref_vec = scene_data.radiance_inverse_xform * ref_vec; - float roughness_lod = mix(0.001, 0.1, sqrt(clearcoat_roughness)) * MAX_ROUGHNESS_LOD; + half horizon = min(half(1.0) + dot(ref_vec, indirect_normal), half(1.0)); + ref_vec = hvec3(scene_data.radiance_inverse_xform * vec3(ref_vec)); + float roughness_lod = mix(0.001, 0.1, sqrt(float(clearcoat_roughness))) * MAX_ROUGHNESS_LOD; #ifdef USE_RADIANCE_CUBEMAP_ARRAY - float lod, blend; - blend = modf(roughness_lod, lod); - vec3 clearcoat_light = texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ref_vec, lod)).rgb; - clearcoat_light = mix(clearcoat_light, texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ref_vec, lod + 1)).rgb, blend); + float lod; + half blend = half(modf(roughness_lod, lod)); + hvec3 clearcoat_sample_a = hvec3(texture(samplerCubeArray(radiance_cubemap, 
DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ref_vec, lod)).rgb); + hvec3 clearcoat_sample_b = hvec3(texture(samplerCubeArray(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(ref_vec, lod + 1)).rgb); + hvec3 clearcoat_light = mix(clearcoat_sample_a, clearcoat_sample_b, blend); #else - vec3 clearcoat_light = textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), ref_vec, roughness_lod).rgb; + hvec3 clearcoat_light = hvec3(textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec3(ref_vec), roughness_lod).rgb); #endif //USE_RADIANCE_CUBEMAP_ARRAY - indirect_specular_light += clearcoat_light * horizon * horizon * Fc * scene_data.ambient_light_color_energy.a; + indirect_specular_light += clearcoat_light * horizon * horizon * Fc * half(scene_data.ambient_light_color_energy.a); } #endif // LIGHT_CLEARCOAT_USED #endif // !AMBIENT_LIGHT_DISABLED @@ -1527,27 +1601,27 @@ void main() { uint index = instances.data[draw_call.instance_index].gi_offset; // The world normal. - vec3 wnormal = mat3(scene_data.inv_view_matrix) * indirect_normal; + hvec3 wnormal = hmat3(scene_data.inv_view_matrix) * indirect_normal; // The SH coefficients used for evaluating diffuse data from SH probes. 
- const float c[5] = float[]( - 0.886227, // l0 sqrt(1.0/(4.0*PI)) * PI - 1.023327, // l1 sqrt(3.0/(4.0*PI)) * PI*2.0/3.0 - 0.858086, // l2n2, l2n1, l2p1 sqrt(15.0/(4.0*PI)) * PI*1.0/4.0 - 0.247708, // l20 sqrt(5.0/(16.0*PI)) * PI*1.0/4.0 - 0.429043 // l2p2 sqrt(15.0/(16.0*PI))* PI*1.0/4.0 + const half c[5] = half[]( + half(0.886227), // l0 sqrt(1.0/(4.0*PI)) * PI + half(1.023327), // l1 sqrt(3.0/(4.0*PI)) * PI*2.0/3.0 + half(0.858086), // l2n2, l2n1, l2p1 sqrt(15.0/(4.0*PI)) * PI*1.0/4.0 + half(0.247708), // l20 sqrt(5.0/(16.0*PI)) * PI*1.0/4.0 + half(0.429043) // l2p2 sqrt(15.0/(16.0*PI))* PI*1.0/4.0 ); - ambient_light += (c[0] * lightmap_captures.data[index].sh[0].rgb + - c[1] * lightmap_captures.data[index].sh[1].rgb * wnormal.y + - c[1] * lightmap_captures.data[index].sh[2].rgb * wnormal.z + - c[1] * lightmap_captures.data[index].sh[3].rgb * wnormal.x + - c[2] * lightmap_captures.data[index].sh[4].rgb * wnormal.x * wnormal.y + - c[2] * lightmap_captures.data[index].sh[5].rgb * wnormal.y * wnormal.z + - c[3] * lightmap_captures.data[index].sh[6].rgb * (3.0 * wnormal.z * wnormal.z - 1.0) + - c[2] * lightmap_captures.data[index].sh[7].rgb * wnormal.x * wnormal.z + - c[4] * lightmap_captures.data[index].sh[8].rgb * (wnormal.x * wnormal.x - wnormal.y * wnormal.y)) * - scene_data.emissive_exposure_normalization; + half norm = half(scene_data.emissive_exposure_normalization); + ambient_light += c[0] * hvec3(lightmap_captures.data[index].sh[0].rgb) * norm; + ambient_light += c[1] * hvec3(lightmap_captures.data[index].sh[1].rgb) * wnormal.y * norm; + ambient_light += c[1] * hvec3(lightmap_captures.data[index].sh[2].rgb) * wnormal.z * norm; + ambient_light += c[1] * hvec3(lightmap_captures.data[index].sh[3].rgb) * wnormal.x * norm; + ambient_light += c[2] * hvec3(lightmap_captures.data[index].sh[4].rgb) * wnormal.x * wnormal.y * norm; + ambient_light += c[2] * hvec3(lightmap_captures.data[index].sh[5].rgb) * wnormal.y * wnormal.z * norm; + ambient_light += c[3] * 
hvec3(lightmap_captures.data[index].sh[6].rgb) * (half(3.0) * wnormal.z * wnormal.z - half(1.0)) * norm; + ambient_light += c[2] * hvec3(lightmap_captures.data[index].sh[7].rgb) * wnormal.x * wnormal.z * norm; + ambient_light += c[4] * hvec3(lightmap_captures.data[index].sh[8].rgb) * (wnormal.x * wnormal.x - wnormal.y * wnormal.y) * norm; } else if (bool(instances.data[draw_call.instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap bool uses_sh = bool(instances.data[draw_call.instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); @@ -1559,35 +1633,35 @@ void main() { if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data - vec3 lm_light_l0; - vec3 lm_light_l1n1; - vec3 lm_light_l1_0; - vec3 lm_light_l1p1; + hvec3 lm_light_l0; + hvec3 lm_light_l1n1; + hvec3 lm_light_l1_0; + hvec3 lm_light_l1p1; if (sc_use_lightmap_bicubic_filter()) { - lm_light_l0 = textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 0.0), lightmaps.data[ofs].light_texture_size).rgb; - lm_light_l1n1 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 1.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; - lm_light_l1_0 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 2.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; - lm_light_l1p1 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 3.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; + lm_light_l0 = hvec3(textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 0.0), lightmaps.data[ofs].light_texture_size).rgb); + lm_light_l1n1 = hvec3((textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 1.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0); + lm_light_l1_0 = hvec3((textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 2.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0); + lm_light_l1p1 = 
hvec3((textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 3.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0); } else { - lm_light_l0 = textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 0.0), 0.0).rgb; - lm_light_l1n1 = (textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 1.0), 0.0).rgb - vec3(0.5)) * 2.0; - lm_light_l1_0 = (textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb - vec3(0.5)) * 2.0; - lm_light_l1p1 = (textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb - vec3(0.5)) * 2.0; + lm_light_l0 = hvec3(textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 0.0), 0.0).rgb); + lm_light_l1n1 = hvec3((textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 1.0), 0.0).rgb - vec3(0.5)) * 2.0); + lm_light_l1_0 = hvec3((textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb - vec3(0.5)) * 2.0); + lm_light_l1p1 = hvec3((textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb - vec3(0.5)) * 2.0); } - vec3 n = normalize(lightmaps.data[ofs].normal_xform * indirect_normal); - float exposure_normalization = lightmaps.data[ofs].exposure_normalization; + hvec3 n = hvec3(normalize(lightmaps.data[ofs].normal_xform * indirect_normal)); + half exposure_normalization = half(lightmaps.data[ofs].exposure_normalization); ambient_light += lm_light_l0 * exposure_normalization; - ambient_light += lm_light_l1n1 * n.y * (lm_light_l0 * exposure_normalization * 4.0); - ambient_light += lm_light_l1_0 * n.z * (lm_light_l0 * exposure_normalization * 4.0); - ambient_light += lm_light_l1p1 * n.x * (lm_light_l0 * exposure_normalization * 4.0); + ambient_light += lm_light_l1n1 * n.y * 
lm_light_l0 * exposure_normalization * half(4.0); + ambient_light += lm_light_l1_0 * n.z * lm_light_l0 * exposure_normalization * half(4.0); + ambient_light += lm_light_l1p1 * n.x * lm_light_l0 * exposure_normalization * half(4.0); } else { if (sc_use_lightmap_bicubic_filter()) { - ambient_light += textureArray_bicubic(lightmap_textures[ofs], uvw, lightmaps.data[ofs].light_texture_size).rgb * lightmaps.data[ofs].exposure_normalization; + ambient_light += hvec3(textureArray_bicubic(lightmap_textures[ofs], uvw, lightmaps.data[ofs].light_texture_size).rgb * lightmaps.data[ofs].exposure_normalization); } else { - ambient_light += textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw, 0.0).rgb * lightmaps.data[ofs].exposure_normalization; + ambient_light += hvec3(textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw, 0.0).rgb * lightmaps.data[ofs].exposure_normalization); } } } @@ -1600,19 +1674,19 @@ void main() { uint reflection_probe_count = sc_reflection_probes(8); if (reflection_probe_count > 0) { - vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); - vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); + hvec4 reflection_accum = hvec4(0.0); + hvec4 ambient_accum = hvec4(0.0); #ifdef LIGHT_ANISOTROPY_USED // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy - vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; - vec3 anisotropic_tangent = cross(anisotropic_direction, view); - vec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); - vec3 bent_normal = normalize(mix(normal, anisotropic_normal, abs(anisotropy) * clamp(5.0 * roughness, 0.0, 1.0))); + hvec3 anisotropic_direction = anisotropy >= 0.0 ? 
binormal : tangent; + hvec3 anisotropic_tangent = cross(anisotropic_direction, view); + hvec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); + hvec3 bent_normal = normalize(mix(normal, anisotropic_normal, abs(anisotropy) * clamp(half(5.0) * roughness, half(0.0), half(1.0)))); #else - vec3 bent_normal = normal; + hvec3 bent_normal = normal; #endif - vec3 ref_vec = normalize(reflect(-view, bent_normal)); + hvec3 ref_vec = normalize(reflect(-view, bent_normal)); // Interpolate between mirror and rough reflection by using linear_roughness * linear_roughness. ref_vec = mix(ref_vec, bent_normal, roughness * roughness * roughness * roughness); @@ -1623,27 +1697,27 @@ void main() { break; } - if (reflection_accum.a >= 1.0 && ambient_accum.a >= 1.0) { + if (reflection_accum.a >= half(1.0) && ambient_accum.a >= half(1.0)) { break; } reflection_process(reflection_index, vertex, ref_vec, normal, roughness, ambient_light, indirect_specular_light, ambient_accum, reflection_accum); } - if (ambient_accum.a < 1.0) { - ambient_accum.rgb = ambient_light * (1.0 - ambient_accum.a) + ambient_accum.rgb; + if (ambient_accum.a < half(1.0)) { + ambient_accum.rgb = ambient_light * (half(1.0) - ambient_accum.a) + ambient_accum.rgb; } - if (reflection_accum.a < 1.0) { - reflection_accum.rgb = indirect_specular_light * (1.0 - reflection_accum.a) + reflection_accum.rgb; + if (reflection_accum.a < half(1.0)) { + reflection_accum.rgb = indirect_specular_light * (half(1.0) - reflection_accum.a) + reflection_accum.rgb; } - if (reflection_accum.a > 0.0) { + if (reflection_accum.a > half(0.0)) { indirect_specular_light = reflection_accum.rgb; } #if !defined(USE_LIGHTMAP) - if (ambient_accum.a > 0.0) { + if (ambient_accum.a > half(0.0)) { ambient_light = ambient_accum.rgb; } #endif @@ -1654,18 +1728,18 @@ void main() { #ifndef SPECULAR_OCCLUSION_DISABLED #ifdef BENT_NORMAL_MAP_USED // Simplified bent normal occlusion. 
- float cos_b = max(dot(reflect(-view, normal), bent_normal_vector), 0.0); - float specular_occlusion = clamp((ao - (1.0 - cos_b)) / roughness, 0.0, 1.0); - specular_occlusion = mix(specular_occlusion, cos_b * (1.0 - ao), roughness); + half cos_b = max(dot(reflect(-view, normal), bent_normal_vector), half(0.0)); + half specular_occlusion = clamp((ao - (half(1.0) - cos_b)) / roughness, half(0.0), half(1.0)); + specular_occlusion = mix(specular_occlusion, cos_b * (half(1.0) - ao), roughness); indirect_specular_light *= specular_occlusion; #else // BENT_NORMAL_MAP_USED - float specular_occlusion = (ambient_light.r * 0.3 + ambient_light.g * 0.59 + ambient_light.b * 0.11) * 2.0; // Luminance of ambient light. - specular_occlusion = min(specular_occlusion * 4.0, 1.0); // This multiplication preserves speculars on bright areas. + half specular_occlusion = (ambient_light.r * half(0.3) + ambient_light.g * half(0.59) + ambient_light.b * half(0.11)) * half(2.0); // Luminance of ambient light. + specular_occlusion = min(specular_occlusion * half(4.0), half(1.0)); // This multiplication preserves speculars on bright areas. - float reflective_f = (1.0 - roughness) * metallic; + half reflective_f = (half(1.0) - roughness) * metallic; // 10.0 is a magic number, it gives the intended effect in most scenarios. // Low enough for occlusion, high enough for reaction to lights and shadows. 
- specular_occlusion = max(min(reflective_f * specular_occlusion * 10.0, 1.0), specular_occlusion); + specular_occlusion = max(min(reflective_f * specular_occlusion * half(10.0), half(1.0)), specular_occlusion); indirect_specular_light *= specular_occlusion; #endif // BENT_NORMAL_MAP_USED #endif // USE_SPECULAR_OCCLUSION @@ -1674,41 +1748,36 @@ void main() { #endif // !AMBIENT_LIGHT_DISABLED // convert ao to direct light ao - ao = mix(1.0, ao, ao_light_affect); + ao = mix(half(1.0), ao, ao_light_affect); //this saves some VGPRs - vec3 f0 = F0(metallic, specular, albedo); + hvec3 f0 = F0(metallic, specular, albedo); #ifndef AMBIENT_LIGHT_DISABLED { #if defined(DIFFUSE_TOON) //simplify for toon, as - indirect_specular_light *= specular * metallic * albedo * 2.0; + indirect_specular_light *= specular * metallic * albedo * half(2.0); #else // scales the specular reflections, needs to be computed before lighting happens, // but after environment, GI, and reflection probes are added // Environment brdf approximation (Lazarov 2013) // see https://www.unrealengine.com/en-US/blog/physically-based-shading-on-mobile - const vec4 c0 = vec4(-1.0, -0.0275, -0.572, 0.022); - const vec4 c1 = vec4(1.0, 0.0425, 1.04, -0.04); - vec4 r = roughness * c0 + c1; - float ndotv = clamp(dot(normal, view), 0.0, 1.0); - float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y; - vec2 env = vec2(-1.04, 1.04) * a004 + r.zw; + const hvec4 c0 = hvec4(-1.0, -0.0275, -0.572, 0.022); + const hvec4 c1 = hvec4(1.0, 0.0425, 1.04, -0.04); + hvec4 r = roughness * c0 + c1; + half ndotv = clamp(dot(normal, view), half(0.0), half(1.0)); + half a004 = min(r.x * r.x, exp2(half(-9.28) * ndotv)) * r.x + r.y; + hvec2 env = hvec2(-1.04, 1.04) * a004 + r.zw; - indirect_specular_light *= env.x * f0 + env.y * clamp(50.0 * f0.g, metallic, 1.0); + indirect_specular_light *= env.x * f0 + env.y * clamp(half(50.0) * f0.g, metallic, half(1.0)); #endif } #endif // !AMBIENT_LIGHT_DISABLED #endif // 
!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) -#if !defined(MODE_RENDER_DEPTH) - //this saves some VGPRs - uint orms = packUnorm4x8(vec4(ao, roughness, metallic, specular)); -#endif - // LIGHTING #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) #ifdef USE_VERTEX_LIGHTING @@ -1720,10 +1789,9 @@ void main() { if (directional_lights_count > 0) { #ifndef SHADOWS_DISABLED // Do shadow and lighting in two passes to reduce register pressure - uint shadow0 = 0; - uint shadow1 = 0; + half shadows[8]; - float shadowmask = 1.0; + half shadowmask = half(1.0); #ifdef USE_LIGHTMAP uint shadowmask_mode = LIGHTMAP_SHADOWMASK_MODE_NONE; @@ -1738,9 +1806,9 @@ void main() { const vec3 uvw = vec3(scaled_uv, float(slice)); if (sc_use_lightmap_bicubic_filter()) { - shadowmask = textureArray_bicubic(lightmap_textures[MAX_LIGHTMAP_TEXTURES + ofs], uvw, lightmaps.data[ofs].light_texture_size).x; + shadowmask = half(textureArray_bicubic(lightmap_textures[MAX_LIGHTMAP_TEXTURES + ofs], uvw, lightmaps.data[ofs].light_texture_size).x); } else { - shadowmask = textureLod(sampler2DArray(lightmap_textures[MAX_LIGHTMAP_TEXTURES + ofs], SAMPLER_LINEAR_CLAMP), uvw, 0.0).x; + shadowmask = half(textureLod(sampler2DArray(lightmap_textures[MAX_LIGHTMAP_TEXTURES + ofs], SAMPLER_LINEAR_CLAMP), uvw, 0.0).x); } } } @@ -1762,21 +1830,21 @@ void main() { continue; // Statically baked light and object uses lightmap, skip. 
} - float shadow = 1.0; + half shadow = half(1.0); if (directional_lights.data[i].shadow_opacity > 0.001) { float depth_z = -vertex.z; vec4 pssm_coord; float blur_factor; - vec3 light_dir = directional_lights.data[i].direction; - vec3 base_normal_bias = geo_normal * (1.0 - max(0.0, dot(light_dir, -geo_normal))); + hvec3 light_dir = hvec3(directional_lights.data[i].direction); + hvec3 base_normal_bias = geo_normal * (half(1.0) - max(half(0.0), dot(light_dir, -geo_normal))); -#define BIAS_FUNC(m_var, m_idx) \ - m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ - vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \ - normal_bias -= light_dir * dot(light_dir, normal_bias); \ - m_var.xyz += normal_bias; +#define BIAS_FUNC(m_var, m_idx) \ + hvec3 normal_bias = base_normal_bias * half(directional_lights.data[i].shadow_normal_bias[m_idx]); \ + normal_bias -= light_dir * dot(light_dir, normal_bias); \ + normal_bias += light_dir * half(directional_lights.data[i].shadow_bias[m_idx]); \ + m_var.xyz += vec3(normal_bias); if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); @@ -1815,79 +1883,75 @@ void main() { bool blend_split = sc_directional_light_blend_split(i); float blend_split_weight = blend_split ? 
1.0f : 0.0f; - shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * (blur_factor + (1.0 - blur_factor) * blend_split_weight), pssm_coord, scene_data.taa_frame_count); + shadow = half(sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * (blur_factor + (1.0 - blur_factor) * blend_split_weight), pssm_coord, scene_data.taa_frame_count)); if (blend_split) { - float pssm_blend; + half pssm_blend; float blur_factor2; if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 1) pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); - pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x - directional_lights.data[i].shadow_split_offsets.x * 0.1, directional_lights.data[i].shadow_split_offsets.x, depth_z); + pssm_blend = half(smoothstep(directional_lights.data[i].shadow_split_offsets.x - directional_lights.data[i].shadow_split_offsets.x * 0.1, directional_lights.data[i].shadow_split_offsets.x, depth_z)); // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. 
blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.y; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); - pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y - directional_lights.data[i].shadow_split_offsets.y * 0.1, directional_lights.data[i].shadow_split_offsets.y, depth_z); + pssm_blend = half(smoothstep(directional_lights.data[i].shadow_split_offsets.y - directional_lights.data[i].shadow_split_offsets.y * 0.1, directional_lights.data[i].shadow_split_offsets.y, depth_z)); // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.z; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); - pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.z - directional_lights.data[i].shadow_split_offsets.z * 0.1, directional_lights.data[i].shadow_split_offsets.z, depth_z); + pssm_blend = half(smoothstep(directional_lights.data[i].shadow_split_offsets.z - directional_lights.data[i].shadow_split_offsets.z * 0.1, directional_lights.data[i].shadow_split_offsets.z, depth_z)); // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. 
blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.w; } else { - pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + pssm_blend = half(0.0); //if no blend, same coord will be used (divide by z will result in same value, and already cached) blur_factor2 = 1.0; } pssm_coord /= pssm_coord.w; - float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * (blur_factor2 + (1.0 - blur_factor2) * blend_split_weight), pssm_coord, scene_data.taa_frame_count); + half shadow2 = half(sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * (blur_factor2 + (1.0 - blur_factor2) * blend_split_weight), pssm_coord, scene_data.taa_frame_count)); shadow = mix(shadow, shadow2, pssm_blend); } #ifdef USE_LIGHTMAP if (shadowmask_mode == LIGHTMAP_SHADOWMASK_MODE_REPLACE) { - shadow = mix(shadow, shadowmask, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance + shadow = mix(shadow, shadowmask, half(smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z))); //done with negative values for performance } else if (shadowmask_mode == LIGHTMAP_SHADOWMASK_MODE_OVERLAY) { - shadow = shadowmask * mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance + shadow = shadowmask * mix(shadow, half(1.0), half(smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z))); //done with negative values for performance } else { #endif - shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, 
directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance + shadow = mix(shadow, half(1.0), half(smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z))); #ifdef USE_LIGHTMAP } #endif #ifdef USE_VERTEX_LIGHTING - diffuse_light *= mix(1.0, shadow, diffuse_light_interp.a); - direct_specular_light *= mix(1.0, shadow, specular_light_interp.a); + diffuse_light *= mix(half(1.0), shadow, diffuse_light_interp.a); + direct_specular_light *= mix(half(1.0), shadow, specular_light_interp.a); #endif #undef BIAS_FUNC } - if (i < 4) { - shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8); - } else { - shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8); - } + shadows[i] = shadow; } #ifdef USE_LIGHTMAP } else { // shadowmask_mode == LIGHTMAP_SHADOWMASK_MODE_ONLY #ifdef USE_VERTEX_LIGHTING - diffuse_light *= mix(1.0, shadowmask, diffuse_light_interp.a); - direct_specular_light *= mix(1.0, shadowmask, specular_light_interp.a); + diffuse_light *= mix(half(1.0), half(shadowmask), diffuse_light_interp.a); + direct_specular_light *= mix(half(1.0), half(shadowmask), specular_light_interp.a); #endif - shadow0 |= uint(clamp(shadowmask * 255.0, 0.0, 255.0)); + shadows[0] = shadowmask; } #endif // USE_LIGHTMAP @@ -1906,15 +1970,9 @@ void main() { // We're not doing light transmittence - float shadow = 1.0; + half shadow = half(1.0); #ifndef SHADOWS_DISABLED - if (i < 4) { - shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0; - } else { - shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; - } - - shadow = mix(1.0, shadow, directional_lights.data[i].shadow_opacity); + shadow = mix(half(1.0), shadows[i], half(directional_lights.data[i].shadow_opacity)); #endif blur_shadow(shadow); @@ -1929,16 +1987,16 @@ void main() { } else { tint = vec3(1.0, 1.0, 0.0); } - tint = mix(tint, vec3(1.0), shadow); - shadow = 1.0; + tint = mix(tint, vec3(1.0), float(shadow)); + shadow = half(1.0); #endif float 
size_A = sc_use_light_soft_shadows() ? directional_lights.data[i].size : 0.0; - light_compute(normal, directional_lights.data[i].direction, view, size_A, - directional_lights.data[i].color * directional_lights.data[i].energy * tint, - true, shadow, f0, orms, directional_lights.data[i].specular, albedo, alpha, - screen_uv, vec3(1.0), + light_compute(normal, hvec3(directional_lights.data[i].direction), view, saturateHalf(size_A), + hvec3(directional_lights.data[i].color * directional_lights.data[i].energy * tint), + true, shadow, f0, roughness, metallic, half(directional_lights.data[i].specular), albedo, alpha, + screen_uv, hvec3(1.0), #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1974,7 +2032,7 @@ void main() { break; } - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, vec3(1.0), + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, roughness, metallic, scene_data.taa_frame_count, albedo, alpha, screen_uv, hvec3(1.0), #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1993,8 +2051,7 @@ void main() { clearcoat, clearcoat_roughness, geo_normal, #endif // LIGHT_CLEARCOAT_USED #ifdef LIGHT_ANISOTROPY_USED - tangent, - binormal, anisotropy, + binormal, tangent, anisotropy, #endif diffuse_light, direct_specular_light); } @@ -2007,7 +2064,7 @@ void main() { break; } - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, vec3(1.0), + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, roughness, metallic, scene_data.taa_frame_count, albedo, alpha, screen_uv, hvec3(1.0), #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -2026,8 +2083,7 @@ void main() { clearcoat, clearcoat_roughness, geo_normal, #endif // LIGHT_CLEARCOAT_USED #ifdef LIGHT_ANISOTROPY_USED - tangent, - binormal, anisotropy, + binormal, tangent, anisotropy, #endif 
diffuse_light, direct_specular_light); } @@ -2037,14 +2093,14 @@ void main() { #ifdef USE_SHADOW_TO_OPACITY #ifndef MODE_RENDER_DEPTH - alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); + alpha = min(alpha, clamp(length(ambient_light), half(0.0), half(1.0))); #if defined(ALPHA_SCISSOR_USED) #ifdef MODE_RENDER_MATERIAL if (alpha < alpha_scissor_threshold) { - alpha = 0.0; + alpha = half(0.0); } else { - alpha = 1.0; + alpha = half(1.0); } #else if (alpha < alpha_scissor_threshold) { @@ -2082,19 +2138,12 @@ void main() { diffuse_light *= albedo; // ambient must be multiplied by albedo at the end // apply direct light AO - ao = unpackUnorm4x8(orms).x; diffuse_light *= ao; direct_specular_light *= ao; // apply metallic - metallic = unpackUnorm4x8(orms).z; - diffuse_light *= 1.0 - metallic; - ambient_light *= 1.0 - metallic; - -#ifndef FOG_DISABLED - //restore fog - fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); -#endif // !FOG_DISABLED + diffuse_light *= half(1.0) - metallic; + ambient_light *= half(1.0) - metallic; #ifdef MODE_MULTIPLE_RENDER_TARGETS @@ -2119,9 +2168,9 @@ void main() { #else //MODE_MULTIPLE_RENDER_TARGETS #ifdef MODE_UNSHADED - frag_color = vec4(albedo, alpha); + frag_color = hvec4(albedo, alpha); #else // MODE_UNSHADED - frag_color = vec4(emission + ambient_light + diffuse_light + direct_specular_light + indirect_specular_light, alpha); + frag_color = hvec4(emission + ambient_light + diffuse_light + direct_specular_light + indirect_specular_light, alpha); #endif // MODE_UNSHADED #ifndef FOG_DISABLED @@ -2144,10 +2193,10 @@ void main() { // These motion vectors are in NDC space (as opposed to screen space) to fit the OpenXR XR_FB_space_warp specification. 
// https://registry.khronos.org/OpenXR/specs/1.0/html/xrspec.html#XR_FB_space_warp - vec3 ndc = screen_position.xyz / screen_position.w; + hvec3 ndc = hvec3(screen_position.xyz / screen_position.w); ndc.y = -ndc.y; - vec3 prev_ndc = prev_screen_position.xyz / prev_screen_position.w; + hvec3 prev_ndc = hvec3(prev_screen_position.xyz / prev_screen_position.w); prev_ndc.y = -prev_ndc.y; - frag_color = vec4(ndc - prev_ndc, 0.0); + frag_color = hvec4(ndc - prev_ndc, half(0.0)); #endif } diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl index 8c7cd0fd54b..57aa34c6730 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl @@ -207,8 +207,8 @@ bool sc_directional_light_blend_split(uint i) { return ((sc_packed_1() >> (21 + i)) & 1U) != 0; } -float sc_luminance_multiplier() { - return sc_packed_2(); +half sc_luminance_multiplier() { + return half(sc_packed_2()); } /* Set 0: Base Pass (never changes) */ @@ -259,7 +259,7 @@ directional_lights; #define LIGHTMAP_SHADOWMASK_MODE_ONLY 3 struct Lightmap { - mediump mat3 normal_xform; + mat3 normal_xform; vec2 light_texture_size; float exposure_normalization; uint flags; @@ -271,7 +271,7 @@ layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { lightmaps; struct LightmapCapture { - mediump vec4 sh[9]; + vec4 sh[9]; }; layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { @@ -279,8 +279,8 @@ layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { } lightmap_captures; -layout(set = 0, binding = 9) uniform mediump texture2D decal_atlas; -layout(set = 0, binding = 10) uniform mediump texture2D decal_atlas_srgb; +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform 
texture2D decal_atlas_srgb; layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { DecalData data[]; @@ -288,7 +288,7 @@ layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { decals; layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalShaderUniformData { - highp vec4 data[]; + vec4 data[]; } global_shader_uniforms; @@ -309,7 +309,7 @@ struct InstanceData { uint instance_uniforms_ofs; // Base offset in global buffer for instance variables. // 04 - 72 uint gi_offset; // GI information when using lightmapping (VCT or lightmap index). // 04 - 76 uint layer_mask; // 04 - 80 - highp vec4 lightmap_uv_scale; // 16 - 96 Doubles as uv_offset when needed. + vec4 lightmap_uv_scale; // 16 - 96 Doubles as uv_offset when needed. uvec2 reflection_probes; // 08 - 104 uvec2 omni_lights; // 08 - 112 @@ -328,30 +328,30 @@ instances; #ifdef USE_RADIANCE_CUBEMAP_ARRAY -layout(set = 1, binding = 2) uniform mediump textureCubeArray radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; #else -layout(set = 1, binding = 2) uniform mediump textureCube radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; #endif -layout(set = 1, binding = 3) uniform mediump textureCubeArray reflection_atlas; +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; -layout(set = 1, binding = 4) uniform highp texture2D shadow_atlas; +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; -layout(set = 1, binding = 5) uniform highp texture2D directional_shadow_atlas; +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; // this needs to change to providing just the lightmap we're using.. 
layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES * 2]; #ifdef USE_MULTIVIEW -layout(set = 1, binding = 9) uniform highp texture2DArray depth_buffer; -layout(set = 1, binding = 10) uniform mediump texture2DArray color_buffer; +layout(set = 1, binding = 9) uniform texture2DArray depth_buffer; +layout(set = 1, binding = 10) uniform texture2DArray color_buffer; #define multiviewSampler sampler2DArray #else -layout(set = 1, binding = 9) uniform highp texture2D depth_buffer; -layout(set = 1, binding = 10) uniform mediump texture2D color_buffer; +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; #define multiviewSampler sampler2D #endif // USE_MULTIVIEW @@ -375,7 +375,7 @@ layout(set = 1, binding = 13 + 11) uniform sampler SAMPLER_LINEAR_WITH_MIPMAPS_A /* Set 2 Skeleton & Instancing (can change per item) */ layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { - highp vec4 data[]; + vec4 data[]; } transforms; diff --git a/servers/rendering/renderer_rd/shaders/half_inc.glsl b/servers/rendering/renderer_rd/shaders/half_inc.glsl new file mode 100644 index 00000000000..d1dd3dae7d0 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/half_inc.glsl @@ -0,0 +1,43 @@ +// Use of FP16 in Godot is done explicitly through the types half and hvec. +// The extensions must be supported by the system to use this shader. +// +// If EXPLICIT_FP16 is not defined, all operations will use full precision +// floats instead and all casting operations will not be performed. 
+ +#ifndef HALF_INC_H +#define HALF_INC_H + +#ifdef EXPLICIT_FP16 + +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#extension GL_EXT_shader_16bit_storage : require + +#define HALF_FLT_MIN float16_t(6.10352e-5) +#define HALF_FLT_MAX float16_t(65504.0) + +#define half float16_t +#define hvec2 f16vec2 +#define hvec3 f16vec3 +#define hvec4 f16vec4 +#define hmat2 f16mat2 +#define hmat3 f16mat3 +#define hmat4 f16mat4 +#define saturateHalf(x) min(float16_t(x), HALF_FLT_MAX) + +#else + +#define HALF_FLT_MIN float(1.175494351e-38F) +#define HALF_FLT_MAX float(3.402823466e+38F) + +#define half float +#define hvec2 vec2 +#define hvec3 vec3 +#define hvec4 vec4 +#define hmat2 mat2 +#define hmat3 mat3 +#define hmat4 mat4 +#define saturateHalf(x) (x) + +#endif + +#endif // HALF_INC_H diff --git a/servers/rendering/renderer_rd/shaders/light_data_inc.glsl b/servers/rendering/renderer_rd/shaders/light_data_inc.glsl index 6f0ca2176d7..edfe777d191 100644 --- a/servers/rendering/renderer_rd/shaders/light_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/light_data_inc.glsl @@ -3,31 +3,31 @@ #define LIGHT_BAKE_DYNAMIC 2 struct LightData { //this structure needs to be as packed as possible - highp vec3 position; - highp float inv_radius; + vec3 position; + float inv_radius; - mediump vec3 direction; - highp float size; + vec3 direction; + float size; - mediump vec3 color; - mediump float attenuation; + vec3 color; + float attenuation; - mediump float cone_attenuation; - mediump float cone_angle; - mediump float specular_amount; - mediump float shadow_opacity; + float cone_attenuation; + float cone_angle; + float specular_amount; + float shadow_opacity; - highp vec4 atlas_rect; // rect in the shadow atlas - highp mat4 shadow_matrix; - highp float shadow_bias; - highp float shadow_normal_bias; - highp float transmittance_bias; - highp float soft_shadow_size; // for spot, it's the size in uv coordinates of the light, for omni it's the span angle - highp float 
soft_shadow_scale; // scales the shadow kernel for blurrier shadows + vec4 atlas_rect; // rect in the shadow atlas + mat4 shadow_matrix; + float shadow_bias; + float shadow_normal_bias; + float transmittance_bias; + float soft_shadow_size; // for spot, it's the size in uv coordinates of the light, for omni it's the span angle + float soft_shadow_scale; // scales the shadow kernel for blurrier shadows uint mask; - mediump float volumetric_fog_energy; + float volumetric_fog_energy; uint bake_mode; - highp vec4 projector_rect; //projector rect in srgb decal atlas + vec4 projector_rect; //projector rect in srgb decal atlas }; #define REFLECTION_AMBIENT_DISABLED 0 @@ -35,13 +35,13 @@ struct LightData { //this structure needs to be as packed as possible #define REFLECTION_AMBIENT_COLOR 2 struct ReflectionData { - highp vec3 box_extents; - mediump float index; - highp vec3 box_offset; + vec3 box_extents; + float index; + vec3 box_offset; uint mask; - mediump vec3 ambient; // ambient color - mediump float intensity; - mediump float blend_distance; + vec3 ambient; // ambient color + float intensity; + float blend_distance; bool exterior; bool box_project; uint ambient_mode; @@ -50,38 +50,38 @@ struct ReflectionData { float pad1; float pad2; //0-8 is intensity,8-9 is ambient, mode - highp mat4 local_matrix; // up to here for spot and omni, rest is for directional + mat4 local_matrix; // up to here for spot and omni, rest is for directional // notes: for ambientblend, use distance to edge to blend between already existing global environment }; struct DirectionalLightData { - mediump vec3 direction; - highp float energy; // needs to be highp to avoid NaNs being created with high energy values (i.e. when using physical light units and over-exposing the image) - mediump vec3 color; - mediump float size; - mediump float specular; + vec3 direction; + float energy; // needs to be highp to avoid NaNs being created with high energy values (i.e. 
when using physical light units and over-exposing the image) + vec3 color; + float size; + float specular; uint mask; - highp float softshadow_angle; - highp float soft_shadow_scale; + float softshadow_angle; + float soft_shadow_scale; bool blend_splits; - mediump float shadow_opacity; - highp float fade_from; - highp float fade_to; + float shadow_opacity; + float fade_from; + float fade_to; uvec2 pad; uint bake_mode; - mediump float volumetric_fog_energy; - highp vec4 shadow_bias; - highp vec4 shadow_normal_bias; - highp vec4 shadow_transmittance_bias; - highp vec4 shadow_z_range; - highp vec4 shadow_range_begin; - highp vec4 shadow_split_offsets; - highp mat4 shadow_matrix1; - highp mat4 shadow_matrix2; - highp mat4 shadow_matrix3; - highp mat4 shadow_matrix4; - highp vec2 uv_scale1; - highp vec2 uv_scale2; - highp vec2 uv_scale3; - highp vec2 uv_scale4; + float volumetric_fog_energy; + vec4 shadow_bias; + vec4 shadow_normal_bias; + vec4 shadow_transmittance_bias; + vec4 shadow_z_range; + vec4 shadow_range_begin; + vec4 shadow_split_offsets; + mat4 shadow_matrix1; + mat4 shadow_matrix2; + mat4 shadow_matrix3; + mat4 shadow_matrix4; + vec2 uv_scale1; + vec2 uv_scale2; + vec2 uv_scale3; + vec2 uv_scale4; }; diff --git a/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl index 566461e8186..8f044d9d381 100644 --- a/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl @@ -13,60 +13,60 @@ #define SCENE_DATA_FLAGS_IN_SHADOW_PASS (1 << 7) struct SceneData { - highp mat4 projection_matrix; - highp mat4 inv_projection_matrix; - highp mat4 inv_view_matrix; - highp mat4 view_matrix; + mat4 projection_matrix; + mat4 inv_projection_matrix; + mat4 inv_view_matrix; + mat4 view_matrix; // only used for multiview - highp mat4 projection_matrix_view[MAX_VIEWS]; - highp mat4 inv_projection_matrix_view[MAX_VIEWS]; - highp vec4 eye_offset[MAX_VIEWS]; 
+ mat4 projection_matrix_view[MAX_VIEWS]; + mat4 inv_projection_matrix_view[MAX_VIEWS]; + vec4 eye_offset[MAX_VIEWS]; // Used for billboards to cast correct shadows. - highp mat4 main_cam_inv_view_matrix; + mat4 main_cam_inv_view_matrix; - highp vec2 viewport_size; - highp vec2 screen_pixel_size; + vec2 viewport_size; + vec2 screen_pixel_size; // Use vec4s because std140 doesn't play nice with vec2s, z and w are wasted. - highp vec4 directional_penumbra_shadow_kernel[32]; - highp vec4 directional_soft_shadow_kernel[32]; - highp vec4 penumbra_shadow_kernel[32]; - highp vec4 soft_shadow_kernel[32]; + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; - highp vec2 shadow_atlas_pixel_size; - highp vec2 directional_shadow_pixel_size; + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; uint directional_light_count; - mediump float dual_paraboloid_side; - highp float z_far; - highp float z_near; + float dual_paraboloid_side; + float z_far; + float z_near; - mediump float roughness_limiter_amount; - mediump float roughness_limiter_limit; - mediump float opaque_prepass_threshold; - highp uint flags; + float roughness_limiter_amount; + float roughness_limiter_limit; + float opaque_prepass_threshold; + uint flags; - mediump mat3 radiance_inverse_xform; + mat3 radiance_inverse_xform; - mediump vec4 ambient_light_color_energy; + vec4 ambient_light_color_energy; - mediump float ambient_color_sky_mix; - highp float fog_density; - highp float fog_height; - highp float fog_height_density; + float ambient_color_sky_mix; + float fog_density; + float fog_height; + float fog_height_density; - highp float fog_depth_curve; - highp float fog_depth_begin; - highp float fog_depth_end; - mediump float fog_sun_scatter; + float fog_depth_curve; + float fog_depth_begin; + float fog_depth_end; + float fog_sun_scatter; - mediump vec3 fog_light_color; - mediump float 
fog_aerial_perspective; + vec3 fog_light_color; + float fog_aerial_perspective; - highp float time; - highp float taa_frame_count; + float time; + float taa_frame_count; vec2 taa_jitter; float emissive_exposure_normalization; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl index 71510ee06a2..27020cf8652 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl @@ -9,7 +9,7 @@ float hash_3d(vec3 p) { return hash_2d(vec2(hash_2d(p.xy), p.z)); } -float compute_alpha_hash_threshold(vec3 pos, float hash_scale) { +half compute_alpha_hash_threshold(vec3 pos, float hash_scale) { vec3 dx = dFdx(pos); vec3 dy = dFdy(pos); @@ -35,24 +35,24 @@ float compute_alpha_hash_threshold(vec3 pos, float hash_scale) { float alpha_hash_threshold = (a_interp < (1.0 - min_lerp)) ? ((a_interp < min_lerp) ? cases.x : cases.y) : cases.z; - return clamp(alpha_hash_threshold, 0.00001, 1.0); + return half(clamp(alpha_hash_threshold, 0.00001, 1.0)); } #endif // ALPHA_HASH_USED #ifdef ALPHA_ANTIALIASING_EDGE_USED -float calc_mip_level(vec2 texture_coord) { +half calc_mip_level(vec2 texture_coord) { vec2 dx = dFdx(texture_coord); vec2 dy = dFdy(texture_coord); float delta_max_sqr = max(dot(dx, dx), dot(dy, dy)); - return max(0.0, 0.5 * log2(delta_max_sqr)); + return max(half(0.0), half(0.5) * half(log2(delta_max_sqr))); } -float compute_alpha_antialiasing_edge(float input_alpha, vec2 texture_coord, float alpha_edge) { - input_alpha *= 1.0 + max(0, calc_mip_level(texture_coord)) * 0.25; // 0.25 mip scale, magic number - input_alpha = (input_alpha - alpha_edge) / max(fwidth(input_alpha), 0.0001) + 0.5; - return clamp(input_alpha, 0.0, 1.0); +half compute_alpha_antialiasing_edge(half input_alpha, vec2 texture_coord, half alpha_edge) { + input_alpha *= half(1.0) + calc_mip_level(texture_coord) * half(0.25); + input_alpha = 
(input_alpha - alpha_edge) / max(fwidth(input_alpha), half(0.0001)) + half(0.5); + return clamp(input_alpha, half(0.0), half(1.0)); } #endif // ALPHA_ANTIALIASING_USED diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl index 586098e88e9..9ba27c9d607 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl @@ -13,69 +13,72 @@ #define SPEC_CONSTANT_LOOP_ANNOTATION #endif -float D_GGX(float cos_theta_m, float alpha) { - float a = cos_theta_m * alpha; - float k = alpha / (1.0 - cos_theta_m * cos_theta_m + a * a); - return k * k * (1.0 / M_PI); +half D_GGX(half NoH, half roughness, hvec3 n, hvec3 h) { + half a = NoH * roughness; +#ifdef EXPLICIT_FP16 + hvec3 NxH = cross(n, h); + half k = roughness / (dot(NxH, NxH) + a * a); +#else + float k = roughness / (1.0 - NoH * NoH + a * a); +#endif + half d = k * k * half(1.0 / M_PI); + return saturateHalf(d); } // From Earl Hammon, Jr. 
"PBR Diffuse Lighting for GGX+Smith Microsurfaces" https://www.gdcvault.com/play/1024478/PBR-Diffuse-Lighting-for-GGX -float V_GGX(float NdotL, float NdotV, float alpha) { - return 0.5 / mix(2.0 * NdotL * NdotV, NdotL + NdotV, alpha); +half V_GGX(half NdotL, half NdotV, half alpha) { + half v = half(0.5) / mix(half(2.0) * NdotL * NdotV, NdotL + NdotV, alpha); + return saturateHalf(v); } -float D_GGX_anisotropic(float cos_theta_m, float alpha_x, float alpha_y, float cos_phi, float sin_phi) { - float alpha2 = alpha_x * alpha_y; - highp vec3 v = vec3(alpha_y * cos_phi, alpha_x * sin_phi, alpha2 * cos_theta_m); - highp float v2 = dot(v, v); - float w2 = alpha2 / v2; - float D = alpha2 * w2 * w2 * (1.0 / M_PI); - return D; +half D_GGX_anisotropic(half cos_theta_m, half alpha_x, half alpha_y, half cos_phi, half sin_phi) { + half alpha2 = alpha_x * alpha_y; + vec3 v = vec3(alpha_y * cos_phi, alpha_x * sin_phi, alpha2 * cos_theta_m); + float v2 = dot(v, v); + half w2 = half(float(alpha2) / v2); + return alpha2 * w2 * w2 * half(1.0 / M_PI); } -float V_GGX_anisotropic(float alpha_x, float alpha_y, float TdotV, float TdotL, float BdotV, float BdotL, float NdotV, float NdotL) { - float Lambda_V = NdotL * length(vec3(alpha_x * TdotV, alpha_y * BdotV, NdotV)); - float Lambda_L = NdotV * length(vec3(alpha_x * TdotL, alpha_y * BdotL, NdotL)); - return 0.5 / (Lambda_V + Lambda_L); +half V_GGX_anisotropic(half alpha_x, half alpha_y, half TdotV, half TdotL, half BdotV, half BdotL, half NdotV, half NdotL) { + half Lambda_V = NdotL * length(hvec3(alpha_x * TdotV, alpha_y * BdotV, NdotV)); + half Lambda_L = NdotV * length(hvec3(alpha_x * TdotL, alpha_y * BdotL, NdotL)); + half v = half(0.5) / (Lambda_V + Lambda_L); + return saturateHalf(v); } -float SchlickFresnel(float u) { - float m = 1.0 - u; - float m2 = m * m; +half SchlickFresnel(half u) { + half m = half(1.0) - u; + half m2 = m * m; return m2 * m2 * m; // pow(m,5) } -vec3 F0(float metallic, float specular, vec3 albedo) { - float 
dielectric = 0.16 * specular * specular; +hvec3 F0(half metallic, half specular, hvec3 albedo) { + half dielectric = half(0.16) * specular * specular; // use albedo * metallic as colored specular reflectance at 0 angle for metallic materials; // see https://google.github.io/filament/Filament.md.html - return mix(vec3(dielectric), albedo, vec3(metallic)); + return mix(hvec3(dielectric), albedo, hvec3(metallic)); } -void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_directional, float attenuation, vec3 f0, uint orms, float specular_amount, vec3 albedo, inout float alpha, vec2 screen_uv, vec3 energy_compensation, +void light_compute(hvec3 N, hvec3 L, hvec3 V, half A, hvec3 light_color, bool is_directional, half attenuation, hvec3 f0, half roughness, half metallic, half specular_amount, hvec3 albedo, inout half alpha, vec2 screen_uv, hvec3 energy_compensation, #ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, + hvec3 backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_boost, - float transmittance_z, + hvec4 transmittance_color, + half transmittance_depth, + half transmittance_boost, + half transmittance_z, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, + half rim, half rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_roughness, vec3 vertex_normal, + half clearcoat, half clearcoat_roughness, hvec3 vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED - vec3 B, vec3 T, float anisotropy, + hvec3 B, hvec3 T, half anisotropy, #endif - inout vec3 diffuse_light, inout vec3 specular_light) { - vec4 orms_unpacked = unpackUnorm4x8(orms); - float roughness = orms_unpacked.y; - float metallic = orms_unpacked.z; - + inout hvec3 diffuse_light, inout hvec3 specular_light) { #if defined(LIGHT_CODE_USED) // Light is written by the user shader. 
mat4 inv_view_matrix = scene_data_block.data.inv_view_matrix; @@ -93,66 +96,71 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_di #define inv_projection_matrix scene_data_block.data.inv_projection_matrix vec2 read_viewport_size = scene_data_block.data.viewport_size; - vec3 normal = N; - vec3 light = L; - vec3 view = V; + vec3 normal_highp = vec3(N); + vec3 light_highp = vec3(L); + vec3 view_highp = vec3(V); + float specular_amount_highp = float(specular_amount); + vec3 light_color_highp = vec3(light_color); + float attenuation_highp = float(attenuation); + vec3 diffuse_light_highp = vec3(diffuse_light); + vec3 specular_light_highp = vec3(specular_light); #CODE : LIGHT + + diffuse_light = hvec3(diffuse_light_highp); + specular_light = hvec3(specular_light_highp); #else // !LIGHT_CODE_USED - float NdotL = min(A + dot(N, L), 1.0); - float cNdotV = max(dot(N, V), 1e-4); + half NdotL = min(A + dot(N, L), half(1.0)); + half cNdotV = max(dot(N, V), half(1e-4)); #ifdef LIGHT_TRANSMITTANCE_USED { #ifdef SSS_MODE_SKIN - float scale = 8.25 / transmittance_depth; - float d = scale * abs(transmittance_z); - float dd = -d * d; - vec3 profile = vec3(0.233, 0.455, 0.649) * exp(dd / 0.0064) + + half scale = half(8.25) / transmittance_depth; + half d = scale * abs(transmittance_z); + float dd = float(-d * d); + hvec3 profile = hvec3(vec3(0.233, 0.455, 0.649) * exp(dd / 0.0064) + vec3(0.1, 0.336, 0.344) * exp(dd / 0.0484) + vec3(0.118, 0.198, 0.0) * exp(dd / 0.187) + vec3(0.113, 0.007, 0.007) * exp(dd / 0.567) + vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) + - vec3(0.078, 0.0, 0.0) * exp(dd / 7.41); + vec3(0.078, 0.0, 0.0) * exp(dd / 7.41)); - diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI); + diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, half(0.0), half(1.0)) * half(1.0 / M_PI); #else - float scale = 8.25 / 
transmittance_depth; - float d = scale * abs(transmittance_z); - float dd = -d * d; - diffuse_light += exp(dd) * transmittance_color.rgb * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI); + half scale = half(8.25) / transmittance_depth; + half d = scale * abs(transmittance_z); + half dd = -d * d; + diffuse_light += exp(dd) * transmittance_color.rgb * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, half(0.0), half(1.0)) * half(1.0 / M_PI); #endif } #endif //LIGHT_TRANSMITTANCE_USED #if defined(LIGHT_RIM_USED) // Epsilon min to prevent pow(0, 0) singularity which results in undefined behavior. - float rim_light = pow(max(1e-4, 1.0 - cNdotV), max(0.0, (1.0 - roughness) * 16.0)); - diffuse_light += rim_light * rim * mix(vec3(1.0), albedo, rim_tint) * light_color; + half rim_light = pow(max(half(1e-4), half(1.0) - cNdotV), max(half(0.0), (half(1.0) - roughness) * half(16.0))); + diffuse_light += rim_light * rim * mix(hvec3(1.0), albedo, rim_tint) * light_color; #endif // We skip checking on attenuation on directional lights to avoid a branch that is not as beneficial for directional lights as the other ones. 
- const float EPSILON = 1e-6f; - if (is_directional || attenuation > EPSILON) { - float cNdotL = max(NdotL, 0.0); + if (is_directional || attenuation > HALF_FLT_MIN) { + half cNdotL = max(NdotL, half(0.0)); #if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) - vec3 H = normalize(V + L); -#endif -#if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) - float cLdotH = clamp(A + dot(L, H), 0.0, 1.0); + hvec3 H = normalize(V + L); + half cLdotH = clamp(A + dot(L, H), half(0.0), half(1.0)); #endif #if defined(LIGHT_CLEARCOAT_USED) // Clearcoat ignores normal_map, use vertex normal instead - float ccNdotL = max(min(A + dot(vertex_normal, L), 1.0), 0.0); - float ccNdotH = clamp(A + dot(vertex_normal, H), 0.0, 1.0); - float ccNdotV = max(dot(vertex_normal, V), 1e-4); - float cLdotH5 = SchlickFresnel(cLdotH); + half ccNdotL = clamp(A + dot(vertex_normal, L), half(0.0), half(1.0)); + half ccNdotH = clamp(A + dot(vertex_normal, H), half(0.0), half(1.0)); + half ccNdotV = max(dot(vertex_normal, V), half(1e-4)); + half cLdotH5 = SchlickFresnel(cLdotH); - float Dr = D_GGX(ccNdotH, mix(0.001, 0.1, clearcoat_roughness)); - float Gr = 0.25 / (cLdotH * cLdotH); - float Fr = mix(.04, 1.0, cLdotH5); - float clearcoat_specular_brdf_NL = clearcoat * Gr * Fr * Dr * cNdotL; + half Dr = D_GGX(ccNdotH, half(mix(half(0.001), half(0.1), clearcoat_roughness)), vertex_normal, H); + half Gr = half(0.25) / (cLdotH * cLdotH); + half Fr = mix(half(0.04), half(1.0), cLdotH5); + half clearcoat_specular_brdf_NL = clearcoat * Gr * Fr * Dr * cNdotL; specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount; @@ -160,48 +168,49 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_di // but to do so we need to rearrange this entire function #endif // LIGHT_CLEARCOAT_USED - if (metallic < 1.0) { - float diffuse_brdf_NL; // BRDF times N.L for calculating diffuse 
radiance + if (metallic < half(1.0)) { + half diffuse_brdf_NL; // BRDF times N.L for calculating diffuse radiance #if defined(DIFFUSE_LAMBERT_WRAP) // Energy conserving lambert wrap shader. // https://web.archive.org/web/20210228210901/http://blog.stevemcauley.com/2011/12/03/energy-conserving-wrapped-diffuse/ - diffuse_brdf_NL = max(0.0, (NdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness))) * (1.0 / M_PI); + half op_roughness = half(1.0) + roughness; + diffuse_brdf_NL = max(half(0.0), (NdotL + roughness) / (op_roughness * op_roughness)) * half(1.0 / M_PI); #elif defined(DIFFUSE_TOON) - diffuse_brdf_NL = smoothstep(-roughness, max(roughness, 0.01), NdotL) * (1.0 / M_PI); + diffuse_brdf_NL = smoothstep(-roughness, max(roughness, half(0.01)), NdotL) * half(1.0 / M_PI); #elif defined(DIFFUSE_BURLEY) { - float FD90_minus_1 = 2.0 * cLdotH * cLdotH * roughness - 0.5; - float FdV = 1.0 + FD90_minus_1 * SchlickFresnel(cNdotV); - float FdL = 1.0 + FD90_minus_1 * SchlickFresnel(cNdotL); - diffuse_brdf_NL = (1.0 / M_PI) * FdV * FdL * cNdotL; + half FD90_minus_1 = half(2.0) * cLdotH * cLdotH * roughness - half(0.5); + half FdV = half(1.0) + FD90_minus_1 * SchlickFresnel(cNdotV); + half FdL = half(1.0) + FD90_minus_1 * SchlickFresnel(cNdotL); + diffuse_brdf_NL = half(1.0 / M_PI) * FdV * FdL * cNdotL; } #else // lambert - diffuse_brdf_NL = cNdotL * (1.0 / M_PI); + diffuse_brdf_NL = cNdotL * half(1.0 / M_PI); #endif diffuse_light += light_color * diffuse_brdf_NL * attenuation; #if defined(LIGHT_BACKLIGHT_USED) - diffuse_light += light_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; + diffuse_light += light_color * (hvec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; #endif } - if (roughness > 0.0) { + if (roughness > half(0.0)) { #if defined(SPECULAR_SCHLICK_GGX) - float cNdotH = clamp(A + dot(N, H), 0.0, 1.0); + half cNdotH = clamp(A + dot(N, H), half(0.0), half(1.0)); #endif // Apply specular light. 
// FIXME: roughness == 0 should not disable specular light entirely #if defined(SPECULAR_TOON) - vec3 R = normalize(-reflect(L, N)); - float RdotV = dot(R, V); - float mid = 1.0 - roughness; + hvec3 R = normalize(-reflect(L, N)); + half RdotV = dot(R, V); + half mid = half(1.0) - roughness; mid *= mid; - float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid; + half intensity = smoothstep(mid - roughness * half(0.5), mid + roughness * half(0.5), RdotV) * mid; diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection #elif defined(SPECULAR_DISABLED) @@ -209,34 +218,34 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_di #elif defined(SPECULAR_SCHLICK_GGX) // shlick+ggx as default - float alpha_ggx = roughness * roughness; + half alpha_ggx = roughness * roughness; #if defined(LIGHT_ANISOTROPY_USED) - float aspect = sqrt(1.0 - anisotropy * 0.9); - float ax = alpha_ggx / aspect; - float ay = alpha_ggx * aspect; - float XdotH = dot(T, H); - float YdotH = dot(B, H); - float D = D_GGX_anisotropic(cNdotH, ax, ay, XdotH, YdotH); - float G = V_GGX_anisotropic(ax, ay, dot(T, V), dot(T, L), dot(B, V), dot(B, L), cNdotV, cNdotL); + half aspect = sqrt(half(1.0) - anisotropy * half(0.9)); + half ax = alpha_ggx / aspect; + half ay = alpha_ggx * aspect; + half XdotH = dot(T, H); + half YdotH = dot(B, H); + half D = D_GGX_anisotropic(cNdotH, ax, ay, XdotH, YdotH); + half G = V_GGX_anisotropic(ax, ay, dot(T, V), dot(T, L), dot(B, V), dot(B, L), cNdotV, cNdotL); #else // LIGHT_ANISOTROPY_USED - float D = D_GGX(cNdotH, alpha_ggx); - float G = V_GGX(cNdotL, cNdotV, alpha_ggx); + half D = D_GGX(cNdotH, alpha_ggx, N, H); + half G = V_GGX(cNdotL, cNdotV, alpha_ggx); #endif // LIGHT_ANISOTROPY_USED // F #if !defined(LIGHT_CLEARCOAT_USED) - float cLdotH5 = SchlickFresnel(cLdotH); + half cLdotH5 = SchlickFresnel(cLdotH); #endif // 
Calculate Fresnel using specular occlusion term from Filament: // https://google.github.io/filament/Filament.html#lighting/occlusion/specularocclusion - float f90 = clamp(dot(f0, vec3(50.0 * 0.33)), metallic, 1.0); - vec3 F = f0 + (f90 - f0) * cLdotH5; - vec3 specular_brdf_NL = energy_compensation * (cNdotL * D * F * G); + half f90 = clamp(dot(f0, hvec3(50.0 * 0.33)), metallic, half(1.0)); + hvec3 F = f0 + (f90 - f0) * cLdotH5; + hvec3 specular_brdf_NL = energy_compensation * cNdotL * D * F * G; specular_light += specular_brdf_NL * light_color * attenuation * specular_amount; #endif } #ifdef USE_SHADOW_TO_OPACITY - alpha = min(alpha, clamp(1.0 - attenuation, 0.0, 1.0)); + alpha = min(alpha, clamp(half(1.0 - attenuation), half(0.0), half(1.0))); #endif } #endif // LIGHT_CODE_USED @@ -251,13 +260,13 @@ float quick_hash(vec2 pos) { return fract(magic.z * fract(dot(pos, magic.xy))); } -float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec4 coord, float taa_frame_count) { +half sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec4 coord, float taa_frame_count) { vec2 pos = coord.xy; float depth = coord.z; //if only one sample is taken, take it from the center if (sc_directional_soft_shadow_samples() == 0) { - return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); + return half(textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0))); } mat2 disk_rotation; @@ -275,16 +284,16 @@ float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, ve avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); } - return avg * (1.0 / float(sc_directional_soft_shadow_samples())); + return half(avg * (1.0 / float(sc_directional_soft_shadow_samples()))); } -float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord, float 
taa_frame_count) { +half sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord, float taa_frame_count) { vec2 pos = coord.xy; float depth = coord.z; //if only one sample is taken, take it from the center if (sc_soft_shadow_samples() == 0) { - return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); + return half(textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0))); } mat2 disk_rotation; @@ -302,15 +311,15 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord, fl avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.soft_shadow_kernel[i].xy), depth, 1.0)); } - return avg * (1.0 / float(sc_soft_shadow_samples())); + return half(avg * (1.0 / float(sc_soft_shadow_samples()))); } -float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec4 uv_rect, vec2 flip_offset, float depth, float taa_frame_count) { +half sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec4 uv_rect, vec2 flip_offset, float depth, float taa_frame_count) { //if only one sample is taken, take it from the center if (sc_soft_shadow_samples() == 0) { vec2 pos = coord * 0.5 + 0.5; pos = uv_rect.xy + pos * uv_rect.zw; - return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); + return half(textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0))); } mat2 disk_rotation; @@ -346,10 +355,10 @@ float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(sample_coord, depth, 1.0)); } - return avg * (1.0 / float(sc_soft_shadow_samples())); + return half(avg * (1.0 / float(sc_soft_shadow_samples()))); } -float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex_scale, float taa_frame_count) { +half sample_directional_soft_shadow(texture2D shadow, vec3 
pssm_coord, vec2 tex_scale, float taa_frame_count) { //find blocker float blocker_count = 0.0; float blocker_average = 0.0; @@ -386,62 +395,62 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex s += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(suv, pssm_coord.z, 1.0)); } - return s / float(sc_directional_penumbra_shadow_samples()); + return half(s / float(sc_directional_penumbra_shadow_samples())); } else { //no blockers found, so no shadow - return 1.0; + return half(1.0); } } #endif // SHADOWS_DISABLED -float get_omni_attenuation(float distance, float inv_range, float decay) { +half get_omni_attenuation(float distance, float inv_range, float decay) { float nd = distance * inv_range; nd *= nd; nd *= nd; // nd^4 nd = max(1.0 - nd, 0.0); nd *= nd; // nd^2 - return nd * pow(max(distance, 0.0001), -decay); + return half(nd * pow(max(distance, 0.0001), -decay)); } -void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float taa_frame_count, vec3 albedo, inout float alpha, vec2 screen_uv, vec3 energy_compensation, +void light_process_omni(uint idx, vec3 vertex, hvec3 eye_vec, hvec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, hvec3 f0, half roughness, half metallic, float taa_frame_count, hvec3 albedo, inout half alpha, vec2 screen_uv, hvec3 energy_compensation, #ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, + hvec3 backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_boost, + hvec4 transmittance_color, + half transmittance_depth, + half transmittance_boost, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, + half rim, half rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_roughness, vec3 vertex_normal, + half clearcoat, half clearcoat_roughness, hvec3 vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED - vec3 binormal, vec3 tangent, float 
anisotropy, + hvec3 binormal, hvec3 tangent, half anisotropy, #endif - inout vec3 diffuse_light, inout vec3 specular_light) { - const float EPSILON = 1e-6f; + inout hvec3 diffuse_light, inout hvec3 specular_light) { // Omni light attenuation. vec3 light_rel_vec = omni_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); + half omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); // Compute size. - float size = 0.0; + half size = half(0.0); if (sc_use_light_soft_shadows() && omni_lights.data[idx].size > 0.0) { - float t = omni_lights.data[idx].size / max(0.001, light_length); - size = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + half t = half(omni_lights.data[idx].size / max(0.001, light_length)); + size = half(1.0) / sqrt(half(1.0) + t * t); + size = max(half(1.0) - size, half(0.0)); } - float shadow = 1.0; + half shadow = half(1.0); #ifndef SHADOWS_DISABLED // Omni light shadow. 
- if (omni_attenuation > EPSILON && omni_lights.data[idx].shadow_opacity > 0.001) { + if (omni_attenuation > HALF_FLT_MIN && omni_lights.data[idx].shadow_opacity > 0.001) { // there is a shadowmap vec2 texel_size = scene_data_block.data.shadow_atlas_pixel_size; vec4 base_uv_rect = omni_lights.data[idx].atlas_rect; @@ -456,7 +465,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float shadow_len = length(local_vert); //need to remember shadow len from here vec3 shadow_dir = normalize(local_vert); - vec3 local_normal = normalize(mat3(omni_lights.data[idx].shadow_matrix) * normal); + vec3 local_normal = normalize(mat3(omni_lights.data[idx].shadow_matrix) * vec3(normal)); vec3 normal_bias = local_normal * omni_lights.data[idx].shadow_normal_bias * (1.0 - abs(dot(local_normal, shadow_dir))); if (sc_use_light_soft_shadows() && omni_lights.data[idx].soft_shadow_size > 0.0) { @@ -520,7 +529,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v z_norm += omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias; - shadow = 0.0; + shadow = half(0.0); SPEC_CONSTANT_LOOP_ANNOTATION for (uint i = 0; i < sc_penumbra_shadow_samples(); i++) { @@ -541,15 +550,15 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v pos.xy = pos.xy * 0.5 + 0.5; pos.xy = uv_rect.xy + pos.xy * uv_rect.zw; - shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(pos.xy, z_norm, 1.0)); + shadow += half(textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(pos.xy, z_norm, 1.0))); } - shadow /= float(sc_penumbra_shadow_samples()); - shadow = mix(1.0, shadow, omni_lights.data[idx].shadow_opacity); + shadow /= half(sc_penumbra_shadow_samples()); + shadow = mix(half(1.0), shadow, half(omni_lights.data[idx].shadow_opacity)); } else { //no blockers found, so no shadow - shadow = 1.0; + shadow = half(1.0); } } else { vec4 uv_rect = base_uv_rect; @@ -565,7 +574,7 @@ void 
light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float depth = shadow_len - omni_lights.data[idx].shadow_bias; depth *= omni_lights.data[idx].inv_radius; depth = 1.0 - depth; - shadow = mix(1.0, sample_omni_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale / shadow_sample.z, pos, uv_rect, flip_offset, depth, taa_frame_count), omni_lights.data[idx].shadow_opacity); + shadow = mix(half(1.0), sample_omni_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale / shadow_sample.z, pos, uv_rect, flip_offset, depth, taa_frame_count), half(omni_lights.data[idx].shadow_opacity)); } } #endif @@ -573,7 +582,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 color = omni_lights.data[idx].color; #ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; //no transmittance by default + half transmittance_z = transmittance_depth; //no transmittance by default transmittance_color.a *= omni_attenuation; #ifndef SHADOWS_DISABLED if (omni_lights.data[idx].shadow_opacity > 0.001) { @@ -586,7 +595,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v // Omni lights use direction.xy to store to store the offset between the two paraboloid regions vec2 flip_offset = omni_lights.data[idx].direction.xy; - vec3 local_vert = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal) * omni_lights.data[idx].transmittance_bias, 1.0)).xyz; + vec3 local_vert = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normal * omni_lights.data[idx].transmittance_bias, 1.0)).xyz; float shadow_len = length(local_vert); //need to remember shadow len from here vec3 shadow_sample = normalize(local_vert); @@ -604,7 +613,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v pos = pos * 0.5 + 0.5; pos = uv_rect.xy + pos * uv_rect.zw; float shadow_z = textureLod(sampler2D(shadow_atlas, SAMPLER_LINEAR_CLAMP), pos, 0.0).r; - 
transmittance_z = (depth - shadow_z) / omni_lights.data[idx].inv_radius; + transmittance_z = half((depth - shadow_z) / omni_lights.data[idx].inv_radius); } #endif // !SHADOWS_DISABLED #endif // LIGHT_TRANSMITTANCE_USED @@ -667,7 +676,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v } vec3 light_rel_vec_norm = light_rel_vec / light_length; - light_compute(normal, light_rel_vec_norm, eye_vec, size, color, false, omni_attenuation * shadow, f0, orms, omni_lights.data[idx].specular_amount, albedo, alpha, screen_uv, energy_compensation, + light_compute(normal, hvec3(light_rel_vec_norm), eye_vec, size, hvec3(color), false, omni_attenuation * shadow, f0, roughness, metallic, half(omni_lights.data[idx].specular_amount), albedo, alpha, screen_uv, energy_compensation, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -702,54 +711,53 @@ vec2 normal_to_panorama(vec3 n) { return panorama_coords; } -void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float taa_frame_count, vec3 albedo, inout float alpha, vec2 screen_uv, vec3 energy_compensation, +void light_process_spot(uint idx, vec3 vertex, hvec3 eye_vec, hvec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, hvec3 f0, half roughness, half metallic, float taa_frame_count, hvec3 albedo, inout half alpha, vec2 screen_uv, hvec3 energy_compensation, #ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, + hvec3 backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_boost, + hvec4 transmittance_color, + half transmittance_depth, + half transmittance_boost, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, + half rim, half rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_roughness, vec3 vertex_normal, + half clearcoat, half clearcoat_roughness, hvec3 vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED - vec3 binormal, vec3 
tangent, float anisotropy, + hvec3 binormal, hvec3 tangent, half anisotropy, #endif - inout vec3 diffuse_light, - inout vec3 specular_light) { - const float EPSILON = 1e-6f; + inout hvec3 diffuse_light, + inout hvec3 specular_light) { // Spot light attenuation. vec3 light_rel_vec = spot_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - vec3 light_rel_vec_norm = light_rel_vec / light_length; - float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); - vec3 spot_dir = spot_lights.data[idx].direction; + hvec3 light_rel_vec_norm = hvec3(light_rel_vec / light_length); + half spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); + hvec3 spot_dir = hvec3(spot_lights.data[idx].direction); + half cone_angle = half(spot_lights.data[idx].cone_angle); + half scos = max(dot(-light_rel_vec_norm, spot_dir), cone_angle); - // This conversion to a highp float is crucial to prevent light leaking - // due to precision errors in the following calculations (cone angle is mediump). - highp float cone_angle = spot_lights.data[idx].cone_angle; - float scos = max(dot(-light_rel_vec_norm, spot_dir), cone_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cone_angle)); - spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation); + // This conversion to a highp float is crucial to prevent light leaking due to precision errors. + float spot_rim = max(1e-4, float(half(1.0) - scos) / float(half(1.0) - cone_angle)); + spot_attenuation *= half(1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation)); // Compute size. 
- float size = 0.0; + half size = half(0.0); if (sc_use_light_soft_shadows() && spot_lights.data[idx].size > 0.0) { - float t = spot_lights.data[idx].size / max(0.001, light_length); - size = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + half t = half(spot_lights.data[idx].size / max(0.001, light_length)); + size = half(1.0) / sqrt(half(1.0) + t * t); + size = max(half(1.0) - size, half(0.0)); } - float shadow = 1.0; + half shadow = half(1.0); #ifndef SHADOWS_DISABLED // Spot light shadow. - if (spot_attenuation > EPSILON && spot_lights.data[idx].shadow_opacity > 0.001) { - vec3 normal_bias = normal * light_length * spot_lights.data[idx].shadow_normal_bias * (1.0 - abs(dot(normal, light_rel_vec_norm))); + if (spot_attenuation > HALF_FLT_MIN && spot_lights.data[idx].shadow_opacity > 0.001) { + vec3 normal_bias = vec3(normal) * light_length * spot_lights.data[idx].shadow_normal_bias * (1.0 - abs(dot(normal, light_rel_vec_norm))); //there is a shadowmap vec4 v = vec4(vertex + normal_bias, 1.0); @@ -762,7 +770,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v //soft shadow //find blocker - float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius; + float z_norm = dot(vec3(spot_dir), -light_rel_vec) * spot_lights.data[idx].inv_radius; vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; @@ -797,39 +805,38 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float penumbra = (-z_norm + blocker_average) / (1.0 - blocker_average); uv_size *= penumbra; - shadow = 0.0; + shadow = half(0.0); SPEC_CONSTANT_LOOP_ANNOTATION for (uint i = 0; i < sc_penumbra_shadow_samples(); i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); - shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, splane.z, 1.0)); + 
shadow += half(textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, splane.z, 1.0))); } - shadow /= float(sc_penumbra_shadow_samples()); - shadow = mix(1.0, shadow, spot_lights.data[idx].shadow_opacity); + shadow /= half(sc_penumbra_shadow_samples()); + shadow = mix(half(1.0), shadow, half(spot_lights.data[idx].shadow_opacity)); } else { //no blockers found, so no shadow - shadow = 1.0; + shadow = half(1.0); } } else { //hard shadow vec3 shadow_uv = vec3(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z); - shadow = mix(1.0, sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data_block.data.shadow_atlas_pixel_size, shadow_uv, taa_frame_count), spot_lights.data[idx].shadow_opacity); + shadow = mix(half(1.0), sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data_block.data.shadow_atlas_pixel_size, shadow_uv, taa_frame_count), half(spot_lights.data[idx].shadow_opacity)); } } #endif // SHADOWS_DISABLED vec3 color = spot_lights.data[idx].color; - float specular_amount = spot_lights.data[idx].specular_amount; #ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; + half transmittance_z = transmittance_depth; transmittance_color.a *= spot_attenuation; #ifndef SHADOWS_DISABLED if (spot_lights.data[idx].shadow_opacity > 0.001) { - vec4 splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal) * spot_lights.data[idx].transmittance_bias, 1.0)); + vec4 splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - vec3(normal) * spot_lights.data[idx].transmittance_bias, 1.0)); splane /= splane.w; vec3 shadow_uv = vec3(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z); @@ -841,8 +848,8 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v shadow_z = 2.0 * z_near * z_far / (z_far + z_near - shadow_z * (z_far - z_near)); //distance to light 
plane - float z = dot(spot_dir, -light_rel_vec); - transmittance_z = z - shadow_z; + float z = dot(vec3(spot_dir), -light_rel_vec); + transmittance_z = half(z - shadow_z); } #endif // !SHADOWS_DISABLED #endif // LIGHT_TRANSMITTANCE_USED @@ -871,7 +878,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v } } - light_compute(normal, light_rel_vec_norm, eye_vec, size, color, false, spot_attenuation * shadow, f0, orms, spot_lights.data[idx].specular_amount, albedo, alpha, screen_uv, energy_compensation, + light_compute(normal, hvec3(light_rel_vec_norm), eye_vec, size, hvec3(color), false, spot_attenuation * shadow, f0, roughness, metallic, half(spot_lights.data[idx].specular_amount), albedo, alpha, screen_uv, energy_compensation, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -893,7 +900,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v diffuse_light, specular_light); } -void reflection_process(uint ref_index, vec3 vertex, vec3 ref_vec, vec3 normal, float roughness, vec3 ambient_light, vec3 specular_light, inout vec4 ambient_accum, inout vec4 reflection_accum) { +void reflection_process(uint ref_index, vec3 vertex, hvec3 ref_vec, hvec3 normal, half roughness, hvec3 ambient_light, hvec3 specular_light, inout hvec4 ambient_accum, inout hvec4 reflection_accum) { vec3 box_extents = reflections.data[ref_index].box_extents; vec3 local_pos = (reflections.data[ref_index].local_matrix * vec4(vertex, 1.0)).xyz; @@ -901,17 +908,16 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 ref_vec, vec3 normal, return; } - float blend = 1.0; + half blend = half(1.0); if (reflections.data[ref_index].blend_distance != 0.0) { vec3 axis_blend_distance = min(vec3(reflections.data[ref_index].blend_distance), box_extents); - vec3 blend_axes = abs(local_pos) - box_extents + axis_blend_distance; - blend_axes /= axis_blend_distance; - blend_axes = clamp(1.0 - blend_axes, vec3(0.0), vec3(1.0)); - - blend = 
pow(blend_axes.x * blend_axes.y * blend_axes.z, 2.0); + vec3 blend_axes_highp = abs(local_pos) - box_extents + axis_blend_distance; + hvec3 blend_axes = hvec3(blend_axes_highp / axis_blend_distance); + blend_axes = clamp(half(1.0) - blend_axes, hvec3(0.0), hvec3(1.0)); + blend = pow(blend_axes.x * blend_axes.y * blend_axes.z, half(2.0)); } - if (reflections.data[ref_index].intensity > 0.0 && reflection_accum.a < 1.0) { // compute reflection + if (reflections.data[ref_index].intensity > 0.0 && reflection_accum.a < half(1.0)) { // compute reflection vec3 local_ref_vec = (reflections.data[ref_index].local_matrix * vec4(ref_vec, 0.0)).xyz; @@ -928,20 +934,20 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 ref_vec, vec3 normal, local_ref_vec = posonbox - reflections.data[ref_index].box_offset; } - vec4 reflection; - float reflection_blend = max(0.0, blend - reflection_accum.a); + hvec4 reflection; + half reflection_blend = max(half(0.0), blend - reflection_accum.a); - reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(local_ref_vec, reflections.data[ref_index].index), sqrt(roughness) * MAX_ROUGHNESS_LOD).rgb * sc_luminance_multiplier(); - reflection.rgb *= reflections.data[ref_index].exposure_normalization; + reflection.rgb = hvec3(textureLod(samplerCubeArray(reflection_atlas, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(local_ref_vec, reflections.data[ref_index].index), sqrt(roughness) * MAX_ROUGHNESS_LOD).rgb) * sc_luminance_multiplier(); + reflection.rgb *= half(reflections.data[ref_index].exposure_normalization); reflection.a = reflection_blend; - reflection.rgb *= reflections.data[ref_index].intensity; + reflection.rgb *= half(reflections.data[ref_index].intensity); reflection.rgb *= reflection.a; reflection_accum += reflection; } - if (ambient_accum.a >= 1.0) { + if (ambient_accum.a >= half(1.0)) { return; } @@ -951,20 +957,20 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 
ref_vec, vec3 normal, } break; case REFLECTION_AMBIENT_ENVIRONMENT: { vec3 local_amb_vec = (reflections.data[ref_index].local_matrix * vec4(normal, 0.0)).xyz; - vec4 ambient_out; - float ambient_blend = max(0.0, blend - ambient_accum.a); + hvec4 ambient_out; + half ambient_blend = max(half(0.0), blend - ambient_accum.a); - ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb; - ambient_out.rgb *= reflections.data[ref_index].exposure_normalization; + ambient_out.rgb = hvec3(textureLod(samplerCubeArray(reflection_atlas, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb); + ambient_out.rgb *= half(reflections.data[ref_index].exposure_normalization); ambient_out.a = ambient_blend; ambient_out.rgb *= ambient_out.a; ambient_accum += ambient_out; } break; case REFLECTION_AMBIENT_COLOR: { - vec4 ambient_out; - float ambient_blend = max(0.0, blend - ambient_accum.a); + hvec4 ambient_out; + half ambient_blend = max(half(0.0), blend - ambient_accum.a); - ambient_out.rgb = reflections.data[ref_index].ambient; + ambient_out.rgb = hvec3(reflections.data[ref_index].ambient); ambient_out.a = ambient_blend; ambient_out.rgb *= ambient_out.a; ambient_accum += ambient_out; @@ -972,7 +978,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 ref_vec, vec3 normal, } } -float blur_shadow(float shadow) { +half blur_shadow(half shadow) { return shadow; #if 0 //disabling for now, will investigate later diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_vertex_lights_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_vertex_lights_inc.glsl index e962c8f7f33..10da24c6856 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_vertex_lights_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_vertex_lights_inc.glsl @@ -3,80 +3,81 @@ 
// Eyeballed approximation of `exp2(15.0 * (1.0 - roughness) + 1.0) * 0.25`. // Uses slightly more FMA instructions (2x rate) to avoid special instructions (0.25x rate). // Range is reduced to [0.64,4977] from [0.68,2,221,528] which makes mediump feasible for the rest of the shader. -mediump float roughness_to_shininess(mediump float roughness) { - mediump float r = 1.2 - roughness; - mediump float r2 = r * r; - return r * r2 * r2 * 2000.0; +half roughness_to_shininess(half roughness) { + half r = half(1.2) - roughness; + half r2 = r * r; + return r * r2 * r2 * half(2000.0); } -void light_compute_vertex(vec3 N, vec3 L, vec3 V, vec3 light_color, bool is_directional, float roughness, - inout vec3 diffuse_light, inout vec3 specular_light) { - float NdotL = min(dot(N, L), 1.0); - float cNdotL = max(NdotL, 0.0); // clamped NdotL +void light_compute_vertex(hvec3 N, hvec3 L, hvec3 V, hvec3 light_color, bool is_directional, half roughness, + inout hvec3 diffuse_light, inout hvec3 specular_light) { + half NdotL = min(dot(N, L), half(1.0)); + half cNdotL = max(NdotL, half(0.0)); // clamped NdotL #if defined(DIFFUSE_LAMBERT_WRAP) // Energy conserving lambert wrap shader. // https://web.archive.org/web/20210228210901/http://blog.stevemcauley.com/2011/12/03/energy-conserving-wrapped-diffuse/ - float diffuse_brdf_NL = max(0.0, (cNdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness))) * (1.0 / M_PI); + half diffuse_brdf_NL = max(half(0.0), (cNdotL + roughness) / ((half(1.0) + roughness) * (half(1.0) + roughness))) * half(1.0 / M_PI); #else // lambert - float diffuse_brdf_NL = cNdotL * (1.0 / M_PI); + half diffuse_brdf_NL = cNdotL * half(1.0 / M_PI); #endif diffuse_light += light_color * diffuse_brdf_NL; #if !defined(SPECULAR_DISABLED) - float specular_brdf_NL = 0.0; + half specular_brdf_NL = half(0.0); // Normalized blinn always unless disabled.
- vec3 H = normalize(V + L); - float cNdotH = clamp(dot(N, H), 0.0, 1.0); - float shininess = roughness_to_shininess(roughness); - float blinn = pow(cNdotH, shininess); - blinn *= (shininess + 2.0) * (1.0 / (8.0 * M_PI)) * cNdotL; + hvec3 H = normalize(V + L); + half cNdotH = clamp(dot(N, H), half(0.0), half(1.0)); + half shininess = roughness_to_shininess(roughness); + half blinn = pow(cNdotH, shininess); + blinn *= (shininess + half(2.0)) * half(1.0 / (8.0 * M_PI)) * cNdotL; specular_brdf_NL = blinn; specular_light += specular_brdf_NL * light_color; #endif } -float get_omni_attenuation(float distance, float inv_range, float decay) { +half get_omni_attenuation(float distance, float inv_range, float decay) { float nd = distance * inv_range; nd *= nd; nd *= nd; // nd^4 nd = max(1.0 - nd, 0.0); nd *= nd; // nd^2 - return nd * pow(max(distance, 0.0001), -decay); + return half(nd * pow(max(distance, 0.0001), -decay)); } -void light_process_omni_vertex(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, float roughness, - inout vec3 diffuse_light, inout vec3 specular_light) { +void light_process_omni_vertex(uint idx, vec3 vertex, hvec3 eye_vec, hvec3 normal, half roughness, + inout hvec3 diffuse_light, inout hvec3 specular_light) { vec3 light_rel_vec = omni_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); - vec3 color = omni_lights.data[idx].color * omni_attenuation; + hvec3 light_rel_vec_norm = hvec3(light_rel_vec / light_length); + half omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); + hvec3 color = hvec3(omni_lights.data[idx].color * omni_attenuation); - light_compute_vertex(normal, normalize(light_rel_vec), eye_vec, color, false, roughness, + light_compute_vertex(normal, light_rel_vec_norm, eye_vec, color, false, roughness, 
diffuse_light, specular_light); } -void light_process_spot_vertex(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, float roughness, - inout vec3 diffuse_light, - inout vec3 specular_light) { +void light_process_spot_vertex(uint idx, vec3 vertex, hvec3 eye_vec, hvec3 normal, half roughness, + inout hvec3 diffuse_light, + inout hvec3 specular_light) { vec3 light_rel_vec = spot_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); - vec3 spot_dir = spot_lights.data[idx].direction; + hvec3 light_rel_vec_norm = hvec3(light_rel_vec / light_length); + half spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); + hvec3 spot_dir = hvec3(spot_lights.data[idx].direction); - // This conversion to a highp float is crucial to prevent light leaking - // due to precision errors in the following calculations (cone angle is mediump). - highp float cone_angle = spot_lights.data[idx].cone_angle; - float scos = max(dot(-normalize(light_rel_vec), spot_dir), cone_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cone_angle)); + half cone_angle = half(spot_lights.data[idx].cone_angle); + half scos = max(dot(-light_rel_vec_norm, spot_dir), cone_angle); - spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation); - vec3 color = spot_lights.data[idx].color * spot_attenuation; - float specular_amount = spot_lights.data[idx].specular_amount; + // This conversion to a highp float is crucial to prevent light leaking due to precision errors. 
+ float spot_rim = max(1e-4, float(half(1.0) - scos) / float(half(1.0) - cone_angle)); + spot_attenuation *= half(1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation)); - light_compute_vertex(normal, normalize(light_rel_vec), eye_vec, color, false, roughness, + hvec3 color = hvec3(spot_lights.data[idx].color * spot_attenuation); + + light_compute_vertex(normal, light_rel_vec_norm, eye_vec, color, false, roughness, diffuse_light, specular_light); } diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index e34a5127f99..21966228288 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -945,7 +945,7 @@ public: enum Features { SUPPORTS_MULTIVIEW, - SUPPORTS_FSR_HALF_FLOAT, + SUPPORTS_HALF_FLOAT, SUPPORTS_ATTACHMENT_VRS, SUPPORTS_METALFX_SPATIAL, SUPPORTS_METALFX_TEMPORAL, diff --git a/servers/rendering/rendering_shader_library.h b/servers/rendering/rendering_shader_library.h index 566fb7eb2fa..7a89f591376 100644 --- a/servers/rendering/rendering_shader_library.h +++ b/servers/rendering/rendering_shader_library.h @@ -36,6 +36,8 @@ public: FEATURE_ADVANCED_BIT = 1U << 0U, FEATURE_MULTIVIEW_BIT = 1U << 1U, FEATURE_VRS_BIT = 1U << 2U, + FEATURE_FP16_BIT = 1U << 3U, + FEATURE_FP32_BIT = 1U << 4U, }; // Used by the shader baker to globally enable features on all the shaders that will be exported.