diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 4aa211089f..30e44418ba 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -401,6 +401,54 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
     }
 }
 
+/// Declares the rounding execution mode a shader requested for one float bit width.
+/// SPIR-V does not allow RoundingModeRTE and RoundingModeRTZ to be declared for the same width
+/// on one entry point, so a shader mixing both keeps the driver default for that width.
+void SetupRoundingModeForWidth(EmitContext& ctx, Id main_func, u32 bit_width, bool uses_rte,
+                               bool uses_rtz, bool support_rte, bool support_rtz) {
+    if (uses_rte && uses_rtz) {
+        LOG_DEBUG(Shader_SPIRV, "Fp{} RTE and RTZ rounding on the same shader", bit_width);
+        return;
+    }
+    if (uses_rte) {
+        if (support_rte) {
+            ctx.AddCapability(spv::Capability::RoundingModeRTE);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, bit_width);
+        } else {
+            LOG_DEBUG(Shader_SPIRV, "Fp{} RTE rounding used in shader without host support",
+                      bit_width);
+        }
+    } else if (uses_rtz) {
+        if (support_rtz) {
+            ctx.AddCapability(spv::Capability::RoundingModeRTZ);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTZ, bit_width);
+        } else {
+            LOG_DEBUG(Shader_SPIRV, "Fp{} RTZ rounding used in shader without host support",
+                      bit_width);
+        }
+    }
+}
+
+/// Declares the rounding modes collected in program.info on the entry point main_func.
+/// A capability and execution mode are only emitted when the host profile supports them.
+void SetupRoundingMode(const Profile& profile, const IR::Program& program, EmitContext& ctx,
+                       Id main_func) {
+    const auto& info = program.info;
+    SetupRoundingModeForWidth(ctx, main_func, 32U, info.uses_fp32_rounding_rte,
+                              info.uses_fp32_rounding_rtz, profile.support_shader_rounding_rte_f32,
+                              profile.support_shader_rounding_rtz_f32);
+    if (!profile.support_separate_rounding_mode) {
+        // Without full rounding mode independence only the FP32 mode is declared
+        return;
+    }
+    SetupRoundingModeForWidth(ctx, main_func, 16U, info.uses_fp16_rounding_rte,
+                              info.uses_fp16_rounding_rtz, profile.support_shader_rounding_rte_f16,
+                              profile.support_shader_rounding_rtz_f16);
+    SetupRoundingModeForWidth(ctx, main_func, 64U, info.uses_fp64_rounding_rte,
+                              info.uses_fp64_rounding_rtz, profile.support_shader_rounding_rte_f64,
+                              profile.support_shader_rounding_rtz_f64);
+}
+
 void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
     if (ctx.runtime_info.xfb_count == 0) {
         return;
@@ -519,6 +567,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
         ctx.AddExtension("SPV_KHR_float_controls");
         SetupDenormControl(profile, program, ctx, main);
         SetupSignedNanCapabilities(profile, program, ctx, main);
+        SetupRoundingMode(profile, program, ctx, main);
     }
     SetupCapabilities(profile, program.info, ctx);
     SetupTransformFeedbackCapabilities(ctx, main);
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 2bfa3227a8..2c22a67f2b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -790,40 +790,125 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
             info.uses_fp16_denorms_preserve = true;
             break;
         }
+        // RN is recorded as RTE and RZ as RTZ; every other rounding value is ignored
+        switch (control.rounding) {
+        case IR::FpRounding::DontCare:
+            break;
+        case IR::FpRounding::RN:
+            info.uses_fp16_rounding_rte = true;
+            break;
+        case IR::FpRounding::RZ:
+            info.uses_fp16_rounding_rtz = true;
+            break;
+        default:
+            break;
+        }
         break;
     }
+    case IR::Opcode::FPAdd64:
+    case IR::Opcode::FPCeil64:
+    case IR::Opcode::FPFloor64:
+    case IR::Opcode::FPFma64:
+    case IR::Opcode::FPMax64:
+    case IR::Opcode::FPMin64:
+    case IR::Opcode::FPMul64:
+    case IR::Opcode::FPNeg64:
+    case IR::Opcode::FPRecip64:
+    case IR::Opcode::FPRecipSqrt64:
+    case IR::Opcode::FPRoundEven64:
+    case IR::Opcode::FPSaturate64:
+    case IR::Opcode::FPClamp64:
+    case IR::Opcode::FPTrunc64:
+    case IR::Opcode::FPOrdEqual64:
+    case IR::Opcode::FPUnordEqual64:
+    case IR::Opcode::FPOrdNotEqual64:
+    case IR::Opcode::FPUnordNotEqual64:
+    case IR::Opcode::FPOrdLessThan64:
+    case IR::Opcode::FPUnordLessThan64:
+    case IR::Opcode::FPOrdGreaterThan64:
+    case IR::Opcode::FPUnordGreaterThan64:
+    case IR::Opcode::FPOrdLessThanEqual64:
+    case IR::Opcode::FPUnordLessThanEqual64:
+    case IR::Opcode::FPOrdGreaterThanEqual64:
+    case IR::Opcode::FPUnordGreaterThanEqual64:
+    case IR::Opcode::FPIsNan64:
+    case IR::Opcode::ConvertS16F64:
+    case IR::Opcode::ConvertS32F64:
+    case IR::Opcode::ConvertS64F64:
+    case IR::Opcode::ConvertU16F64:
+    case IR::Opcode::ConvertU32F64:
+    case IR::Opcode::ConvertU64F64:
+    case IR::Opcode::ConvertF32F64:
+    case IR::Opcode::ConvertF64S8:
+    case IR::Opcode::ConvertF64S16:
+    case IR::Opcode::ConvertF64S32:
+    case IR::Opcode::ConvertF64S64:
+    case IR::Opcode::ConvertF64U8:
+    case IR::Opcode::ConvertF64U16:
+    case IR::Opcode::ConvertF64U32:
+    case IR::Opcode::ConvertF64U64: {
+        // ConvertF64F32 is left to the FP32 group below so no case label is duplicated
+        const auto control{inst.Flags<IR::FpControl>()};
+        info.uses_fp64 = true;
+        switch (control.rounding) {
+        case IR::FpRounding::DontCare:
+            break;
+        case IR::FpRounding::RN:
+            info.uses_fp64_rounding_rte = true;
+            break;
+        case IR::FpRounding::RZ:
+            info.uses_fp64_rounding_rtz = true;
+            break;
+        default:
+            break;
+        }
+        break;
+    }
     case IR::Opcode::FPAdd32:
     case IR::Opcode::FPFma32:
     case IR::Opcode::FPMul32:
     case IR::Opcode::FPRoundEven32:
     case IR::Opcode::FPFloor32:
     case IR::Opcode::FPCeil32:
     case IR::Opcode::FPTrunc32:
     case IR::Opcode::FPOrdEqual32:
     case IR::Opcode::FPUnordEqual32:
     case IR::Opcode::FPOrdNotEqual32:
     case IR::Opcode::FPUnordNotEqual32:
     case IR::Opcode::FPOrdLessThan32:
     case IR::Opcode::FPUnordLessThan32:
     case IR::Opcode::FPOrdGreaterThan32:
     case IR::Opcode::FPUnordGreaterThan32:
     case IR::Opcode::FPOrdLessThanEqual32:
     case IR::Opcode::FPUnordLessThanEqual32:
     case IR::Opcode::FPOrdGreaterThanEqual32:
     case IR::Opcode::FPUnordGreaterThanEqual32:
     case IR::Opcode::ConvertF16F32:
     case IR::Opcode::ConvertF64F32: {
         const auto control{inst.Flags<IR::FpControl>()};
         switch (control.fmz_mode) {
         case IR::FmzMode::DontCare:
             break;
         case IR::FmzMode::FTZ:
         case IR::FmzMode::FMZ:
             info.uses_fp32_denorms_flush = true;
             break;
         case IR::FmzMode::None:
             info.uses_fp32_denorms_preserve = true;
             break;
         }
+        // RN is recorded as RTE and RZ as RTZ; every other rounding value is ignored
+        switch (control.rounding) {
+        case IR::FpRounding::DontCare:
+            break;
+        case IR::FpRounding::RN:
+            info.uses_fp32_rounding_rte = true;
+            break;
+        case IR::FpRounding::RZ:
+            info.uses_fp32_rounding_rtz = true;
+            break;
+        default:
+            break;
+        }
         break;
     }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index ea7f5cc76f..364cef503d 100644
---
a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -22,8 +22,19 @@ struct Profile {
     bool support_int64{};
     bool support_vertex_instance_id{};
     bool support_float_controls{};
+    bool support_float_controls2{};
     bool support_separate_denorm_behavior{};
     bool support_separate_rounding_mode{};
+
+    /* Shader rounding mode support (SPV_KHR_float_controls rounding execution modes).
+     * These are set from the VkPhysicalDeviceFloatControlsProperties rounding mode fields. */
+    bool support_shader_rounding_rte_f16{};
+    bool support_shader_rounding_rte_f32{};
+    bool support_shader_rounding_rte_f64{};
+    bool support_shader_rounding_rtz_f16{};
+    bool support_shader_rounding_rtz_f32{};
+    bool support_shader_rounding_rtz_f64{};
+
     bool support_fp16_denorm_preserve{};
     bool support_fp32_denorm_preserve{};
     bool support_fp16_denorm_flush{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index b8438b3b7c..53ac73ebad 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -290,6 +290,14 @@ struct Info {
     bool uses_fp16_denorms_preserve{};
     bool uses_fp32_denorms_flush{};
     bool uses_fp32_denorms_preserve{};
+
+    /* Rounding mode usage detected in shader IR (set during analysis). */
+    bool uses_fp16_rounding_rte{};
+    bool uses_fp16_rounding_rtz{};
+    bool uses_fp32_rounding_rte{};
+    bool uses_fp32_rounding_rtz{};
+    bool uses_fp64_rounding_rte{};
+    bool uses_fp64_rounding_rtz{};
     bool uses_int8{};
     bool uses_int16{};
     bool uses_int64{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 5efe9dde3f..0c08008118 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -377,10 +377,24 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .support_int64 = device.IsShaderInt64Supported(),
         .support_vertex_instance_id = false,
         .support_float_controls = float_controls_supported,
+        .support_float_controls2 = device.IsKhrShaderFloatControls2Supported(),
         .support_separate_denorm_behavior = float_controls_supported &&
             float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
         .support_separate_rounding_mode = float_controls_supported &&
             float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
+        /* Rounding mode support (RTE/RTZ) per precision */
+        .support_shader_rounding_rte_f16 = float_controls_supported &&
+                                           float_control.shaderRoundingModeRTEFloat16 != VK_FALSE,
+        .support_shader_rounding_rte_f32 = float_controls_supported &&
+                                           float_control.shaderRoundingModeRTEFloat32 != VK_FALSE,
+        .support_shader_rounding_rte_f64 = float_controls_supported &&
+                                           float_control.shaderRoundingModeRTEFloat64 != VK_FALSE,
+        .support_shader_rounding_rtz_f16 = float_controls_supported &&
+                                           float_control.shaderRoundingModeRTZFloat16 != VK_FALSE,
+        .support_shader_rounding_rtz_f32 = float_controls_supported &&
+                                           float_control.shaderRoundingModeRTZFloat32 != VK_FALSE,
+        .support_shader_rounding_rtz_f64 = float_controls_supported &&
+                                           float_control.shaderRoundingModeRTZFloat64 != VK_FALSE,
         .support_fp16_denorm_preserve = float_controls_supported &&
                                         float_control.shaderDenormPreserveFloat16 != VK_FALSE,
         .support_fp32_denorm_preserve = float_controls_supported &&
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3963d4fb09..6c143a6974 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -530,15 +530,30 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
                     "Qualcomm drivers require scaled vertex format emulation; forcing fallback");
 
         if (extensions.shader_float_controls) {
-            LOG_WARNING(Render_Vulkan,
-                        "Qualcomm drivers: VK_KHR_shader_float_controls is unstable; disabling usage");
-            RemoveExtension(extensions.shader_float_controls,
-                            VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
+            // Allowlist: keep float controls when the driver also exposes
+            // VK_KHR_shader_float_controls2, or when driver >= 512.849.0 where the driver fix
+            // was introduced.
+            const bool adreno_safe = extensions.shader_float_controls2 ||
+                (properties.properties.driverVersion >= VK_MAKE_VERSION(512, 849, 0));
+            if (!adreno_safe) {
+                LOG_WARNING(Render_Vulkan,
+                            "Qualcomm drivers: VK_KHR_shader_float_controls is unstable; disabling usage");
+                RemoveExtension(extensions.shader_float_controls,
+                                VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
+                disable_shader_float_controls_usage = true;
+            } else {
+                LOG_INFO(Render_Vulkan,
+                         "Qualcomm drivers: enabling VK_KHR_shader_float_controls (driver version {}.{}.{} or shader_float_controls2 present)",
+                         VK_VERSION_MAJOR(properties.properties.driverVersion),
+                         VK_VERSION_MINOR(properties.properties.driverVersion),
+                         VK_VERSION_PATCH(properties.properties.driverVersion));
+            }
         } else {
             LOG_INFO(Render_Vulkan,
                      "Qualcomm drivers: VK_KHR_shader_float_controls already unavailable");
         }
-        disable_shader_float_controls_usage = true;
+
+        // Other Qualcomm workarounds continue to apply.
         RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
                                VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
         features.shader_atomic_int64.shaderBufferInt64Atomics = false;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 5335bd264c..d66364d9c4 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -90,6 +90,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
     EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor)                                               \
     EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge)                     \
     EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls)                                   \
+    EXTENSION(KHR, SHADER_FLOAT_CONTROLS_2, shader_float_controls2)                                \
     EXTENSION(KHR, SPIRV_1_4, spirv_1_4)                                                           \
     EXTENSION(KHR, SWAPCHAIN, swapchain)                                                           \
     EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format)                             \
@@ -500,6 +501,11 @@ public:
         return extensions.shader_float_controls && !disable_shader_float_controls_usage;
     }
 
+    /// Returns true if VK_KHR_shader_float_controls2 is enabled.
+    bool IsKhrShaderFloatControls2Supported() const {
+        return extensions.shader_float_controls2 && !disable_shader_float_controls_usage;
+    }
+
     /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
     bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
         return extensions.workgroup_memory_explicit_layout;