Compare commits
1 Commits
fw2110
...
qcom-weird
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92fb89cbf0 |
@@ -338,55 +338,73 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
||||
}
|
||||
|
||||
void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
|
||||
Id main_func) {
|
||||
const Info& info{program.info};
|
||||
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
|
||||
} else if (info.uses_fp32_denorms_flush) {
|
||||
void SetupDenormControl(const Profile& profile, IR::Program const& program, EmitContext& ctx, Id main_func) {
|
||||
Info const& info = program.info;
|
||||
switch (info.fp32_denorm) {
|
||||
case Shader::FloatDenormKind::None:
|
||||
default:
|
||||
break;
|
||||
case Shader::FloatDenormKind::DenormFlushToZero:
|
||||
if (profile.support_fp32_denorm_flush) {
|
||||
ctx.AddCapability(spv::Capability::DenormFlushToZero);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
|
||||
} else {
|
||||
// Drivers will most likely flush denorms by default, no need to warn
|
||||
} else if(!profile.uses_ftz_as_default) {
|
||||
LOG_WARNING(Shader_SPIRV, "f32.ftz requested but not supported");
|
||||
}
|
||||
} else if (info.uses_fp32_denorms_preserve) {
|
||||
break;
|
||||
case Shader::FloatDenormKind::RoundingModeRTE:
|
||||
if (profile.support_fp32_round_rte) {
|
||||
ctx.AddCapability(spv::Capability::RoundingModeRTE);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, 32U);
|
||||
} else {
|
||||
LOG_WARNING(Shader_SPIRV, "f32.rte requested but not supported");
|
||||
}
|
||||
break;
|
||||
case Shader::FloatDenormKind::DenormPreserve:
|
||||
if (profile.support_fp32_denorm_preserve) {
|
||||
ctx.AddCapability(spv::Capability::DenormPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
|
||||
} else {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
|
||||
LOG_WARNING(Shader_SPIRV, "f32.pre requested but not supported");
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
|
||||
// No separate denorm behavior
|
||||
return;
|
||||
}
|
||||
if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
|
||||
} else if (info.uses_fp16_denorms_flush) {
|
||||
if (profile.support_fp16_denorm_flush) {
|
||||
|
||||
// No separate denorm behavior
|
||||
bool can_fp16 = !(!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls);
|
||||
switch (info.fp16_denorm) {
|
||||
case Shader::FloatDenormKind::None:
|
||||
default:
|
||||
break;
|
||||
case Shader::FloatDenormKind::DenormFlushToZero:
|
||||
if (can_fp16 && profile.support_fp16_denorm_flush) {
|
||||
ctx.AddCapability(spv::Capability::DenormFlushToZero);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
|
||||
} else {
|
||||
// Same as fp32, no need to warn as most drivers will flush by default
|
||||
} else if(!profile.uses_ftz_as_default) {
|
||||
LOG_WARNING(Shader_SPIRV, "f16.ftz requested but not supported");
|
||||
}
|
||||
} else if (info.uses_fp16_denorms_preserve) {
|
||||
if (profile.support_fp16_denorm_preserve) {
|
||||
break;
|
||||
case Shader::FloatDenormKind::RoundingModeRTE:
|
||||
if (can_fp16 && profile.support_fp16_round_rte) {
|
||||
ctx.AddCapability(spv::Capability::RoundingModeRTE);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, 16U);
|
||||
} else {
|
||||
LOG_WARNING(Shader_SPIRV, "f16.rte requested but not supported");
|
||||
}
|
||||
break;
|
||||
case Shader::FloatDenormKind::DenormPreserve:
|
||||
if (can_fp16 && profile.support_fp16_denorm_preserve) {
|
||||
ctx.AddCapability(spv::Capability::DenormPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
|
||||
} else {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
|
||||
LOG_WARNING(Shader_SPIRV, "f16.pre requested but not supported");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
|
||||
EmitContext& ctx, Id main_func) {
|
||||
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
|
||||
return;
|
||||
}
|
||||
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
|
||||
void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) {
|
||||
if (!profile.has_broken_fp16_float_controls && program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
|
||||
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
|
||||
}
|
||||
|
||||
@@ -76,8 +76,8 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
|
||||
case IR::Attribute::ClipDistance5:
|
||||
case IR::Attribute::ClipDistance6:
|
||||
case IR::Attribute::ClipDistance7: {
|
||||
const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
|
||||
const u32 index{static_cast<u32>(attr) - base};
|
||||
const u32 base{u32(IR::Attribute::ClipDistance0)};
|
||||
const u32 index{u32(attr) - base};
|
||||
if (index >= ctx.profile.max_user_clip_distances) {
|
||||
LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index,
|
||||
ctx.profile.max_user_clip_distances);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/ir/program.h"
|
||||
@@ -769,6 +770,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates the per-instruction FMZ mode carried in IR::FpControl into the
/// float-control kind stored in Info (fp16_denorm / fp32_denorm).
///
/// Mapping:
///   DontCare -> None
///   FTZ      -> DenormFlushToZero
///   FMZ      -> RoundingModeRTE
///   None     -> DenormPreserve
///
/// NOTE(review): the earlier bool-flag logic in VisitFpModifiers treated FMZ the same as
/// FTZ (both marked the shader as flushing denorms). Mapping FMZ to RoundingModeRTE changes
/// that behaviour; confirm it is intended.
///
/// @param mode FMZ mode read from an instruction's FpControl flags.
/// @return The matching FloatDenormKind; None when `mode` matches none of the cases.
constexpr Shader::FloatDenormKind FloatDenormModeToShaderMode(IR::FmzMode const mode) noexcept {
    switch (mode) {
    case IR::FmzMode::DontCare:
        return Shader::FloatDenormKind::None;
    case IR::FmzMode::FTZ:
        return Shader::FloatDenormKind::DenormFlushToZero;
    case IR::FmzMode::FMZ:
        return Shader::FloatDenormKind::RoundingModeRTE;
    case IR::FmzMode::None:
        return Shader::FloatDenormKind::DenormPreserve;
    }
    // Reached only if `mode` is not one of the cases above. Without this return, control
    // would flow off the end of a non-void function, which is undefined behaviour.
    return Shader::FloatDenormKind::None;
}
|
||||
|
||||
void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::FPAdd16:
|
||||
@@ -778,18 +788,10 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
||||
case IR::Opcode::FPFloor16:
|
||||
case IR::Opcode::FPCeil16:
|
||||
case IR::Opcode::FPTrunc16: {
|
||||
const auto control{inst.Flags<IR::FpControl>()};
|
||||
switch (control.fmz_mode) {
|
||||
case IR::FmzMode::DontCare:
|
||||
break;
|
||||
case IR::FmzMode::FTZ:
|
||||
case IR::FmzMode::FMZ:
|
||||
info.uses_fp16_denorms_flush = true;
|
||||
break;
|
||||
case IR::FmzMode::None:
|
||||
info.uses_fp16_denorms_preserve = true;
|
||||
break;
|
||||
}
|
||||
auto const control = inst.Flags<IR::FpControl>();
|
||||
auto const denorm = FloatDenormModeToShaderMode(control.fmz_mode);
|
||||
ASSERT(info.fp16_denorm == FloatDenormKind::None || info.fp16_denorm == denorm);
|
||||
info.fp16_denorm = denorm;
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::FPAdd32:
|
||||
@@ -813,18 +815,10 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
||||
case IR::Opcode::FPUnordGreaterThanEqual32:
|
||||
case IR::Opcode::ConvertF16F32:
|
||||
case IR::Opcode::ConvertF64F32: {
|
||||
const auto control{inst.Flags<IR::FpControl>()};
|
||||
switch (control.fmz_mode) {
|
||||
case IR::FmzMode::DontCare:
|
||||
break;
|
||||
case IR::FmzMode::FTZ:
|
||||
case IR::FmzMode::FMZ:
|
||||
info.uses_fp32_denorms_flush = true;
|
||||
break;
|
||||
case IR::FmzMode::None:
|
||||
info.uses_fp32_denorms_preserve = true;
|
||||
break;
|
||||
}
|
||||
const auto control = inst.Flags<IR::FpControl>();
|
||||
auto const denorm = FloatDenormModeToShaderMode(control.fmz_mode);
|
||||
ASSERT(info.fp32_denorm == FloatDenormKind::None || info.fp32_denorm == denorm);
|
||||
info.fp32_denorm = denorm;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
||||
@@ -22,6 +22,8 @@ struct Profile {
|
||||
bool support_fp32_denorm_preserve{};
|
||||
bool support_fp16_denorm_flush{};
|
||||
bool support_fp32_denorm_flush{};
|
||||
bool support_fp16_round_rte{};
|
||||
bool support_fp32_round_rte{};
|
||||
bool support_fp16_signed_zero_nan_preserve{};
|
||||
bool support_fp32_signed_zero_nan_preserve{};
|
||||
bool support_fp64_signed_zero_nan_preserve{};
|
||||
@@ -46,6 +48,9 @@ struct Profile {
|
||||
bool support_multi_viewport{};
|
||||
bool support_geometry_streams{};
|
||||
|
||||
/// The driver already flushes denormals to zero by default (set for the Qualcomm proprietary driver), so an unsupported FTZ request is not worth a warning
|
||||
bool uses_ftz_as_default{};
|
||||
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
|
||||
bool lower_left_origin_mode{};
|
||||
|
||||
@@ -235,6 +235,15 @@ struct ImageDescriptor {
|
||||
};
|
||||
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
|
||||
|
||||
/// Float-control behaviour a shader requests for one floating-point width.
/// Info keeps one value per width (fp16_denorm, fp32_denorm); value-initialisation yields None.
enum class FloatDenormKind : u32 {
    /// No float-control request.
    None = 0,
    DenormPreserve = 1,
    DenormFlushToZero = 2,
    SignedZeroInfNanPreserve = 3,
    RoundingModeRTE = 4,
    RoundingModeRTZ = 5,
};
|
||||
|
||||
struct Info {
|
||||
static constexpr size_t MAX_INDIRECT_CBUFS{14};
|
||||
static constexpr size_t MAX_CBUFS{18};
|
||||
@@ -273,10 +282,8 @@ struct Info {
|
||||
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
bool uses_fp16_denorms_flush{};
|
||||
bool uses_fp16_denorms_preserve{};
|
||||
bool uses_fp32_denorms_flush{};
|
||||
bool uses_fp32_denorms_preserve{};
|
||||
FloatDenormKind fp16_denorm{};
|
||||
FloatDenormKind fp32_denorm{};
|
||||
bool uses_int8{};
|
||||
bool uses_int16{};
|
||||
bool uses_int64{};
|
||||
|
||||
@@ -198,6 +198,8 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_fp32_denorm_preserve = false,
|
||||
.support_fp16_denorm_flush = false,
|
||||
.support_fp32_denorm_flush = false,
|
||||
.support_fp16_round_rte = false,
|
||||
.support_fp32_round_rte = false,
|
||||
.support_fp16_signed_zero_nan_preserve = false,
|
||||
.support_fp32_signed_zero_nan_preserve = false,
|
||||
.support_fp64_signed_zero_nan_preserve = false,
|
||||
@@ -221,6 +223,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_gl_derivative_control = device.HasDerivativeControl(),
|
||||
.support_geometry_streams = true,
|
||||
|
||||
.uses_ftz_as_default = false,
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
|
||||
|
||||
.lower_left_origin_mode = true,
|
||||
|
||||
@@ -335,12 +335,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
|
||||
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
|
||||
.support_fp16_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.support_fp64_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
||||
.support_fp16_round_rte = float_control.shaderRoundingModeRTEFloat16 != VK_FALSE,
|
||||
.support_fp32_round_rte = float_control.shaderRoundingModeRTEFloat32 != VK_FALSE,
|
||||
.support_fp16_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
||||
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
|
||||
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
|
||||
.support_viewport_index_layer_non_geometry =
|
||||
@@ -357,6 +356,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_multi_viewport = device.SupportsMultiViewport(),
|
||||
.support_geometry_streams = device.AreTransformFeedbackGeometryStreamsSupported(),
|
||||
|
||||
.uses_ftz_as_default = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||
|
||||
.lower_left_origin_mode = false,
|
||||
|
||||
Reference in New Issue
Block a user