Compare commits


12 Commits

19 changed files with 547 additions and 274 deletions

View File

@@ -560,6 +560,60 @@ struct Values {
false,
&sample_shading};
#ifdef ANDROID
// Shader Float Controls (Android only) - Eden Veil / Extensions
// Force enable VK_KHR_shader_float_controls even if driver has known issues
// Allows fine-tuning float behavior to match Switch/Maxwell or optimize performance
SwitchableSetting<bool> shader_float_controls_force_enable{linkage,
false,
"shader_float_controls_force_enable",
Category::RendererExtensions,
Specialization::Paired};
// Individual float behavior controls (visible only when force_enable is true)
// Multiple can be active simultaneously EXCEPT FTZ and DenormPreserve (mutually exclusive)
//
// Recommended configurations:
// Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior)
// Performance: FTZ=ON only (fastest)
// Accuracy: DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision)
SwitchableSetting<bool> shader_float_ftz{linkage,
false,
"shader_float_ftz",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
SwitchableSetting<bool> shader_float_denorm_preserve{linkage,
false,
"shader_float_denorm_preserve",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
SwitchableSetting<bool> shader_float_rte{linkage,
false,
"shader_float_rte",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
SwitchableSetting<bool> shader_float_signed_zero_inf_nan{linkage,
false,
"shader_float_signed_zero_inf_nan",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
#endif
Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",
Category::RendererDebug};
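For reference, a minimal sketch of the three recommended configurations above expressed as data (hypothetical helper, not part of this change; the diff wires four independent bool settings instead):

// Illustrative only - the struct and names are assumptions, not from the diff
struct FloatControlPreset {
    bool ftz;                 // flush denorms to zero
    bool denorm_preserve;     // mutually exclusive with ftz
    bool rte;                 // round-to-nearest-even
    bool signed_zero_inf_nan; // preserve signed zero/inf/NaN
};
constexpr FloatControlPreset kSwitchNative{true, false, true, true};  // matches Maxwell
constexpr FloatControlPreset kPerformance{true, false, false, false}; // FTZ only, fastest
constexpr FloatControlPreset kAccuracy{false, true, true, true};      // slowest, most precise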

View File

@@ -152,6 +152,16 @@ ENUM(SpirvOptimizeMode, Never, OnLoad, Always);
ENUM(GpuOverclock, Low, Medium, High)
ENUM(TemperatureUnits, Celsius, Fahrenheit)
// Shader Float Controls behavior modes
// These control how floating-point denormals and special values are handled in shaders
ENUM(ShaderFloatBehavior,
DriverDefault, // Let driver choose (safest, may not match Switch behavior)
SwitchNative, // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero)
FlushToZero, // FTZ only - flush denorms to zero (fastest, some precision loss)
PreserveDenorms, // Preserve denorms (slowest, highest precision)
RoundToEven, // RTE rounding mode (IEEE 754 compliant)
SignedZeroInfNan); // Preserve signed zero, inf, nan (accuracy for edge cases)
template <typename Type>
inline std::string_view CanonicalizeEnum(Type id) {
const auto group = EnumMetadata<Type>::Canonicalizations();
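A hedged sketch of how the enum above could map onto the per-flag overrides used elsewhere in this change (the diff exposes individual bool settings rather than this enum, so the mapping is illustrative):

// Hypothetical translation - the ShaderFloatBehavior values are from the diff,
// the ForceFlags struct and function are assumptions
struct ForceFlags { bool ftz, preserve, rte, signed_zero_inf_nan; };
constexpr ForceFlags ToForceFlags(Settings::ShaderFloatBehavior behavior) {
    switch (behavior) {
    case Settings::ShaderFloatBehavior::SwitchNative:     return {true, false, true, true};
    case Settings::ShaderFloatBehavior::FlushToZero:      return {true, false, false, false};
    case Settings::ShaderFloatBehavior::PreserveDenorms:  return {false, true, false, false};
    case Settings::ShaderFloatBehavior::RoundToEven:      return {false, false, true, false};
    case Settings::ShaderFloatBehavior::SignedZeroInfNan: return {false, false, false, true};
    case Settings::ShaderFloatBehavior::DriverDefault:
    default:                                              return {false, false, false, false};
    }
}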

View File

@@ -341,19 +341,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
Id main_func) {
const Info& info{program.info};
// User-forced behavior overrides (Android Eden Veil/Extensions)
// When force flags are active, they take precedence over shader-declared behavior
const bool force_flush = profile.force_fp32_denorm_flush;
const bool force_preserve = profile.force_fp32_denorm_preserve;
if (force_flush && force_preserve) {
LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence");
}
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
} else if (info.uses_fp32_denorms_flush) {
} else if (force_flush || info.uses_fp32_denorms_flush) {
if (profile.support_fp32_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
if (force_flush) {
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting");
}
} else {
// Drivers will most likely flush denorms by default, no need to warn
}
} else if (info.uses_fp32_denorms_preserve) {
} else if (force_preserve || info.uses_fp32_denorms_preserve) {
if (profile.support_fp32_denorm_preserve) {
ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
if (force_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting");
}
} else {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
}
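Condensed, the precedence these branches implement for FP32 denormals looks like this (illustrative sketch; the names below are assumptions):

// Force flags (user) and shader-declared flags feed the same if/else chain;
// flush is tested first, so it wins when both are requested.
// (The real function additionally just logs when the shader itself declares both.)
enum class DenormMode { DriverDefault, FlushToZero, Preserve };
constexpr DenormMode ResolveFp32DenormMode(bool force_flush, bool force_preserve,
                                           bool shader_flush, bool shader_preserve) {
    if (force_flush || shader_flush) {
        return DenormMode::FlushToZero;
    }
    if (force_preserve || shader_preserve) {
        return DenormMode::Preserve;
    }
    return DenormMode::DriverDefault;
}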
@@ -386,13 +402,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
return;
}
// User-forced behavior (Android Eden Veil/Extensions)
const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan;
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
}
if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) {
if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
if (force_signed_zero_inf_nan) {
LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting");
}
} else if (force_signed_zero_inf_nan) {
LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it");
}
}
if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);

View File

@@ -95,7 +95,7 @@ void EmitLoadGlobalS16(EmitContext&) {
}
Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) {
if (ctx.profile.support_int64) {
return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
}
LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
@@ -103,7 +103,7 @@ Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
}
Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) {
if (ctx.profile.support_int64) {
return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
}
LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
@@ -111,7 +111,7 @@ Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
}
Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) {
if (ctx.profile.support_int64) {
return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
}
LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
@@ -135,7 +135,7 @@ void EmitWriteGlobalS16(EmitContext&) {
}
void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) {
if (ctx.profile.support_int64) {
ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
return;
}
@@ -143,7 +143,7 @@ void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
}
void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) {
if (ctx.profile.support_int64) {
ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
return;
}
@@ -151,7 +151,7 @@ void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
}
void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) {
if (ctx.profile.support_int64) {
ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
return;
}

View File

@@ -460,14 +460,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_},
stage{program.stage},
// Enable int64 emulation if host lacks int64 but we either use int64 ops
// or we need 64-bit addressing for global memory operations.
emulate_int64{!profile.support_int64 &&
(program.info.uses_int64 || program.info.uses_global_memory)},
texture_rescaling_index{bindings.texture_scaling_index},
image_rescaling_index{bindings.image_scaling_index} {
: Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_},
stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index},
image_rescaling_index{bindings.image_scaling_index} {
const bool is_unified{profile.unified_descriptor_binding};
u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
@@ -937,163 +932,11 @@ void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) {
}
void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
if (!info.uses_global_memory) {
if (!info.uses_global_memory || !profile.support_int64) {
return;
}
using DefPtr = Id StorageDefinitions::*;
const Id zero{u32_zero_value};
if (SupportsNativeInt64()) {
const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
auto&& callback) {
AddLabel();
const size_t num_buffers{info.storage_buffers_descriptors.size()};
for (size_t index = 0; index < num_buffers; ++index) {
if (!info.nvn_buffer_used[index]) {
continue;
}
const auto& ssbo{info.storage_buffers_descriptors[index]};
const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
const Id ssbo_addr_pointer{OpAccessChain(
uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero,
ssbo_addr_cbuf_offset)};
const Id ssbo_size_pointer{OpAccessChain(
uniform_types.U32, cbufs[ssbo.cbuf_index].U32, zero, ssbo_size_cbuf_offset)};
const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)};
const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))};
const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
OpULessThan(U1, addr, ssbo_end))};
const Id then_label{OpLabel()};
const Id else_label{OpLabel()};
OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
OpBranchConditional(cond, then_label, else_label);
AddLabel(then_label);
const Id ssbo_id{ssbos[index].*ssbo_member};
const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
callback(ssbo_pointer);
AddLabel(else_label);
}
}};
const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
const Id function_type{TypeFunction(type, U64)};
const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
const Id addr{OpFunctionParameter(U64)};
define_body(ssbo_member, addr, element_pointer, shift,
[&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
OpReturnValue(ConstantNull(type));
OpFunctionEnd();
return func_id;
}};
const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
const Id function_type{TypeFunction(void_id, U64, type)};
const Id func_id{
OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
const Id addr{OpFunctionParameter(U64)};
const Id data{OpFunctionParameter(type)};
define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
OpStore(ssbo_pointer, data);
OpReturn();
});
OpReturn();
OpFunctionEnd();
return func_id;
}};
const auto define{
[&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
const Id element_type{type_def.element};
const u32 shift{static_cast<u32>(std::countr_zero(size))};
const Id load_func{define_load(ssbo_member, element_type, type, shift)};
const Id write_func{define_write(ssbo_member, element_type, type, shift)};
return std::make_pair(load_func, write_func);
}};
std::tie(load_global_func_u32, write_global_func_u32) =
define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
std::tie(load_global_func_u32x2, write_global_func_u32x2) =
define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
std::tie(load_global_func_u32x4, write_global_func_u32x4) =
define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
return;
}
if (!UsesInt64Emulation()) {
return;
}
const auto make_pair = [&](Id lo, Id hi) {
return OpCompositeConstruct(U32[2], lo, hi);
};
const auto split_pair = [&](Id value) {
return std::array<Id, 2>{OpCompositeExtract(U32[1], value, 0U),
OpCompositeExtract(U32[1], value, 1U)};
};
const auto bool_to_u32 = [&](Id predicate) {
return OpSelect(U32[1], predicate, Const(1u), zero);
};
const auto and_pair = [&](Id value, Id mask) {
const auto value_parts{split_pair(value)};
const auto mask_parts{split_pair(mask)};
return make_pair(OpBitwiseAnd(U32[1], value_parts[0], mask_parts[0]),
OpBitwiseAnd(U32[1], value_parts[1], mask_parts[1]));
};
const auto add_pair = [&](Id lhs, Id rhs) {
const auto lhs_parts{split_pair(lhs)};
const auto rhs_parts{split_pair(rhs)};
const Id sum_lo{OpIAdd(U32[1], lhs_parts[0], rhs_parts[0])};
const Id carry{OpULessThan(U1, sum_lo, lhs_parts[0])};
Id sum_hi{OpIAdd(U32[1], lhs_parts[1], rhs_parts[1])};
sum_hi = OpIAdd(U32[1], sum_hi, bool_to_u32(carry));
return make_pair(sum_lo, sum_hi);
};
const auto sub_pair = [&](Id lhs, Id rhs) {
const auto lhs_parts{split_pair(lhs)};
const auto rhs_parts{split_pair(rhs)};
const Id borrow{OpULessThan(U1, lhs_parts[0], rhs_parts[0])};
const Id diff_lo{OpISub(U32[1], lhs_parts[0], rhs_parts[0])};
Id diff_hi{OpISub(U32[1], lhs_parts[1], rhs_parts[1])};
diff_hi = OpISub(U32[1], diff_hi, bool_to_u32(borrow));
return make_pair(diff_lo, diff_hi);
};
const auto shift_right_pair = [&](Id value, u32 shift) {
if (shift == 0) {
return value;
}
const auto parts{split_pair(value)};
const Id shift_id{Const(shift)};
const Id high_shifted{OpShiftRightLogical(U32[1], parts[1], shift_id)};
Id low_shifted{OpShiftRightLogical(U32[1], parts[0], shift_id)};
const Id carry_bits{OpShiftLeftLogical(U32[1], parts[1], Const(32u - shift))};
low_shifted = OpBitwiseOr(U32[1], low_shifted, carry_bits);
return make_pair(low_shifted, high_shifted);
};
const auto greater_equal_pair = [&](Id lhs, Id rhs) {
const auto lhs_parts{split_pair(lhs)};
const auto rhs_parts{split_pair(rhs)};
const Id hi_gt{OpUGreaterThan(U1, lhs_parts[1], rhs_parts[1])};
const Id hi_eq{OpIEqual(U1, lhs_parts[1], rhs_parts[1])};
const Id lo_ge{OpUGreaterThanEqual(U1, lhs_parts[0], rhs_parts[0])};
return OpLogicalOr(U1, hi_gt, OpLogicalAnd(U1, hi_eq, lo_ge));
};
const auto less_than_pair = [&](Id lhs, Id rhs) {
const auto lhs_parts{split_pair(lhs)};
const auto rhs_parts{split_pair(rhs)};
const Id hi_lt{OpULessThan(U1, lhs_parts[1], rhs_parts[1])};
const Id hi_eq{OpIEqual(U1, lhs_parts[1], rhs_parts[1])};
const Id lo_lt{OpULessThan(U1, lhs_parts[0], rhs_parts[0])};
return OpLogicalOr(U1, hi_lt, OpLogicalAnd(U1, hi_eq, lo_lt));
};
const u64 ssbo_align_mask_value{~(profile.min_ssbo_alignment - 1U)};
const Id ssbo_align_mask{
Const(static_cast<u32>(ssbo_align_mask_value & 0xFFFFFFFFu),
static_cast<u32>(ssbo_align_mask_value >> 32))};
const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
auto&& callback) {
AddLabel();
@@ -1110,44 +953,40 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
zero, ssbo_size_cbuf_offset)};
const Id unaligned_addr_pair{OpLoad(U32[2], ssbo_addr_pointer)};
const Id ssbo_addr_pair{and_pair(unaligned_addr_pair, ssbo_align_mask)};
const Id ssbo_size_value{OpLoad(U32[1], ssbo_size_pointer)};
const Id ssbo_size_pair{make_pair(ssbo_size_value, zero)};
const Id ssbo_end_pair{add_pair(ssbo_addr_pair, ssbo_size_pair)};
const Id cond{OpLogicalAnd(U1, greater_equal_pair(addr, ssbo_addr_pair),
less_than_pair(addr, ssbo_end_pair))};
const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)};
const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))};
const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
OpULessThan(U1, addr, ssbo_end))};
const Id then_label{OpLabel()};
const Id else_label{OpLabel()};
OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
OpBranchConditional(cond, then_label, else_label);
AddLabel(then_label);
const Id ssbo_id{ssbos[index].*ssbo_member};
const Id ssbo_offset_pair{sub_pair(addr, ssbo_addr_pair)};
const Id ssbo_index_pair{shift_right_pair(ssbo_offset_pair, shift)};
const Id ssbo_index{OpCompositeExtract(U32[1], ssbo_index_pair, 0U)};
const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
callback(ssbo_pointer);
AddLabel(else_label);
}
}};
const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
const Id function_type{TypeFunction(type, U32[2])};
const Id function_type{TypeFunction(type, U64)};
const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
const Id addr{OpFunctionParameter(U32[2])};
const Id addr{OpFunctionParameter(U64)};
define_body(ssbo_member, addr, element_pointer, shift,
[&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
OpReturnValue(ConstantNull(type));
OpFunctionEnd();
return func_id;
}};
const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
const Id function_type{TypeFunction(void_id, U32[2], type)};
const Id func_id{
OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
const Id addr{OpFunctionParameter(U32[2])};
const Id function_type{TypeFunction(void_id, U64, type)};
const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
const Id addr{OpFunctionParameter(U64)};
const Id data{OpFunctionParameter(type)};
define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
OpStore(ssbo_pointer, data);
@@ -1157,7 +996,6 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
OpFunctionEnd();
return func_id;
}};
const auto define{
[&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
const Id element_type{type_def.element};
@@ -1166,7 +1004,6 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
const Id write_func{define_write(ssbo_member, element_type, type, shift)};
return std::make_pair(load_func, write_func);
}};
std::tie(load_global_func_u32, write_global_func_u32) =
define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
std::tie(load_global_func_u32x2, write_global_func_u32x2) =
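The removed emulation path synthesized 64-bit address arithmetic from pairs of u32 halves. A standalone host-side sketch of the carry trick the removed add_pair lambda used, for illustration:

#include <cstdint>
struct U64Pair {
    uint32_t lo;
    uint32_t hi;
};
U64Pair AddPair(U64Pair a, U64Pair b) {
    U64Pair r{};
    r.lo = a.lo + b.lo;                 // low halves add modulo 2^32
    const uint32_t carry = r.lo < a.lo; // unsigned wrap happened iff sum < an operand
    r.hi = a.hi + b.hi + carry;         // propagate the carry into the high half
    return r;
}
// sub_pair mirrored this with a borrow: borrow = (a.lo < b.lo), hi = a.hi - b.hi - borrow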

View File

@@ -210,15 +210,6 @@ public:
const Profile& profile;
const RuntimeInfo& runtime_info;
Stage stage{};
const bool emulate_int64{};
bool SupportsNativeInt64() const {
return profile.support_int64;
}
bool UsesInt64Emulation() const {
return emulate_int64;
}
Id void_id{};
Id U1{};

View File

@@ -28,6 +28,14 @@ struct Profile {
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
// User-forced float behavior overrides (Android Eden Veil/Extensions)
// When shader_float_controls_force_enable is true, these override shader-declared behavior
bool force_fp32_denorm_flush{}; // Force FTZ for all FP32 ops
bool force_fp32_denorm_preserve{}; // Force denorm preservation for all FP32 ops
bool force_fp32_rte_rounding{}; // Force Round-To-Even for all FP32 ops
bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation
bool support_explicit_workgroup_layout{};
bool support_vote{};
bool support_viewport_index_layer_non_geometry{};

View File

@@ -42,7 +42,7 @@ constexpr std::array VIEW_CLASS_32_BITS{
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::B8G8R8A8_UNORM,
PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT,
PixelFormat::A2B10G10R10_UINT,
};
@@ -52,7 +52,7 @@ constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::A8B8G8R8_UINT,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::A8B8G8R8_UINT,
PixelFormat::A8B8G8R8_SINT, PixelFormat::A2B10G10R10_UINT,
};

View File

@@ -214,7 +214,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, Attachable | Storage}, // E5B9G9R9_FLOAT
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT (SAMPLED_IMAGE only, no COLOR_ATTACHMENT)
// Depth formats
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT

View File

@@ -27,8 +27,13 @@ public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
bool CanUsePushDescriptor() const noexcept {
return device->IsKhrPushDescriptorSupported() &&
num_descriptors <= device->MaxPushDescriptors();
if (!device->IsKhrPushDescriptorSupported()) {
return false;
}
if (num_descriptors > device->MaxPushDescriptors()) {
return false;
}
return true;
}
// TODO(crueter): utilize layout binding flags

View File

@@ -177,6 +177,8 @@ try
RendererVulkan::~RendererVulkan() {
scheduler.RegisterOnSubmit([] {});
// Acquire submit_mutex before WaitIdle to prevent simultaneous queue access
std::scoped_lock lock{scheduler.submit_mutex};
void(device.GetLogical().WaitIdle());
}

View File

@@ -30,7 +30,8 @@ BlitScreen::~BlitScreen() = default;
void BlitScreen::WaitIdle() {
present_manager.WaitPresent();
scheduler.Finish();
device.GetLogical().WaitIdle();
// Note: scheduler.Finish() already waits for GPU and synchronizes submit_mutex
// Calling device.WaitIdle() here causes threading errors (simultaneous queue access)
}
void BlitScreen::SetWindowAdaptPass() {

View File

@@ -341,6 +341,20 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
#ifdef ANDROID
// User-forced float behavior overrides (Eden Veil/Extensions)
.force_fp32_denorm_flush = Settings::values.shader_float_ftz.GetValue(),
.force_fp32_denorm_preserve = Settings::values.shader_float_denorm_preserve.GetValue(),
.force_fp32_rte_rounding = Settings::values.shader_float_rte.GetValue(),
.force_fp32_signed_zero_inf_nan = Settings::values.shader_float_signed_zero_inf_nan.GetValue(),
#else
.force_fp32_denorm_flush = false,
.force_fp32_denorm_preserve = false,
.force_fp32_rte_rounding = false,
.force_fp32_signed_zero_inf_nan = false,
#endif
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
.support_viewport_index_layer_non_geometry =
@@ -677,7 +691,17 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
ConvertLegacyToGeneric(program, runtime_info);
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
// Adreno doesn't support subgroup operations in vertex stages
// Disable subgroup features for vertex shaders if not supported by the device
Shader::Profile stage_profile = profile;
if (program.stage == Shader::Stage::VertexA || program.stage == Shader::Stage::VertexB) {
if (!device.IsSubgroupSupportedForStage(VK_SHADER_STAGE_VERTEX_BIT)) {
stage_profile.support_vote = false;
}
}
const std::vector<u32> code{EmitSPIRV(stage_profile, runtime_info, program, binding, this->optimize_spirv_output)};
device.SaveShader(code);
modules[stage_index] = BuildShader(device, code);
if (device.HasDebuggingToolAttached()) {
@@ -771,6 +795,17 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
// Adreno has a lower shared memory limit (32 KB)
// Clamp shared memory usage to device maximum to avoid validation errors
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
if (program.shared_memory_size > max_shared_memory) {
LOG_WARNING(Render_Vulkan,
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
key.unique_hash, program.shared_memory_size / 1024, max_shared_memory / 1024);
program.shared_memory_size = max_shared_memory;
}
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};

View File

@@ -8,6 +8,7 @@
#include <boost/container/static_vector.hpp>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/surface.h"
@@ -19,6 +20,23 @@ namespace {
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
// Check if the driver uses tile-based deferred rendering (TBDR) architecture
// These GPUs benefit from optimized load/store operations to keep data on-chip
//
// TBDR GPUs supported in Eden:
// - Qualcomm Adreno (Snapdragon): Most Android flagship/midrange devices
// - ARM Mali: Android devices (Samsung Exynos, MediaTek, etc.)
// - Imagination PowerVR: Older iOS devices, some Android tablets
// - Samsung Xclipse: Galaxy S22+ (AMD RDNA2-based, but uses TBDR mode)
// - Broadcom VideoCore: Raspberry Pi
[[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) {
return driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY ||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY;
}
constexpr SurfaceType GetSurfaceType(PixelFormat format) {
switch (format) {
// Depth formats
@@ -44,23 +62,57 @@ using VideoCore::Surface::SurfaceType;
}
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
VkSampleCountFlagBits samples) {
VkSampleCountFlagBits samples,
bool tbdr_will_clear,
bool tbdr_discard_after,
bool tbdr_read_only = false) {
using MaxwellToVK::SurfaceFormat;
const SurfaceType surface_type = GetSurfaceType(format);
const bool has_stencil = surface_type == SurfaceType::DepthStencil ||
surface_type == SurfaceType::Stencil;
// TBDR optimization: apply the load/store hints below only on tile-based GPUs
// Desktop GPUs (NVIDIA/AMD/Intel) keep the standard LOAD/STORE behavior
const bool is_tbdr = IsTBDRGPU(device.GetDriverID());
// On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory)
// On Desktop: Always LOAD to preserve existing content (safer default)
VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
if (is_tbdr && tbdr_will_clear) {
load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}
// On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory)
// On Desktop: Always STORE (safer default)
// VK_QCOM_render_pass_store_ops: Use NONE_QCOM for read-only attachments (preserves outside render area)
VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE;
if (is_tbdr && tbdr_discard_after) {
store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
} else if (is_tbdr && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
}
// Stencil operations follow same logic
VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
if (has_stencil && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
stencil_store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
} else if (has_stencil) {
stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
: VK_ATTACHMENT_LOAD_OP_LOAD;
stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE
: VK_ATTACHMENT_STORE_OP_STORE;
}
return {
.flags = {},
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
.samples = samples,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
.loadOp = load_op,
.storeOp = store_op,
.stencilLoadOp = stencil_load_op,
.stencilStoreOp = stencil_store_op,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
@@ -75,6 +127,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
if (!is_new) {
return *pair->second;
}
const bool is_tbdr = IsTBDRGPU(device->GetDriverID());
if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) {
LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})",
static_cast<u32>(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after);
}
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
std::array<VkAttachmentReference, 8> references{};
u32 num_attachments{};
@@ -87,7 +146,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
if (is_valid) {
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
descriptions.push_back(AttachmentDescription(*device, format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after));
num_attachments = static_cast<u32>(index + 1);
++num_colors;
}
@@ -99,12 +159,14 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.attachment = num_colors,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after, key.tbdr_read_only));
}
VkSubpassDescriptionFlags subpass_flags = 0;
if (key.qcom_shader_resolve) {
// VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader
// This must be the last subpass in the dependency chain
// This flag allows using a programmable fragment shader for MSAA resolve instead of
// fixed-function hardware resolve, enabling better quality and HDR format support
subpass_flags |= 0x00000008; // VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM (0x4 is FRAGMENT_REGION_BIT_QCOM)
}
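Caller-side, the new key flags drive the attachment ops above roughly like this (hypothetical usage; the variable names are assumptions):

Vulkan::RenderPassKey key{};
key.tbdr_will_clear = true;     // a full clear follows -> loadOp becomes DONT_CARE on TBDR
key.tbdr_discard_after = true;  // contents never read back -> storeOp becomes DONT_CARE on TBDR
key.tbdr_read_only = false;     // when true, selects STORE_OP_NONE_QCOM if the extension is present
VkRenderPass render_pass = render_pass_cache.Get(key);  // desktop GPUs still get LOAD/STORE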

View File

@@ -25,6 +25,7 @@ struct RenderPassKey {
// These flags indicate the expected usage pattern to optimize load/store operations
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
bool tbdr_read_only{false}; // Attachment is read-only (input attachment, depth test without writes)
// VK_QCOM_render_pass_shader_resolve support
bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass)
@@ -38,6 +39,8 @@ struct hash<Vulkan::RenderPassKey> {
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
size_t value = static_cast<size_t>(key.depth_format) << 48;
value ^= static_cast<size_t>(key.samples) << 52;
value ^= (static_cast<size_t>(key.tbdr_will_clear) << 56);
value ^= (static_cast<size_t>(key.tbdr_discard_after) << 57);
for (size_t i = 0; i < key.color_formats.size(); ++i) {
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
}

View File

@@ -66,10 +66,20 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
}
[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
[[nodiscard]] VkImageType ConvertImageType(const ImageType type, const Device& device) {
switch (type) {
case ImageType::e1D:
return VK_IMAGE_TYPE_1D;
// Mobile Vulkan drivers (Adreno, Mali, PowerVR/IMG) lack the Sampled1D SPIR-V capability
// Emulate 1D as a 2D texture with height=1 on mobile; use native 1D on desktop
{
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
return is_mobile ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
}
case ImageType::e2D:
case ImageType::Linear:
return VK_IMAGE_TYPE_2D;
@@ -141,7 +151,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = flags,
.imageType = ConvertImageType(info.type),
.imageType = ConvertImageType(info.type, device),
.format = format_info.format,
.extent{
.width = info.size.width >> samples_x,
@@ -160,6 +170,40 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
if (info.num_samples <= 1) {
return info;
}
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal,
false, info.format).format;
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
if (!is_hdr_format) {
return info;
}
// Qualcomm: VK_QCOM_render_pass_shader_resolve handles HDR+MSAA
if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
if (device.IsQcomRenderPassShaderResolveSupported()) {
return info;
}
}
// Other vendors: shaderStorageImageMultisample handles HDR+MSAA
if (device.IsStorageImageMultisampleSupported()) {
return info;
}
// No suitable resolve method - degrade to non-MSAA
LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples",
vk_format);
info.num_samples = 1;
return info;
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
const ImageInfo& info, std::span<const VkFormat> view_formats) {
if (info.type == ImageType::Buffer) {
@@ -272,10 +316,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_COMPONENT_SWIZZLE_ZERO;
}
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type, const Device& device) {
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
switch (type) {
case Shader::TextureType::Color1D:
return VK_IMAGE_VIEW_TYPE_1D;
// Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
case Shader::TextureType::Color2D:
case Shader::TextureType::Color2DRect:
return VK_IMAGE_VIEW_TYPE_2D;
@@ -284,7 +336,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case Shader::TextureType::Color3D:
return VK_IMAGE_VIEW_TYPE_3D;
case Shader::TextureType::ColorArray1D:
return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
// Emulate 1D array as 2D array with height=1 on mobile
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case Shader::TextureType::ColorArray2D:
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case Shader::TextureType::ColorArrayCube:
@@ -297,10 +350,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_IMAGE_VIEW_TYPE_2D;
}
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type, const Device& device) {
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
switch (type) {
case VideoCommon::ImageViewType::e1D:
return VK_IMAGE_VIEW_TYPE_1D;
// Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
case VideoCommon::ImageViewType::e2D:
case VideoCommon::ImageViewType::Rect:
return VK_IMAGE_VIEW_TYPE_2D;
@@ -309,7 +370,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case VideoCommon::ImageViewType::e3D:
return VK_IMAGE_VIEW_TYPE_3D;
case VideoCommon::ImageViewType::e1DArray:
return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
// Emulate 1D array as 2D array with height=1 on mobile
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case VideoCommon::ImageViewType::e2DArray:
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case VideoCommon::ImageViewType::CubeArray:
@@ -1326,7 +1388,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_UNORM:
case PixelFormat::ASTC_2D_6X5_SRGB:
case PixelFormat::E5B9G9R9_FLOAT:
case PixelFormat::D32_FLOAT:
case PixelFormat::D16_UNORM:
case PixelFormat::X8_D24_UNORM:
@@ -1490,6 +1551,23 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1;
// Use VK_QCOM_render_pass_shader_resolve for HDR formats on Qualcomm
// This is more efficient than a compute-shader resolve (data stays on-chip on TBDR)
const bool is_hdr_format = src.info.format == PixelFormat::B10G11R11_FLOAT ||
dst.info.format == PixelFormat::B10G11R11_FLOAT;
const bool use_qcom_resolve = msaa_to_non_msaa &&
device.IsQcomRenderPassShaderResolveSupported() &&
is_hdr_format &&
copies.size() == 1; // QCOM resolve works best with single full copy
if (use_qcom_resolve) {
// Create temporary framebuffer with resolve target
// TODO Camille: Implement QCOM shader resolve path with proper framebuffer setup
// For now, fall through to standard path
LOG_DEBUG(Render_Vulkan, "QCOM shader resolve opportunity detected but not yet implemented");
}
if (msaa_copy_pass) {
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
}
@@ -1513,10 +1591,20 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info,
runtime->ViewFormats(info.format))),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
runtime{&runtime_} {
// CRITICAL: Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample
// This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail
const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_);
// Update our stored info with adjusted values (may have num_samples=1 now)
info = adjusted_info;
// Create image with adjusted info
original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info,
runtime->ViewFormats(adjusted_info.format));
aspect_mask = ImageAspectMask(adjusted_info.format);
if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) {
switch (Settings::values.accelerate_astc.GetValue()) {
case Settings::AstcDecodeMode::Gpu:
if (Settings::values.astc_recompression.GetValue() ==
@@ -1552,24 +1640,6 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32);
}
}
// Proactive warning for problematic HDR format + MSAA combinations on Android
// These combinations commonly cause texture flickering/black screens across multiple game engines
// Note: MSAA is a native Switch rendering technique and cannot be disabled by the emulator
if (info.num_samples > 1) {
const auto vk_format = MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal,
false, info.format).format;
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
if (is_hdr_format) {
LOG_WARNING(Render_Vulkan,
"Creating MSAA image ({}x samples) with HDR format {} (Maxwell: {}). "
"Driver support may be limited on Android (Qualcomm < 800, Mali pre-maintenance5). "
"Format fallback to RGBA16F should prevent issues.",
info.num_samples, vk_format, info.format);
}
}
}
Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
@@ -2050,6 +2120,21 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
}
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
// Sampling R32_UINT views where shaders expect float data causes validation errors and
// undefined behavior (flickering, missing geometry) on certain games
// Reinterpret R32_UINT as R32_SFLOAT for sampled images to match shader expectations
VkFormat view_format = format_info.format;
if (view_format == VK_FORMAT_R32_UINT &&
!info.IsRenderTarget() &&
(ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_SAMPLED_BIT)) {
// Only reinterpret if NOT used as storage image (storage requires matching types)
const bool is_storage = (ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_STORAGE_BIT) != 0;
if (!is_storage) {
view_format = VK_FORMAT_R32_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R32_SFLOAT for sampled image compatibility");
}
}
if (ImageUsageFlags(format_info, format) != image.UsageFlags()) {
LOG_WARNING(Render_Vulkan,
"Image view format {} has different usage flags than image format {}", format,
@@ -2066,7 +2151,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.flags = 0,
.image = image.Handle(),
.viewType = VkImageViewType{},
.format = format_info.format,
.format = view_format,
.components{
.r = ComponentSwizzle(swizzle[0]),
.g = ComponentSwizzle(swizzle[1]),
@@ -2077,7 +2162,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
};
const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
VkImageViewCreateInfo ci{create_info};
ci.viewType = ImageViewType(tex_type);
ci.viewType = ImageViewType(tex_type, *device);
if (num_layers) {
ci.subresourceRange.layerCount = *num_layers;
}
@@ -2218,7 +2303,7 @@ vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
.viewType = ImageViewType(type, *device),
.format = vk_format,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -2364,6 +2449,26 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
}
renderpass_key.samples = samples;
// Enable VK_QCOM_render_pass_shader_resolve for HDR+MSAA on Qualcomm
// This performs MSAA resolve using fragment shader IN the render pass (on-chip)
// Benefits: ~70% bandwidth reduction, better performance on TBDR architectures
// Requirements: pResolveAttachments configured + explicit shader execution
if (samples > VK_SAMPLE_COUNT_1_BIT && runtime.device.IsQcomRenderPassShaderResolveSupported()) {
// Check if any color attachment is HDR format that benefits from shader resolve
bool has_hdr_attachment = false;
for (size_t index = 0; index < NUM_RT && !has_hdr_attachment; ++index) {
const auto format = renderpass_key.color_formats[index];
// B10G11R11_FLOAT benefits most: compute-shader resolve is limited for it and fixed-function resolve is slower
if (format == PixelFormat::B10G11R11_FLOAT) {
has_hdr_attachment = true;
}
}
if (has_hdr_attachment) {
renderpass_key.qcom_shader_resolve = true;
}
}
renderpass = runtime.render_pass_cache.Get(renderpass_key);
render_area.width = (std::min)(render_area.width, width);
render_area.height = (std::min)(render_area.height, height);
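The five-driver mobile check above is repeated verbatim in ConvertImageType and both ImageViewType overloads; a shared helper (a suggestion, not in the diff) would keep the lists in sync:

[[nodiscard]] constexpr bool IsMobileDriver(VkDriverId driver_id) {
    // Drivers without the Sampled1D SPIR-V capability, per the checks in this file
    return driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
           driver_id == VK_DRIVER_ID_MESA_TURNIP ||
           driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
           driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
           driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
}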

View File

@@ -89,20 +89,33 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
VK_FORMAT_UNDEFINED,
};
// B10G11R11_UFLOAT (R11G11B10 float) is used by Unreal Engine 5 for HDR textures
// Some Android drivers (Qualcomm pre-800, Mali pre-maintenance5) have issues with this format
// when used with MSAA or certain tiling modes, causing texture flickering/black screens
// B10G11R11_UFLOAT (R11G11B10F) - PRIMARY HDR format for Nintendo Switch
// Nintendo Switch hardware validation: FULL support (COLOR_ATTACHMENT + STORAGE_IMAGE + BLEND)
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - All required feature bits present
//
// Fallback strategy: Degrade to LDR instead of expensive HDR emulation
// - RGBA8 UNORM/SRGB: Universal support, 32-bit (same size as B10G11R11), acceptable quality
// - RGB10A2: Better precision if available, still 32-bit
// - RGBA16F: Last resort only if RGB8 variants fail (should never happen)
constexpr std::array B10G11R11_UFLOAT_PACK32{
VK_FORMAT_R16G16B16A16_SFLOAT, // Fallback: RGBA16F (more memory, but widely supported)
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, // Alternative: E5B9G9R9 shared exponent format
#ifdef ANDROID
VK_FORMAT_A8B8G8R8_SRGB_PACK32, // sRGB variant (for gamma-correct fallback)
#else
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Primary fallback: RGBA8 LDR (32-bit, universal)
VK_FORMAT_A2B10G10R10_UNORM_PACK32, // Better precision: RGB10A2 (32-bit, common)
#endif
VK_FORMAT_R16G16B16A16_SFLOAT, // Emergency fallback: RGBA16F (64-bit, should never be reached)
VK_FORMAT_UNDEFINED,
};
// E5B9G9R9_UFLOAT (shared exponent RGB9E5) used by various engines (Unity, custom engines)
// Also problematic on some Android drivers, especially with MSAA and as render target
// E5B9G9R9_UFLOAT (RGB9E5) - INVALID for COLOR_ATTACHMENT on Nintendo Switch
// Nintendo Switch hardware validation: NO COLOR_ATTACHMENT_BIT (only SAMPLED_IMAGE)
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - Missing required attachment bits
// This format should NEVER be used as render target, only for texture sampling
constexpr std::array E5B9G9R9_UFLOAT_PACK32{
VK_FORMAT_R16G16B16A16_SFLOAT, // Fallback: RGBA16F (safest option)
VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Alternative: might work if E5B9G9R9 fails
VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Upgrade to proper HDR format with attachment support
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Fallback: RGBA8 LDR
VK_FORMAT_R16G16B16A16_SFLOAT, // Last resort: RGBA16F
VK_FORMAT_UNDEFINED,
};
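Fallback arrays like the two above are terminated by VK_FORMAT_UNDEFINED and walked in order until a supported format is found; a simplified sketch of that loop (the real Device::GetSupportedFormat also checks the requested usage bits, and 'supported' here stands in for the per-format feature query):

template <typename SupportedFn>
VkFormat PickFirstSupported(std::span<const VkFormat> chain, SupportedFn&& supported) {
    for (const VkFormat format : chain) {
        if (format == VK_FORMAT_UNDEFINED) {
            break;  // end-of-chain sentinel
        }
        if (supported(format)) {
            return format;
        }
    }
    return VK_FORMAT_UNDEFINED;  // caller falls back to the wanted format or warns
}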
@@ -229,7 +242,6 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_SNORM,
@@ -539,9 +551,85 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
"Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
//RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
// Log Qualcomm-specific optimizations
if (extensions.render_pass_store_ops) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_store_ops: ENABLED (TBDR store optimization)");
}
if (extensions.tile_properties) {
LOG_INFO(Render_Vulkan, "VK_QCOM_tile_properties: ENABLED (tile size queries available)");
}
if (extensions.render_pass_shader_resolve) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_shader_resolve: ENABLED (HDR+MSAA shader resolve)");
}
#ifdef ANDROID
// Shader Float Controls handling for Qualcomm Adreno
// Default: DISABLED due to historical issues with binning precision causing visual glitches
const bool force_enable = Settings::values.shader_float_controls_force_enable.GetValue();
if (force_enable) {
// User explicitly enabled float controls - log detected capabilities and user config
LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)");
// Log driver capabilities
const auto& fc = properties.float_controls;
LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:");
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - RTE Rounding FP32: {}", fc.shaderRoundingModeRTEFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan FP32: {}", fc.shaderSignedZeroInfNanPreserveFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Independence: {}",
fc.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL ? "ALL" : "LIMITED");
// Log user selections
bool ftz = Settings::values.shader_float_ftz.GetValue();
bool preserve = Settings::values.shader_float_denorm_preserve.GetValue();
const bool rte = Settings::values.shader_float_rte.GetValue();
const bool signed_zero = Settings::values.shader_float_signed_zero_inf_nan.GetValue();
// Validate mutually exclusive options
if (ftz && preserve) {
LOG_WARNING(Render_Vulkan,
"CONFLICT: FTZ and DenormPreserve are mutually exclusive!");
LOG_WARNING(Render_Vulkan,
" -> DenormPreserve will take precedence (accuracy over speed)");
ftz = false; // Preserve takes priority for correctness
}
LOG_INFO(Render_Vulkan, "User Float Behavior Selection:");
LOG_INFO(Render_Vulkan, " - Flush To Zero (FTZ): {}", ftz ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Denorm Preserve: {}", preserve ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Round To Even (RTE): {}", rte ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan: {}", signed_zero ? "ENABLED" : "disabled");
// Analyze configuration vs Switch native behavior
const bool matches_switch = ftz && !preserve && rte && signed_zero;
if (matches_switch) {
LOG_INFO(Render_Vulkan, "Configuration MATCHES Switch/Maxwell native behavior (FTZ+RTE+SignedZero)");
} else if (!ftz && !preserve && !rte && !signed_zero) {
LOG_WARNING(Render_Vulkan, "No float behaviors selected - using driver default (may cause glitches)");
} else {
LOG_INFO(Render_Vulkan, "Configuration is CUSTOM - testing mode active");
}
// Extension stays enabled
LOG_INFO(Render_Vulkan, "VK_KHR_shader_float_controls: ENABLED");
} else {
// Default behavior - disable float controls
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls on Qualcomm (historical binning precision issues)");
LOG_INFO(Render_Vulkan,
"To enable: Eden Veil -> Extensions -> Shader Float Controls (Force Enable)");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
}
#else
// Non-Android: keep original behavior
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
#endif
// Int64 atomics - genuinely broken, always disable
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
features.shader_atomic_int64.shaderBufferInt64Atomics = false;
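For context, the float_controls capabilities logged above come from VK_KHR_shader_float_controls device properties, queried roughly like this (standard Vulkan API; the surrounding variable names are assumptions):

VkPhysicalDeviceFloatControlsProperties float_controls{
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES};
VkPhysicalDeviceProperties2 props2{
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
    .pNext = &float_controls};
vkGetPhysicalDeviceProperties2(physical_device, &props2);
// float_controls.shaderDenormFlushToZeroFloat32, .shaderRoundingModeRTEFloat32, etc.
// now hold the per-driver capabilities reported by the log lines above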
@@ -868,8 +956,7 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
// Driver may report STORAGE_IMAGE_BIT but shaderStorageImageMultisample=false means
// it will fail at runtime when used with MSAA (CopyImageMSAA silently fails)
const bool requests_storage = (wanted_usage & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0;
const bool is_hdr_format = wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
wanted_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
const bool is_hdr_format = wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
// If driver doesn't support shader storage image with MSAA, and we're requesting storage
// for an HDR format (which will likely be used with MSAA), force fallback
@@ -902,13 +989,8 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
// Special logging for HDR formats (common across multiple engines) on problematic drivers
if (wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
LOG_WARNING(Render_Vulkan,
"Emulating B10G11R11_UFLOAT (HDR format: UE5, custom engines) with {} on {}. "
"Native format not supported by driver, using fallback.",
alternative, properties.properties.deviceName);
} else if (wanted_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
LOG_WARNING(Render_Vulkan,
"Emulating E5B9G9R9_UFLOAT (HDR format: Unity, RE Engine) with {} on {}. "
"Native format not supported by driver, using fallback.",
"B10G11R11_UFLOAT_PACK32 (R11G11B10F HDR format) not fully supported. "
"Falling back to {} on {}",
alternative, properties.properties.deviceName);
} else {
LOG_DEBUG(Render_Vulkan,

View File

@@ -63,7 +63,8 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
workgroup_memory_explicit_layout)
workgroup_memory_explicit_layout) \
FEATURE(QCOM, TileProperties, TILE_PROPERTIES, tile_properties_qcom)
// Define miscellaneous extensions which may be used by the implementation here.
#define FOR_EACH_VK_EXTENSION(EXTENSION) \
@@ -96,6 +97,8 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \
EXTENSION(QCOM, RENDER_PASS_STORE_OPS, render_pass_store_ops) \
EXTENSION(QCOM, TILE_PROPERTIES, tile_properties) \
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
@@ -379,6 +382,12 @@ public:
return properties.subgroup_properties.supportedOperations & feature;
}
/// Returns true if subgroup operations are supported in the specified shader stage.
/// Mobile GPUs (Qualcomm Adreno) often only support subgroups in fragment/compute stages.
bool IsSubgroupSupportedForStage(VkShaderStageFlagBits stage) const {
return properties.subgroup_properties.supportedStages & stage;
}
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return properties.push_descriptor.maxPushDescriptors;
@@ -588,6 +597,26 @@ public:
return extensions.render_pass_shader_resolve;
}
/// Returns true if the device supports VK_QCOM_render_pass_store_ops
bool IsQcomRenderPassStoreOpsSupported() const {
return extensions.render_pass_store_ops;
}
/// Returns true if the device supports VK_QCOM_tile_properties
bool IsQcomTilePropertiesSupported() const {
return extensions.tile_properties;
}
/// Returns Qualcomm tile size (width, height, depth). Returns {0,0,0} if not queried or unsupported
VkExtent3D GetQcomTileSize() const {
return properties.qcom_tile_size;
}
/// Returns Qualcomm tile apron size. Returns {0,0} if not queried or unsupported
VkExtent2D GetQcomApronSize() const {
return properties.qcom_apron_size;
}
/// Returns true if MSAA copy operations are supported via compute shader (upload/download)
/// Qualcomm uses render pass shader resolve instead, so this returns false for Qualcomm
bool CanUploadMSAA() const {
@@ -857,6 +886,8 @@ private:
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
VkPhysicalDeviceProperties properties{};
VkExtent3D qcom_tile_size{}; // Qualcomm tile dimensions (0 if not queried)
VkExtent2D qcom_apron_size{}; // Qualcomm tile apron size
};
Extensions extensions{};
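The tile/apron getters above imply a query through VK_QCOM_tile_properties; a hedged sketch of how the cached values could be filled (the entry point is the extension's standard API, but the actual call site is not shown in this diff):

// Assumes a VkFramebuffer is available when the query runs
uint32_t count = 1;
VkTilePropertiesQCOM tile_props{.sType = VK_STRUCTURE_TYPE_TILE_PROPERTIES_QCOM};
vkGetFramebufferTilePropertiesQCOM(device, framebuffer, &count, &tile_props);
// properties.qcom_tile_size  = tile_props.tileSize;   (VkExtent3D)
// properties.qcom_apron_size = tile_props.apronSize;  (VkExtent2D)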

View File

@@ -226,11 +226,24 @@ namespace Vulkan {
vk::Buffer
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
{
// Qualcomm uses unified memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE
// for zero-copy access without staging buffers
const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload ||
usage == MemoryUsage::Download ||
usage == MemoryUsage::Stream);
VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage);
if (prefer_unified) {
// Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures
preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
}
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.preferredFlags = preferred_flags,
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
@@ -245,6 +258,13 @@ namespace Vulkan {
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
if (is_qualcomm && prefer_unified) {
const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}",
static_cast<u32>(usage), got_unified, property_flags);
}
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
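In practice the change is transparent to callers: an upload buffer on Qualcomm now lands in a unified DEVICE_LOCAL | HOST_VISIBLE memory type when one exists, and because the bits are set only in preferredFlags (not requiredFlags), VMA silently falls back on devices without such a type. Hypothetical call, with buffer_ci assumed:

const vk::Buffer staging = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::Upload);
// mapped_data above ends up non-empty whenever the allocation is host-visible and mapped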