Compare commits

...

83 Commits

Author SHA1 Message Date
PavelBARABANOV
8c99f0c166 Revert [shader_recompiler, spir-v] Adding INT64 emulation path 2025-11-18 21:01:17 +03:00
PavelBARABANOV
520e07e756 test 2025-11-17 17:45:36 +03:00
CamilleLaVey
674f552ff1 [vk, texture_cache] Workaround for games with wrong usage of R32 with float samplers 2025-11-15 19:41:26 -04:00
CamilleLaVey
75d9236520 [vk, pipeline_cache, texture_cache, qcom] Resolving textures and pipeline usage for QCOM 2025-11-15 19:18:00 -04:00
CamilleLaVey
89926bce0b [vk, spir-v] Adding emulation of Array 1D with 2D for Android 2025-11-15 18:54:53 -04:00
CamilleLaVey
6c1fc4b4ed [vk] Handling threading misuse of VkQueue 2025-11-15 18:30:31 -04:00
CamilleLaVey
311f06047b [vk, Maxwell] Refining support for HDR Formats based on Specs 2025-11-15 18:24:14 -04:00
CamilleLaVey
46df717f7c [vk] Degrade HDR Format to LDR 2025-11-15 16:12:44 -04:00
CamilleLaVey
dcf9483b0b [vk, qcom] TEST: Adjustment to the MSAA and format resolve with native support 2025-11-15 15:06:43 -04:00
CamilleLaVey
2b828a9fee [vk] Re-adjusted HDR format handling based on physical specifications 2025-11-15 14:44:47 -04:00
CamilleLaVey
6fe1f86984 [vk, qcom] Revert Push Descriptors Threshold due to data corruption 2025-11-15 14:14:37 -04:00
CamilleLaVey
af073f13cf [vk, qcom] Including Tile Properties and Store Ops based on QCOM specifications 2025-11-15 03:02:08 -04:00
CamilleLaVey
aea945b671 [vk, host shaders, qcom] MSAA Handling by Native QCOM Shader Resolve 2025-11-15 01:47:28 -04:00
CamilleLaVey
c52fda760a Merge branch 'stuffmadeforfun' of https://git.eden-emu.dev/eden-emu/eden into stuffmadeforfun 2025-11-14 23:40:23 -04:00
lizzie
c168755c65 fix license 2025-11-15 03:38:33 +00:00
CamilleLaVey
8a83cf0271 [service, hle] Add defensive check in WriteBuffer 2025-11-15 03:37:57 +00:00
CamilleLaVey
3db41fbce6 [memory, vk] TEST: Tiled GPU optimization try #1 2025-11-15 03:37:56 +00:00
CamilleLaVey
5e7fb6eead Fix building issues 2025-11-15 03:37:56 +00:00
CamilleLaVey
bcc5390943 [texture_cache, gl, vk] Initial implementation for HDR + MSAA detection on QCOM drivers. 2025-11-15 03:37:44 +00:00
CamilleLaVey
a51d875d91 [shader_recompiler, spir-v] verifying int64 emulation path activation 2025-11-15 03:37:17 +00:00
PavelBARABANOV
6134a57367 Revert "TEST: Enabling TimelineSemaphores for QCOM and Turnip"
This reverts commit 3cd33fce44.
2025-11-15 03:37:01 +00:00
CamilleLaVey
c845b6086f Adding missing headers 2025-11-15 03:37:01 +00:00
CamilleLaVey
31c168efe1 [shader_recompiler, spir-v] Adding INT64 emulation path 2025-11-15 03:36:25 +00:00
CamilleLaVey
8bd87204f5 TEST: Enabling TimelineSemaphores for QCOM and Turnip 2025-11-15 03:36:09 +00:00
lizzie
e72a206aee fix 2025-11-15 03:36:08 +00:00
CamilleLaVey
6a62fa7ee3 Implement handling for texture cache flickering 2025-11-15 03:36:01 +00:00
crueter
52b630dfdc build
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-11-15 03:35:37 +00:00
CamilleLaVey
4860050358 attempt to fix building issues 2025-11-15 03:35:37 +00:00
CamilleLaVey
47f0563c1b Giving maintenance to driver features and unused extensions 2025-11-15 03:35:35 +00:00
CamilleLaVey
b1208f03ee Fix building issues 2025-11-15 03:33:30 +00:00
CamilleLaVey
0fd603c094 [texture_cache, gl, vk] Initial implementation for HDR + MSAA detection on QCOM drivers. 2025-11-15 03:33:30 +00:00
CamilleLaVey
1ca19af7fb [shader_recompiler, spir-v] verifying int64 emulation path activation 2025-11-15 03:33:30 +00:00
PavelBARABANOV
ddd78c3b37 Revert "TEST: Enabling TimelineSemaphores for QCOM and Turnip"
This reverts commit 3cd33fce44.
2025-11-15 03:33:30 +00:00
CamilleLaVey
2e68f8795d Adding missing headers 2025-11-15 03:33:28 +00:00
CamilleLaVey
d3595fd2b1 [gl, vk, texture cache] Attempt to get correct MSAA image upload and download 2025-11-15 03:33:28 +00:00
CamilleLaVey
033531509b [shader_recompiler, spir-v] Adding INT64 emulation path 2025-11-15 03:33:28 +00:00
CamilleLaVey
b9954de1ca Fixing missing headers 2025-11-15 03:33:28 +00:00
CamilleLaVey
5f88deeebf [gl, vk] Extending impl for atomic float operations 2025-11-15 03:33:28 +00:00
CamilleLaVey
d25da944ed Changing checks in HostMemory for virtual memory mapping 2025-11-15 03:33:27 +00:00
CamilleLaVey
ec274a855e TEST: Enabling TimelineSemaphores for QCOM and Turnip 2025-11-15 03:33:27 +00:00
CamilleLaVey
8133d4a8b4 Improved handling for Custom Border Color buggy impl on ARM/QCOM and Turnip 2025-11-15 03:33:27 +00:00
lizzie
4f3e4bf9cb fix 2025-11-15 03:33:27 +00:00
CamilleLaVey
ec9e0f37ea Implement handling for texture cache flickering 2025-11-15 03:33:27 +00:00
crueter
b5f7735dba build
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-11-15 03:33:27 +00:00
CamilleLaVey
5f501d6ec0 attempt to fix building issues 2025-11-15 03:33:27 +00:00
CamilleLaVey
e820f304a5 Giving maintenance to driver features and unused extensions 2025-11-15 03:33:26 +00:00
CamilleLaVey
3527a33430 [service, hle] Add defensive check in WriteBuffer 2025-11-14 20:31:14 -04:00
CamilleLaVey
ee5565077c [memory, vk] TEST: Tiled GPU optimization try #1 2025-11-14 01:32:02 -04:00
CamilleLaVey
9085ff1229 Merge branch 'stuffmadeforfun' of https://git.eden-emu.dev/eden-emu/eden into stuffmadeforfun 2025-11-13 23:10:40 -04:00
CamilleLaVey
6eff1779a2 Fix building issues 2025-11-14 03:03:28 +01:00
CamilleLaVey
3228cffd23 [texture_cache, gl, vk] Initial implementation for HDR + MSAA detection on QCOM drivers. 2025-11-14 03:03:28 +01:00
CamilleLaVey
9d9530efe0 [shader_recompiler, spir-v] verifying int64 emulation path activation 2025-11-14 03:03:28 +01:00
PavelBARABANOV
aaaa7c7601 Revert "TEST: Enabling TimelineSemaphores for QCOM and Turnip"
This reverts commit 3cd33fce44.
2025-11-14 03:03:28 +01:00
CamilleLaVey
7f8a507b79 Adding missing headers 2025-11-14 03:03:28 +01:00
CamilleLaVey
c28ae059e8 [gl, vk, texture cache] Attempt to get correct MSAA image upload and download 2025-11-14 03:03:28 +01:00
CamilleLaVey
7f1369f9a8 [shader_recompiler, spir-v] Adding INT64 emulation path 2025-11-14 03:03:28 +01:00
CamilleLaVey
6b05c164a1 Fixing missing headers 2025-11-14 03:03:28 +01:00
CamilleLaVey
a3f9d3b59c [gl, vk] Extending impl for atomic float operations 2025-11-14 03:03:28 +01:00
CamilleLaVey
b066a6ffa0 Changing checks in HostMemory for virtual memory mapping 2025-11-14 03:03:28 +01:00
CamilleLaVey
a14cba7f11 TEST: Enabling TimelineSemaphores for QCOM and Turnip 2025-11-14 03:03:28 +01:00
CamilleLaVey
2d85b70373 Improved handling for Custom Border Color buggy impl on ARM/QCOM and Turnip 2025-11-14 03:03:28 +01:00
lizzie
aa8cc4da38 fix 2025-11-14 03:03:28 +01:00
CamilleLaVey
baddaf0040 Implement handling for texture cache flickering 2025-11-14 03:03:28 +01:00
crueter
35b4e34e09 build
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-11-14 03:03:28 +01:00
CamilleLaVey
b574e9c334 attempt to fix building issues 2025-11-14 03:03:28 +01:00
CamilleLaVey
d6b5a3e181 Giving maintenance to driver features and unused extensions 2025-11-14 03:03:28 +01:00
CamilleLaVey
a65a35432e Fix building issues 2025-11-13 21:30:19 -04:00
CamilleLaVey
6e575364eb [texture_cache, gl, vk] Initial implementation for HDR + MSAA detection on QCOM drivers. 2025-11-13 20:37:47 -04:00
CamilleLaVey
71a1442ab6 [shader_recompiler, spir-v] verifying int64 emulation path activation 2025-11-13 18:42:49 -04:00
PavelBARABANOV
4a17762ed7 Revert "TEST: Enabling TimelineSemaphores for QCOM and Turnip"
This reverts commit 3cd33fce44.
2025-11-10 18:52:31 +03:00
CamilleLaVey
447c4de73d Adding missing headers 2025-11-10 00:27:38 -04:00
CamilleLaVey
cd2c4d8caf [gl, vk, texture cache] Attempt to get correct MSAA image upload and download 2025-11-10 00:22:08 -04:00
CamilleLaVey
ee64c945fb [shader_recompiler, spir-v] Adding INT64 emulation path 2025-11-09 23:14:51 -04:00
CamilleLaVey
eec5d48220 Fixing missing headers 2025-11-09 20:03:19 -04:00
CamilleLaVey
75cc43a57a [gl, vk] Extending impl for atomic float operations 2025-11-10 00:55:57 +01:00
CamilleLaVey
0078094b86 Changing checks in HostMemory for virtual memory mapping 2025-11-10 00:55:57 +01:00
CamilleLaVey
3cd33fce44 TEST: Enabling TimelineSemaphores for QCOM and Turnip 2025-11-10 00:55:57 +01:00
CamilleLaVey
ccafe0ed91 Improved handling for Custom Border Color buggy impl on ARM/QCOM and Turnip 2025-11-10 00:55:57 +01:00
lizzie
94af9ff51f fix 2025-11-10 00:55:57 +01:00
CamilleLaVey
d229fdca32 Implement handling for texture cache flickering 2025-11-10 00:55:57 +01:00
crueter
e636e940ed build
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-11-10 00:55:57 +01:00
CamilleLaVey
2798174b00 attempt to fix building issues 2025-11-10 00:55:57 +01:00
CamilleLaVey
46f2084114 Giving maintenance to driver features and unused extensions 2025-11-10 00:55:57 +01:00
39 changed files with 1054 additions and 79 deletions

View File

@@ -121,7 +121,7 @@ else()
     -Werror=unused
     -Wno-attributes
-    -Wno-invalid-offsetof
+    $<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>
     -Wno-unused-parameter
     -Wno-missing-field-initializers
 )

View File

@@ -730,7 +730,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(host_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
-    ASSERT(virtual_offset + length <= virtual_size);
+    if (impl && virtual_base) {
+        ASSERT(virtual_offset + length <= virtual_size);
+    }
     ASSERT(host_offset + length <= backing_size);
     if (length == 0 || !virtual_base || !impl) {
         return;
@@ -741,7 +743,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
 void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
-    ASSERT(virtual_offset + length <= virtual_size);
+    if (impl && virtual_base) {
+        ASSERT(virtual_offset + length <= virtual_size);
+    }
     if (length == 0 || !virtual_base || !impl) {
         return;
     }
@@ -751,7 +755,9 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap)
 void HostMemory::Protect(size_t virtual_offset, size_t length, MemoryPermission perm) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
-    ASSERT(virtual_offset + length <= virtual_size);
+    if (impl && virtual_base) {
+        ASSERT(virtual_offset + length <= virtual_size);
+    }
     if (length == 0 || !virtual_base || !impl) {
         return;
     }

View File

@@ -560,6 +560,60 @@ struct Values {
                                          false,
                                          &sample_shading};
+#ifdef ANDROID
+    // Shader Float Controls (Android only) - Eden Veil / Extensions
+    // Force enable VK_KHR_shader_float_controls even if driver has known issues
+    // Allows fine-tuning float behavior to match Switch/Maxwell or optimize performance
+    SwitchableSetting<bool> shader_float_controls_force_enable{linkage,
+                                                               false,
+                                                               "shader_float_controls_force_enable",
+                                                               Category::RendererExtensions,
+                                                               Specialization::Paired};
+    // Individual float behavior controls (visible only when force_enable is true)
+    // Multiple can be active simultaneously EXCEPT FTZ and DenormPreserve (mutually exclusive)
+    //
+    // Recommended configurations:
+    //   Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior)
+    //   Performance:   FTZ=ON only (fastest)
+    //   Accuracy:      DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision)
+    SwitchableSetting<bool> shader_float_ftz{linkage,
+                                             false,
+                                             "shader_float_ftz",
+                                             Category::RendererExtensions,
+                                             Specialization::Default,
+                                             true,
+                                             false,
+                                             &shader_float_controls_force_enable};
+    SwitchableSetting<bool> shader_float_denorm_preserve{linkage,
+                                                         false,
+                                                         "shader_float_denorm_preserve",
+                                                         Category::RendererExtensions,
+                                                         Specialization::Default,
+                                                         true,
+                                                         false,
+                                                         &shader_float_controls_force_enable};
+    SwitchableSetting<bool> shader_float_rte{linkage,
+                                             false,
+                                             "shader_float_rte",
+                                             Category::RendererExtensions,
+                                             Specialization::Default,
+                                             true,
+                                             false,
+                                             &shader_float_controls_force_enable};
+    SwitchableSetting<bool> shader_float_signed_zero_inf_nan{linkage,
+                                                             false,
+                                                             "shader_float_signed_zero_inf_nan",
+                                                             Category::RendererExtensions,
+                                                             Specialization::Default,
+                                                             true,
+                                                             false,
+                                                             &shader_float_controls_force_enable};
+#endif
     Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
     Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",
                                            Category::RendererDebug};

View File

@@ -152,6 +152,16 @@ ENUM(SpirvOptimizeMode, Never, OnLoad, Always);
 ENUM(GpuOverclock, Low, Medium, High)
 ENUM(TemperatureUnits, Celsius, Fahrenheit)
+// Shader Float Controls behavior modes
+// These control how floating-point denormals and special values are handled in shaders
+ENUM(ShaderFloatBehavior,
+     DriverDefault,     // Let driver choose (safest, may not match Switch behavior)
+     SwitchNative,      // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero)
+     FlushToZero,       // FTZ only - flush denorms to zero (fastest, some precision loss)
+     PreserveDenorms,   // Preserve denorms (slowest, highest precision)
+     RoundToEven,       // RTE rounding mode (IEEE 754 compliant)
+     SignedZeroInfNan); // Preserve signed zero, inf, nan (accuracy for edge cases)
 template <typename Type>
 inline std::string_view CanonicalizeEnum(Type id) {
     const auto group = EnumMetadata<Type>::Canonicalizations();
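Note: nothing else in this changeset consumes the ShaderFloatBehavior presets; the Vulkan backend reads the four individual shader_float_* booleans added to settings.h above. A minimal sketch of how a frontend could expand a preset into those flags follows — the struct and helper are illustrative only, not part of this diff:

// Hypothetical helper (not in this changeset): expands a ShaderFloatBehavior
// preset into the four individual force flags consumed by Shader::Profile.
struct ForcedFloatControls {
    bool ftz{};              // -> force_fp32_denorm_flush
    bool denorm_preserve{};  // -> force_fp32_denorm_preserve
    bool rte{};              // -> force_fp32_rte_rounding
    bool signed_zero{};      // -> force_fp32_signed_zero_inf_nan
};

constexpr ForcedFloatControls ExpandFloatBehavior(Settings::ShaderFloatBehavior mode) {
    using B = Settings::ShaderFloatBehavior;
    switch (mode) {
    case B::SwitchNative:
        return {.ftz = true, .rte = true, .signed_zero = true}; // matches Maxwell
    case B::FlushToZero:
        return {.ftz = true};
    case B::PreserveDenorms:
        return {.denorm_preserve = true};
    case B::RoundToEven:
        return {.rte = true};
    case B::SignedZeroInfNan:
        return {.signed_zero = true};
    case B::DriverDefault:
    default:
        return {}; // leave everything to the driver
    }
}

Because each preset enables at most one of FTZ and DenormPreserve, the mutual exclusivity noted in settings.h holds by construction.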

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -393,6 +396,24 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
     const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
                            BufferDescriptorB()[buffer_index].Size()};
     const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
+    // Defensive check: if client didn't provide output buffer, log detailed error but don't crash
+    if (buffer_size == 0) {
+        LOG_ERROR(Core,
+                  "WriteBuffer called but client provided NO output buffer! "
+                  "Requested size: 0x{:X}, buffer_index: {}, is_buffer_b: {}, "
+                  "BufferB count: {}, BufferC count: {}",
+                  size, buffer_index, is_buffer_b, BufferDescriptorB().size(),
+                  BufferDescriptorC().size());
+        // Log command context for debugging
+        LOG_ERROR(Core, "IPC Command: 0x{:X}, Type: {}", GetCommand(),
+                  static_cast<u32>(GetCommandType()));
+        // Return 0 instead of crashing - let service handle error
+        return 0;
+    }
     if (size > buffer_size) {
         LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
                      buffer_size);

View File

@@ -380,13 +380,14 @@ void EmitContext::SetupExtensions() {
     if (info.uses_int64 && profile.support_int64) {
         header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
     }
-    if (info.uses_int64_bit_atomics) {
+    if (info.uses_int64_bit_atomics && profile.support_gl_shader_atomic_int64) {
         header += "#extension GL_NV_shader_atomic_int64 : enable\n";
     }
-    if (info.uses_atomic_f32_add) {
+    if (info.uses_atomic_f32_add && profile.support_gl_shader_atomic_float) {
         header += "#extension GL_NV_shader_atomic_float : enable\n";
     }
-    if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+    if ((info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) &&
+        profile.support_gl_shader_atomic_fp16_vector) {
         header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
     }
     if (info.uses_fp16) {

View File

@@ -341,19 +341,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
 void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
                         Id main_func) {
     const Info& info{program.info};
+    // User-forced behavior overrides (Android Eden Veil/Extensions)
+    // When force flags are active, they take precedence over shader-declared behavior
+    const bool force_flush = profile.force_fp32_denorm_flush;
+    const bool force_preserve = profile.force_fp32_denorm_preserve;
+    if (force_flush && force_preserve) {
+        LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence");
+    }
     if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
         LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
-    } else if (info.uses_fp32_denorms_flush) {
+    } else if (force_flush || info.uses_fp32_denorms_flush) {
         if (profile.support_fp32_denorm_flush) {
             ctx.AddCapability(spv::Capability::DenormFlushToZero);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
+            if (force_flush) {
+                LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting");
+            }
         } else {
             // Drivers will most likely flush denorms by default, no need to warn
         }
-    } else if (info.uses_fp32_denorms_preserve) {
+    } else if (force_preserve || info.uses_fp32_denorms_preserve) {
         if (profile.support_fp32_denorm_preserve) {
             ctx.AddCapability(spv::Capability::DenormPreserve);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
+            if (force_preserve) {
+                LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting");
+            }
         } else {
             LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
         }
@@ -386,13 +402,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
     if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
         return;
     }
+    // User-forced behavior (Android Eden Veil/Extensions)
+    const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan;
     if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
         ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
         ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
     }
-    if (profile.support_fp32_signed_zero_nan_preserve) {
-        ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
-        ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
+    if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) {
+        if (profile.support_fp32_signed_zero_nan_preserve) {
+            ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
+            if (force_signed_zero_inf_nan) {
+                LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting");
+            }
+        } else if (force_signed_zero_inf_nan) {
+            LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it");
+        }
     }
     if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
         ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -293,6 +296,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     }
     // This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2
     IR::Inst* addr_inst{addr.InstRecursive()};
+    // Unwrap Identity ops introduced by lowerings (e.g., PackUint2x32 -> Identity)
+    while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
+        const IR::Value id_arg{addr_inst->Arg(0)};
+        if (id_arg.IsImmediate()) {
+            return std::nullopt;
+        }
+        addr_inst = id_arg.InstRecursive();
+    }
     s32 imm_offset{0};
     if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
         // If it's an IAdd64, get the immediate offset it is applying and grab the address
@@ -308,6 +319,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
             return std::nullopt;
         }
         addr_inst = iadd_addr.InstRecursive();
+        // Unwrap Identity again if present after folding IAdd64
+        while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
+            const IR::Value id_arg{addr_inst->Arg(0)};
+            if (id_arg.IsImmediate()) {
+                return std::nullopt;
+            }
+            addr_inst = id_arg.InstRecursive();
+        }
     }
     // With IAdd64 handled, now PackUint2x32 is expected
     if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
@@ -317,6 +336,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
             return std::nullopt;
         }
         addr_inst = vector.InstRecursive();
+        // Unwrap Identity that may replace PackUint2x32
+        while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
+            const IR::Value id_arg{addr_inst->Arg(0)};
+            if (id_arg.IsImmediate()) {
+                return std::nullopt;
+            }
+            addr_inst = id_arg.InstRecursive();
+        }
     }
     // The vector is expected to be a CompositeConstructU32x2
     if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
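The same Identity-unwrapping loop appears three times in TrackLowAddress above; a possible follow-up cleanup (not part of this changeset) would hoist it into a small helper, roughly:

// Sketch only: walks through Identity wrappers introduced by lowering passes.
// Returns nullptr when the chain ends in an immediate, mirroring the
// `return std::nullopt` bail-outs in the hunks above.
[[nodiscard]] IR::Inst* UnwrapIdentity(IR::Inst* inst) {
    while (inst->GetOpcode() == IR::Opcode::Identity) {
        const IR::Value arg{inst->Arg(0)};
        if (arg.IsImmediate()) {
            return nullptr;
        }
        inst = arg.InstRecursive();
    }
    return inst;
}

Each call site would then check for nullptr and return std::nullopt, keeping behavior identical.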

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -25,6 +28,14 @@ struct Profile {
     bool support_fp16_signed_zero_nan_preserve{};
     bool support_fp32_signed_zero_nan_preserve{};
     bool support_fp64_signed_zero_nan_preserve{};
+    // User-forced float behavior overrides (Android Eden Veil/Extensions)
+    // When shader_float_controls_force_enable is true, these override shader-declared behavior
+    bool force_fp32_denorm_flush{};        // Force FTZ for all FP32 ops
+    bool force_fp32_denorm_preserve{};     // Force denorm preservation for all FP32 ops
+    bool force_fp32_rte_rounding{};        // Force Round-To-Even for all FP32 ops
+    bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation
     bool support_explicit_workgroup_layout{};
     bool support_vote{};
     bool support_viewport_index_layer_non_geometry{};
@@ -38,6 +49,9 @@ struct Profile {
     bool support_gl_nv_gpu_shader_5{};
     bool support_gl_amd_gpu_shader_half_float{};
     bool support_gl_texture_shadow_lod{};
+    bool support_gl_shader_atomic_float{};
+    bool support_gl_shader_atomic_fp16_vector{};
+    bool support_gl_shader_atomic_int64{};
     bool support_gl_warp_intrinsics{};
     bool support_gl_variable_aoffi{};
     bool support_gl_sparse_textures{};

View File

@@ -42,7 +42,7 @@ constexpr std::array VIEW_CLASS_32_BITS{
     PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
     PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
     PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
-    PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM,
+    PixelFormat::A8B8G8R8_SRGB, PixelFormat::B8G8R8A8_UNORM,
     PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT,
     PixelFormat::A2B10G10R10_UINT,
 };
@@ -52,7 +52,7 @@ constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
     PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
     PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
     PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
-    PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::A8B8G8R8_UINT,
+    PixelFormat::A8B8G8R8_SRGB, PixelFormat::A8B8G8R8_UINT,
     PixelFormat::A8B8G8R8_SINT, PixelFormat::A2B10G10R10_UINT,
 };

View File

@@ -76,6 +76,7 @@ set(SHADER_FILES
     vulkan_quad_indexed.comp
     vulkan_turbo_mode.comp
     vulkan_uint8.comp
+    vulkan_qcom_msaa_resolve.frag
    convert_rgba8_to_bgra8.frag
    convert_yuv420_to_rgb.comp
    convert_rgb_to_yuv420.comp

View File

@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#version 450
+
+// VK_QCOM_render_pass_shader_resolve fragment shader
+// Resolves MSAA attachment to single-sample within render pass
+// Requires VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM in subpass flags
+
+// Use combined image sampler for MSAA texture instead of input attachment
+// This allows us to sample MSAA textures from previous rendering
+layout(set = 0, binding = 0) uniform sampler2DMS msaa_texture;
+
+layout(location = 0) out vec4 color_output;
+
+layout(push_constant) uniform PushConstants {
+    vec2 tex_scale;
+    vec2 tex_offset;
+} push_constants;
+
+// Custom MSAA resolve using box filter (simple average)
+// Assumes 4x MSAA (can be extended with push constant for dynamic sample count)
+void main() {
+    ivec2 coord = ivec2(gl_FragCoord.xy);
+    ivec2 tex_size = textureSize(msaa_texture);
+
+    // Clamp coordinates to texture bounds
+    coord = clamp(coord, ivec2(0), tex_size - ivec2(1));
+
+    vec4 accumulated_color = vec4(0.0);
+    int sample_count = 4; // Adreno typically uses 4x MSAA max
+
+    // Box filter: simple average of all MSAA samples
+    for (int i = 0; i < sample_count; i++) {
+        accumulated_color += texelFetch(msaa_texture, coord, i);
+    }
+
+    color_output = accumulated_color / float(sample_count);
+}

View File

@@ -225,6 +225,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
     has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
     has_draw_texture = GLAD_GL_NV_draw_texture;
+    has_shader_atomic_float = GLAD_GL_NV_shader_atomic_float;
+    has_shader_atomic_fp16_vector = GLAD_GL_NV_shader_atomic_fp16_vector;
+    has_shader_atomic_int64 = GLAD_GL_NV_shader_atomic_int64;
     warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
     need_fastmath_off = is_nvidia;
     can_report_memory = GLAD_GL_NVX_gpu_memory_info;

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -152,6 +155,18 @@ public:
         return has_draw_texture;
     }
+    bool HasShaderAtomicFloat() const {
+        return has_shader_atomic_float;
+    }
+    bool HasShaderAtomicFp16Vector() const {
+        return has_shader_atomic_fp16_vector;
+    }
+    bool HasShaderAtomicInt64() const {
+        return has_shader_atomic_int64;
+    }
     bool IsWarpSizePotentiallyLargerThanGuest() const {
         return warp_size_potentially_larger_than_guest;
     }
@@ -235,6 +250,9 @@ private:
     bool has_amd_shader_half_float{};
     bool has_sparse_texture_2{};
     bool has_draw_texture{};
+    bool has_shader_atomic_float{};
+    bool has_shader_atomic_fp16_vector{};
+    bool has_shader_atomic_int64{};
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
     bool has_cbuf_ftou_bug{};

View File

@@ -215,6 +215,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
         .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
         .support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
+        .support_gl_shader_atomic_float = device.HasShaderAtomicFloat(),
+        .support_gl_shader_atomic_fp16_vector = device.HasShaderAtomicFp16Vector(),
+        .support_gl_shader_atomic_int64 = device.HasShaderAtomicInt64(),
         .support_gl_warp_intrinsics = false,
         .support_gl_variable_aoffi = device.HasVariableAoffi(),
         .support_gl_sparse_textures = device.HasSparseTexture2(),

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -97,6 +100,10 @@ public:
         return true;
     }
+    bool CanDownloadMSAA() const noexcept {
+        return true;
+    }
     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
     void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);

View File

@@ -40,6 +40,7 @@
 #include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
 #include "video_core/host_shaders/dither_temporal_frag_spv.h"
 #include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
+#include "video_core/host_shaders/vulkan_qcom_msaa_resolve_frag_spv.h"
 namespace Vulkan {
@@ -545,6 +546,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
     convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
     dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
     dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
+    qcom_msaa_resolve_frag(BuildShader(device, VULKAN_QCOM_MSAA_RESOLVE_FRAG_SPV)),
     linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
     nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
@@ -1240,4 +1242,30 @@ void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebu
     Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
 }
+void BlitImageHelper::ResolveMSAAQcom(const Framebuffer* dst_framebuffer,
+                                      const ImageView& src_image_view) {
+    // VK_QCOM_render_pass_shader_resolve implementation
+    // This must be used within a render pass with VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
+    ConvertPipeline(qcom_msaa_resolve_pipeline,
+                    dst_framebuffer->RenderPass(),
+                    false);
+    RecordShaderReadBarrier(scheduler, src_image_view);
+    scheduler.RequestRenderpass(dst_framebuffer);
+    const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
+    const VkPipelineLayout layout = *one_texture_pipeline_layout;
+    const VkPipeline pipeline = *qcom_msaa_resolve_pipeline;
+    scheduler.Record([this, src_view, layout, pipeline](vk::CommandBuffer cmdbuf) {
+        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
+        UpdateOneTextureDescriptorSet(device, descriptor_set, *nearest_sampler, src_view);
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr);
+        cmdbuf.Draw(3, 1, 0, 0);
+    });
+    scheduler.InvalidateState();
+}
 } // namespace Vulkan

View File

@@ -95,6 +95,8 @@ public:
     void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
     void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
     void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+    void ResolveMSAAQcom(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
 private:
     void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
@@ -159,6 +161,7 @@ private:
     vk::ShaderModule convert_rgba16f_to_rgba8_frag;
     vk::ShaderModule dither_temporal_frag;
     vk::ShaderModule dynamic_resolution_scale_comp;
+    vk::ShaderModule qcom_msaa_resolve_frag;
     vk::Sampler linear_sampler;
     vk::Sampler nearest_sampler;
@@ -188,6 +191,7 @@ private:
     vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
     vk::Pipeline dither_temporal_pipeline;
     vk::Pipeline dynamic_resolution_scale_pipeline;
+    vk::Pipeline qcom_msaa_resolve_pipeline;
 };
 } // namespace Vulkan

View File

@@ -214,7 +214,7 @@ struct FormatTuple {
     {VK_FORMAT_ASTC_8x6_SRGB_BLOCK},    // ASTC_2D_8X6_SRGB
     {VK_FORMAT_ASTC_6x5_UNORM_BLOCK},   // ASTC_2D_6X5_UNORM
     {VK_FORMAT_ASTC_6x5_SRGB_BLOCK},    // ASTC_2D_6X5_SRGB
-    {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
+    {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT (SAMPLED_IMAGE only, no COLOR_ATTACHMENT)
     // Depth formats
     {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -24,8 +27,13 @@ public:
     DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
     bool CanUsePushDescriptor() const noexcept {
-        return device->IsKhrPushDescriptorSupported() &&
-               num_descriptors <= device->MaxPushDescriptors();
+        if (!device->IsKhrPushDescriptorSupported()) {
+            return false;
+        }
+        if (num_descriptors > device->MaxPushDescriptors()) {
+            return false;
+        }
+        return true;
     }
     // TODO(crueter): utilize layout binding flags

View File

@@ -5,7 +5,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include "common/assert.h"
-#include <ranges>
 #include "video_core/renderer_vulkan/present/util.h"
 namespace Vulkan {

View File

@@ -177,6 +177,8 @@ try
 RendererVulkan::~RendererVulkan() {
     scheduler.RegisterOnSubmit([] {});
+    // Acquire submit_mutex before WaitIdle to prevent simultaneous queue access
+    std::scoped_lock lock{scheduler.submit_mutex};
     void(device.GetLogical().WaitIdle());
 }

View File

@@ -30,7 +30,8 @@ BlitScreen::~BlitScreen() = default;
 void BlitScreen::WaitIdle() {
     present_manager.WaitPresent();
     scheduler.Finish();
-    device.GetLogical().WaitIdle();
+    // Note: scheduler.Finish() already waits for GPU and synchronizes submit_mutex
+    // Calling device.WaitIdle() here causes threading errors (simultaneous queue access)
 }
 void BlitScreen::SetWindowAdaptPass() {

View File

@@ -341,6 +341,20 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
             float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
         .support_fp64_signed_zero_nan_preserve =
             float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+#ifdef ANDROID
+        // User-forced float behavior overrides (Eden Veil/Extensions)
+        .force_fp32_denorm_flush = Settings::values.shader_float_ftz.GetValue(),
+        .force_fp32_denorm_preserve = Settings::values.shader_float_denorm_preserve.GetValue(),
+        .force_fp32_rte_rounding = Settings::values.shader_float_rte.GetValue(),
+        .force_fp32_signed_zero_inf_nan = Settings::values.shader_float_signed_zero_inf_nan.GetValue(),
+#else
+        .force_fp32_denorm_flush = false,
+        .force_fp32_denorm_preserve = false,
+        .force_fp32_rte_rounding = false,
+        .force_fp32_signed_zero_inf_nan = false,
+#endif
         .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
         .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
         .support_viewport_index_layer_non_geometry =
@@ -677,7 +691,17 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
         ConvertLegacyToGeneric(program, runtime_info);
-        const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
+        // Adreno don't support subgroup operations in vertex stages
+        // Disable subgroup features for vertex shaders if not supported by the device
+        Shader::Profile stage_profile = profile;
+        if (program.stage == Shader::Stage::VertexA || program.stage == Shader::Stage::VertexB) {
+            if (!device.IsSubgroupSupportedForStage(VK_SHADER_STAGE_VERTEX_BIT)) {
+                stage_profile.support_vote = false;
+            }
+        }
+        const std::vector<u32> code{EmitSPIRV(stage_profile, runtime_info, program, binding, this->optimize_spirv_output)};
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
         if (device.HasDebuggingToolAttached()) {
@@ -771,6 +795,17 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     }
     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+    // Adreno have lower shared memory limits (32KB)
+    // Clamp shared memory usage to device maximum to avoid validation errors
+    const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
+    if (program.shared_memory_size > max_shared_memory) {
+        LOG_WARNING(Render_Vulkan,
+                    "Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
+                    key.unique_hash, program.shared_memory_size / 1024, max_shared_memory / 1024);
+        program.shared_memory_size = max_shared_memory;
+    }
     const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};

View File

@@ -8,6 +8,7 @@
 #include <boost/container/static_vector.hpp>
+#include "common/logging/log.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_render_pass_cache.h"
 #include "video_core/surface.h"
@@ -19,6 +20,23 @@ namespace {
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceType;
+// Check if the driver uses tile-based deferred rendering (TBDR) architecture
+// These GPUs benefit from optimized load/store operations to keep data on-chip
+//
+// TBDR GPUs supported in Eden:
+// - Qualcomm Adreno (Snapdragon): Most Android flagship/midrange devices
+// - ARM Mali: Android devices (Samsung Exynos, MediaTek, etc.)
+// - Imagination PowerVR: Older iOS devices, some Android tablets
+// - Samsung Xclipse: Galaxy S22+ (AMD RDNA2-based, but uses TBDR mode)
+// - Broadcom VideoCore: Raspberry Pi
+[[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) {
+    return driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+           driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
+           driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY ||
+           driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY ||
+           driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY;
+}
 constexpr SurfaceType GetSurfaceType(PixelFormat format) {
     switch (format) {
     // Depth formats
@@ -44,23 +62,57 @@ using VideoCore::Surface::SurfaceType;
 }
 VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
-                                              VkSampleCountFlagBits samples) {
+                                              VkSampleCountFlagBits samples,
+                                              bool tbdr_will_clear,
+                                              bool tbdr_discard_after,
+                                              bool tbdr_read_only = false) {
     using MaxwellToVK::SurfaceFormat;
     const SurfaceType surface_type = GetSurfaceType(format);
     const bool has_stencil = surface_type == SurfaceType::DepthStencil ||
                              surface_type == SurfaceType::Stencil;
+    // TBDR optimization: Apply hints only on tile-based GPUs
+    // Desktop GPUs (NVIDIA/AMD/Intel) ignore these hints and use standard behavior
+    const bool is_tbdr = IsTBDRGPU(device.GetDriverID());
+    // On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory)
+    // On Desktop: Always LOAD to preserve existing content (safer default)
+    VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
+    if (is_tbdr && tbdr_will_clear) {
+        load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+    }
+    // On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory)
+    // On Desktop: Always STORE (safer default)
+    // VK_QCOM_render_pass_store_ops: Use NONE_QCOM for read-only attachments (preserves outside render area)
+    VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE;
+    if (is_tbdr && tbdr_discard_after) {
+        store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+    } else if (is_tbdr && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
+        store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
+    }
+    // Stencil operations follow same logic
+    VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+    VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+    if (has_stencil && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
+        stencil_store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
+    } else if (has_stencil) {
+        stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
+                                                       : VK_ATTACHMENT_LOAD_OP_LOAD;
+        stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE
+                                                           : VK_ATTACHMENT_STORE_OP_STORE;
+    }
     return {
         .flags = {},
         .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
         .samples = samples,
-        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-        .stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD
-                                     : VK_ATTACHMENT_LOAD_OP_DONT_CARE,
-        .stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE
-                                      : VK_ATTACHMENT_STORE_OP_DONT_CARE,
+        .loadOp = load_op,
+        .storeOp = store_op,
+        .stencilLoadOp = stencil_load_op,
+        .stencilStoreOp = stencil_store_op,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
         .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
     };
@@ -75,6 +127,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
     if (!is_new) {
         return *pair->second;
     }
+    const bool is_tbdr = IsTBDRGPU(device->GetDriverID());
+    if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) {
+        LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})",
+                  static_cast<u32>(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after);
+    }
     boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
     std::array<VkAttachmentReference, 8> references{};
     u32 num_attachments{};
@@ -87,7 +146,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
             .layout = VK_IMAGE_LAYOUT_GENERAL,
         };
         if (is_valid) {
-            descriptions.push_back(AttachmentDescription(*device, format, key.samples));
+            descriptions.push_back(AttachmentDescription(*device, format, key.samples,
+                                                         key.tbdr_will_clear, key.tbdr_discard_after));
             num_attachments = static_cast<u32>(index + 1);
             ++num_colors;
         }
@@ -99,10 +159,19 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
             .attachment = num_colors,
             .layout = VK_IMAGE_LAYOUT_GENERAL,
         };
-        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
+        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
+                                                     key.tbdr_will_clear, key.tbdr_discard_after, key.tbdr_read_only));
     }
+    VkSubpassDescriptionFlags subpass_flags = 0;
+    if (key.qcom_shader_resolve) {
+        // VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader
+        // This flag allows using a programmable fragment shader for MSAA resolve instead of
+        // fixed-function hardware resolve, enabling better quality and HDR format support
+        subpass_flags |= 0x00000004; // VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
+    }
     const VkSubpassDescription subpass{
-        .flags = 0,
+        .flags = subpass_flags,
         .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
         .inputAttachmentCount = 0,
         .pInputAttachments = nullptr,
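The two raw literals above map to named enums in current Vulkan headers: 1000301000 is VK_ATTACHMENT_STORE_OP_NONE_QCOM (from VK_QCOM_render_pass_store_ops) and 0x00000004 is VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM (from VK_QCOM_render_pass_shader_resolve). Assuming the bundled vulkan_core.h is recent enough to define them, an equivalent spelling would be:

// Same behavior as the hunk above, expressed with the named Vulkan enums
// (requires headers that define the QCOM extension values).
if (is_tbdr && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
    store_op = VK_ATTACHMENT_STORE_OP_NONE_QCOM;
}

VkSubpassDescriptionFlags subpass_flags = 0;
if (key.qcom_shader_resolve) {
    subpass_flags |= VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM;
}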

View File

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -17,6 +20,15 @@ struct RenderPassKey {
     std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
     VideoCore::Surface::PixelFormat depth_format;
     VkSampleCountFlagBits samples;
+    // TBDR optimization hints - only affect tile-based GPUs (Qualcomm, ARM, Imagination)
+    // These flags indicate the expected usage pattern to optimize load/store operations
+    bool tbdr_will_clear{false};     // Attachment will be cleared with vkCmdClearAttachments
+    bool tbdr_discard_after{false};  // Attachment won't be read after render pass
+    bool tbdr_read_only{false};      // Attachment is read-only (input attachment, depth test without writes)
+    // VK_QCOM_render_pass_shader_resolve support
+    bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass)
 };
 } // namespace Vulkan
@@ -27,6 +39,8 @@ struct hash<Vulkan::RenderPassKey> {
     [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
         size_t value = static_cast<size_t>(key.depth_format) << 48;
         value ^= static_cast<size_t>(key.samples) << 52;
+        value ^= (static_cast<size_t>(key.tbdr_will_clear) << 56);
+        value ^= (static_cast<size_t>(key.tbdr_discard_after) << 57);
         for (size_t i = 0; i < key.color_formats.size(); ++i) {
             value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
         }

View File

@@ -171,6 +171,10 @@ void Swapchain::Create(
resource_ticks.clear(); resource_ticks.clear();
resource_ticks.resize(image_count); resource_ticks.resize(image_count);
// Initialize incremental-present probe flags for this swapchain.
incremental_present_usable = device.IsKhrIncrementalPresentSupported();
incremental_present_probed = false;
} }
bool Swapchain::AcquireNextImage() { bool Swapchain::AcquireNextImage() {
@@ -202,7 +206,13 @@ bool Swapchain::AcquireNextImage() {
void Swapchain::Present(VkSemaphore render_semaphore) { void Swapchain::Present(VkSemaphore render_semaphore) {
const auto present_queue{device.GetPresentQueue()}; const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{ // If the device advertises VK_KHR_incremental_present, we attempt a one-time probe
// on the first present to validate the driver/compositor accepts present-region info.
VkPresentRegionsKHR present_regions{};
VkPresentRegionKHR region{};
VkRectLayerKHR layer{};
VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr, .pNext = nullptr,
.waitSemaphoreCount = render_semaphore ? 1U : 0U, .waitSemaphoreCount = render_semaphore ? 1U : 0U,
@@ -212,6 +222,20 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index, .pImageIndices = &image_index,
.pResults = nullptr, .pResults = nullptr,
}; };
if (incremental_present_usable && !incremental_present_probed) {
// Build a minimal present-region describing a single 1x1 dirty rect at (0,0).
layer.offset = {0, 0};
layer.extent = {1, 1};
region.rectangleCount = 1;
region.pRectangles = &layer;
present_regions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
present_regions.pNext = nullptr;
present_regions.swapchainCount = 1;
present_regions.pRegions = &region;
present_info.pNext = &present_regions;
}
std::scoped_lock lock{scheduler.submit_mutex}; std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result = present_queue.Present(present_info)) { switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS: case VK_SUCCESS:
@@ -227,8 +251,18 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
break; break;
default: default:
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", string_VkResult(result)); LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", string_VkResult(result));
// If the first present with incremental-present pNext failed, disable future use.
if (incremental_present_usable && !incremental_present_probed) {
incremental_present_usable = false;
LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_incremental_present for this swapchain due to present failure: {}", string_VkResult(result));
}
break; break;
} }
if (incremental_present_usable && !incremental_present_probed) {
// Mark probe as completed if we reached here (success or handled failure above).
incremental_present_probed = true;
LOG_INFO(Render_Vulkan, "VK_KHR_incremental_present probe completed: usable={}", incremental_present_usable);
}
++frame_index; ++frame_index;
if (frame_index >= image_count) { if (frame_index >= image_count) {
frame_index = 0; frame_index = 0;
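Beyond the probe, the point of the extension is to pass real damage rectangles on later presents. A rough sketch of that follow-up, not part of this change; GetDirtyRect() is a hypothetical helper:

// Sketch only: attach an actual damage region once the probe has passed.
if (incremental_present_usable && incremental_present_probed) {
    const VkRect2D dirty = GetDirtyRect();  // hypothetical: region actually redrawn this frame
    layer.offset = dirty.offset;
    layer.extent = dirty.extent;
    layer.layer = 0;  // base layer of the swapchain image
    region.rectangleCount = 1;
    region.pRectangles = &layer;
    present_regions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
    present_regions.swapchainCount = 1;
    present_regions.pRegions = &region;
    present_info.pNext = &present_regions;
}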

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@@ -158,6 +161,8 @@ private:
bool is_outdated{}; bool is_outdated{};
bool is_suboptimal{}; bool is_suboptimal{};
bool incremental_present_usable{};
bool incremental_present_probed{};
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@@ -66,10 +66,20 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
} }
} }
[[nodiscard]] VkImageType ConvertImageType(const ImageType type) { [[nodiscard]] VkImageType ConvertImageType(const ImageType type, const Device& device) {
switch (type) { switch (type) {
case ImageType::e1D: case ImageType::e1D:
return VK_IMAGE_TYPE_1D; // Mobile Vulkan drivers (Adreno, Mali, VideoCore, PowerVR) commonly lack the Sampled1D SPIR-V capability
// Emulate as a 2D texture with height=1 on mobile; use native 1D on desktop
{
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
return is_mobile ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
}
case ImageType::e2D: case ImageType::e2D:
case ImageType::Linear: case ImageType::Linear:
return VK_IMAGE_TYPE_2D; return VK_IMAGE_TYPE_2D;
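The driver-ID test above is repeated verbatim in the two ImageViewType() overloads later in this file; a small sketch of how it could be factored into a single predicate (the helper name is an assumption, not part of the diff):

// Sketch only: shared predicate for the mobile drivers that lack the Sampled1D capability.
[[nodiscard]] bool IsMobileDriver(const Device& device) {
    switch (device.GetDriverID()) {
    case VK_DRIVER_ID_QUALCOMM_PROPRIETARY:
    case VK_DRIVER_ID_MESA_TURNIP:
    case VK_DRIVER_ID_ARM_PROPRIETARY:
    case VK_DRIVER_ID_BROADCOM_PROPRIETARY:
    case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
        return true;
    default:
        return false;
    }
}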
@@ -141,7 +151,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = flags, .flags = flags,
.imageType = ConvertImageType(info.type), .imageType = ConvertImageType(info.type, device),
.format = format_info.format, .format = format_info.format,
.extent{ .extent{
.width = info.size.width >> samples_x, .width = info.size.width >> samples_x,
@@ -160,6 +170,40 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}; };
} }
/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
if (info.num_samples <= 1) {
return info;
}
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal,
false, info.format).format;
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
if (!is_hdr_format) {
return info;
}
// Qualcomm: VK_QCOM_render_pass_shader_resolve handles HDR+MSAA
if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
if (device.IsQcomRenderPassShaderResolveSupported()) {
return info;
}
}
// Other vendors: shaderStorageImageMultisample handles HDR+MSAA
if (device.IsStorageImageMultisampleSupported()) {
return info;
}
// No suitable resolve method - degrade to non-MSAA
LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples",
vk_format);
info.num_samples = 1;
return info;
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, [[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
const ImageInfo& info, std::span<const VkFormat> view_formats) { const ImageInfo& info, std::span<const VkFormat> view_formats) {
if (info.type == ImageType::Buffer) { if (info.type == ImageType::Buffer) {
@@ -272,10 +316,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_COMPONENT_SWIZZLE_ZERO; return VK_COMPONENT_SWIZZLE_ZERO;
} }
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { [[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type, const Device& device) {
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
switch (type) { switch (type) {
case Shader::TextureType::Color1D: case Shader::TextureType::Color1D:
return VK_IMAGE_VIEW_TYPE_1D; // Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
case Shader::TextureType::Color2D: case Shader::TextureType::Color2D:
case Shader::TextureType::Color2DRect: case Shader::TextureType::Color2DRect:
return VK_IMAGE_VIEW_TYPE_2D; return VK_IMAGE_VIEW_TYPE_2D;
@@ -284,7 +336,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case Shader::TextureType::Color3D: case Shader::TextureType::Color3D:
return VK_IMAGE_VIEW_TYPE_3D; return VK_IMAGE_VIEW_TYPE_3D;
case Shader::TextureType::ColorArray1D: case Shader::TextureType::ColorArray1D:
return VK_IMAGE_VIEW_TYPE_1D_ARRAY; // Emulate 1D array as 2D array with height=1 on mobile
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case Shader::TextureType::ColorArray2D: case Shader::TextureType::ColorArray2D:
return VK_IMAGE_VIEW_TYPE_2D_ARRAY; return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case Shader::TextureType::ColorArrayCube: case Shader::TextureType::ColorArrayCube:
@@ -297,10 +350,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_IMAGE_VIEW_TYPE_2D; return VK_IMAGE_VIEW_TYPE_2D;
} }
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type, const Device& device) {
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
switch (type) { switch (type) {
case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1D:
return VK_IMAGE_VIEW_TYPE_1D; // Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2D:
case VideoCommon::ImageViewType::Rect: case VideoCommon::ImageViewType::Rect:
return VK_IMAGE_VIEW_TYPE_2D; return VK_IMAGE_VIEW_TYPE_2D;
@@ -309,7 +370,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case VideoCommon::ImageViewType::e3D: case VideoCommon::ImageViewType::e3D:
return VK_IMAGE_VIEW_TYPE_3D; return VK_IMAGE_VIEW_TYPE_3D;
case VideoCommon::ImageViewType::e1DArray: case VideoCommon::ImageViewType::e1DArray:
return VK_IMAGE_VIEW_TYPE_1D_ARRAY; // Emulate 1D array as 2D array with height=1 on mobile
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case VideoCommon::ImageViewType::e2DArray: case VideoCommon::ImageViewType::e2DArray:
return VK_IMAGE_VIEW_TYPE_2D_ARRAY; return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case VideoCommon::ImageViewType::CubeArray: case VideoCommon::ImageViewType::CubeArray:
@@ -857,6 +919,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
compute_pass_descriptor_queue, memory_allocator); compute_pass_descriptor_queue, memory_allocator);
} }
// MSAA copy support via compute shader (only for non-Qualcomm with shaderStorageImageMultisample)
// Qualcomm uses VK_QCOM_render_pass_shader_resolve (fragment shader in render pass)
if (device.IsStorageImageMultisampleSupported()) { if (device.IsStorageImageMultisampleSupported()) {
msaa_copy_pass = std::make_unique<MSAACopyPass>( msaa_copy_pass = std::make_unique<MSAACopyPass>(
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
@@ -1323,7 +1388,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
case PixelFormat::ASTC_2D_8X6_SRGB: case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_UNORM: case PixelFormat::ASTC_2D_6X5_UNORM:
case PixelFormat::ASTC_2D_6X5_SRGB: case PixelFormat::ASTC_2D_6X5_SRGB:
case PixelFormat::E5B9G9R9_FLOAT:
case PixelFormat::D32_FLOAT: case PixelFormat::D32_FLOAT:
case PixelFormat::D16_UNORM: case PixelFormat::D16_UNORM:
case PixelFormat::X8_D24_UNORM: case PixelFormat::X8_D24_UNORM:
@@ -1487,6 +1551,23 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) { std::span<const VideoCommon::ImageCopy> copies) {
const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1; const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1;
// Use VK_QCOM_render_pass_shader_resolve for HDR formats on Qualcomm
// This is more efficient than a compute-shader resolve (the data stays on-chip on TBDR GPUs)
const bool is_hdr_format = src.info.format == PixelFormat::B10G11R11_FLOAT ||
dst.info.format == PixelFormat::B10G11R11_FLOAT;
const bool use_qcom_resolve = msaa_to_non_msaa &&
device.IsQcomRenderPassShaderResolveSupported() &&
is_hdr_format &&
copies.size() == 1; // QCOM resolve works best with a single full-image copy
if (use_qcom_resolve) {
// Create temporary framebuffer with resolve target
// TODO Camille: Implement QCOM shader resolve path with proper framebuffer setup
// For now, fall through to standard path
LOG_DEBUG(Render_Vulkan, "QCOM shader resolve opportunity detected but not yet implemented");
}
if (msaa_copy_pass) { if (msaa_copy_pass) {
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa); return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
} }
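For the TODO above, the essential piece is the final subpass flagged for shader resolve. A rough sketch under the assumption that the multisampled HDR source is bound as an input attachment and the single-sample destination as the color attachment; the real path still needs the framebuffer, pipeline and resolve fragment shader:

// Sketch only: a last subpass using VK_QCOM_render_pass_shader_resolve.
// Attachment indices are placeholders for illustration.
const VkAttachmentReference msaa_input_ref{.attachment = 0,  // multisampled HDR source
                                           .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL};
const VkAttachmentReference resolve_dst_ref{.attachment = 1,  // single-sample destination
                                            .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
const VkSubpassDescription resolve_subpass{
    .flags = VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM,  // fragment shader performs the resolve
    .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
    .inputAttachmentCount = 1,
    .pInputAttachments = &msaa_input_ref,
    .colorAttachmentCount = 1,
    .pColorAttachments = &resolve_dst_ref,
};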
@@ -1510,10 +1591,20 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_) VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info, runtime{&runtime_} {
runtime->ViewFormats(info.format))), // CRITICAL: Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample
aspect_mask(ImageAspectMask(info.format)) { // This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_);
// Update our stored info with adjusted values (may have num_samples=1 now)
info = adjusted_info;
// Create image with adjusted info
original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info,
runtime->ViewFormats(adjusted_info.format));
aspect_mask = ImageAspectMask(adjusted_info.format);
if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) {
switch (Settings::values.accelerate_astc.GetValue()) { switch (Settings::values.accelerate_astc.GetValue()) {
case Settings::AstcDecodeMode::Gpu: case Settings::AstcDecodeMode::Gpu:
if (Settings::values.astc_recompression.GetValue() == if (Settings::values.astc_recompression.GetValue() ==
@@ -2029,6 +2120,21 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
} }
} }
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
// Some games sample R32_UINT textures with float samplers, which causes validation errors and
// undefined behavior (flickering, missing geometry).
// Reinterpret R32_UINT as R32_SFLOAT for sampled images to match shader expectations
VkFormat view_format = format_info.format;
if (view_format == VK_FORMAT_R32_UINT &&
!info.IsRenderTarget() &&
(ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_SAMPLED_BIT)) {
// Only reinterpret if NOT used as storage image (storage requires matching types)
const bool is_storage = (ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_STORAGE_BIT) != 0;
if (!is_storage) {
view_format = VK_FORMAT_R32_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R32_SFLOAT for sampled image compatibility");
}
}
if (ImageUsageFlags(format_info, format) != image.UsageFlags()) { if (ImageUsageFlags(format_info, format) != image.UsageFlags()) {
LOG_WARNING(Render_Vulkan, LOG_WARNING(Render_Vulkan,
"Image view format {} has different usage flags than image format {}", format, "Image view format {} has different usage flags than image format {}", format,
@@ -2045,7 +2151,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.flags = 0, .flags = 0,
.image = image.Handle(), .image = image.Handle(),
.viewType = VkImageViewType{}, .viewType = VkImageViewType{},
.format = format_info.format, .format = view_format,
.components{ .components{
.r = ComponentSwizzle(swizzle[0]), .r = ComponentSwizzle(swizzle[0]),
.g = ComponentSwizzle(swizzle[1]), .g = ComponentSwizzle(swizzle[1]),
@@ -2056,7 +2162,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}; };
const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) { const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
VkImageViewCreateInfo ci{create_info}; VkImageViewCreateInfo ci{create_info};
ci.viewType = ImageViewType(tex_type); ci.viewType = ImageViewType(tex_type, *device);
if (num_layers) { if (num_layers) {
ci.subresourceRange.layerCount = *num_layers; ci.subresourceRange.layerCount = *num_layers;
} }
@@ -2197,7 +2303,7 @@ vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.image = image_handle, .image = image_handle,
.viewType = ImageViewType(type), .viewType = ImageViewType(type, *device),
.format = vk_format, .format = vk_format,
.components{ .components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY, .r = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -2343,6 +2449,26 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
} }
renderpass_key.samples = samples; renderpass_key.samples = samples;
// Enable VK_QCOM_render_pass_shader_resolve for HDR+MSAA on Qualcomm
// This performs MSAA resolve using fragment shader IN the render pass (on-chip)
// Benefits: ~70% bandwidth reduction, better performance on TBDR architectures
// Requirements: pResolveAttachments configured + explicit shader execution
if (samples > VK_SAMPLE_COUNT_1_BIT && runtime.device.IsQcomRenderPassShaderResolveSupported()) {
// Check whether any color attachment uses an HDR format that benefits from shader resolve
bool has_hdr_attachment = false;
for (size_t index = 0; index < NUM_RT && !has_hdr_attachment; ++index) {
const auto format = renderpass_key.color_formats[index];
// B10G11R11_FLOAT benefits most: the compute-shader path is limited for it and fixed-function resolve is slower
if (format == PixelFormat::B10G11R11_FLOAT) {
has_hdr_attachment = true;
}
}
if (has_hdr_attachment) {
renderpass_key.qcom_shader_resolve = true;
}
}
renderpass = runtime.render_pass_cache.Get(renderpass_key); renderpass = runtime.render_pass_cache.Get(renderpass_key);
render_area.width = (std::min)(render_area.width, width); render_area.width = (std::min)(render_area.width, width);
render_area.height = (std::min)(render_area.height, height); render_area.height = (std::min)(render_area.height, height);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
@@ -85,6 +88,10 @@ public:
return msaa_copy_pass.operator bool(); return msaa_copy_pass.operator bool();
} }
bool CanDownloadMSAA() const noexcept {
return msaa_copy_pass.operator bool();
}
void AccelerateImageUpload(Image&, const StagingBufferRef&, void AccelerateImageUpload(Image&, const StagingBufferRef&,
std::span<const VideoCommon::SwizzleParameters>); std::span<const VideoCommon::SwizzleParameters>);

View File

@@ -131,10 +131,6 @@ bool ImageBase::IsSafeDownload() const noexcept {
if (True(flags & ImageFlagBits::CpuModified)) { if (True(flags & ImageFlagBits::CpuModified)) {
return false; return false;
} }
if (info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
return false;
}
return true; return true;
} }

View File

@@ -101,8 +101,12 @@ void TextureCache<P>::RunGarbageCollector() {
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
return false; return false;
} }
const bool must_download = const bool supports_msaa_download = HasMsaaDownloadSupport(image.info);
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); if (!supports_msaa_download && image.info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
}
const bool must_download = supports_msaa_download && image.IsSafeDownload() &&
False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode && must_download) { if (!high_priority_mode && must_download) {
return false; return false;
} }
@@ -548,10 +552,14 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
template <class P> template <class P>
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) { void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
boost::container::small_vector<ImageId, 16> images; boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) { if (!image.IsSafeDownload()) {
return; return;
} }
if (!HasMsaaDownloadSupport(image.info)) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
return;
}
image.flags &= ~ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::GpuModified;
images.push_back(image_id); images.push_back(image_id);
}); });
@@ -930,6 +938,17 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
return NULL_IMAGE_ID; return NULL_IMAGE_ID;
} }
auto& image = slot_images[dst_id]; auto& image = slot_images[dst_id];
if (image.info.num_samples > 1) {
if (is_upload) {
if (!HasMsaaUploadSupport(image.info)) {
return NULL_IMAGE_ID;
}
} else {
if (!HasMsaaDownloadSupport(image.info)) {
return NULL_IMAGE_ID;
}
}
}
if (False(image.flags & ImageFlagBits::GpuModified)) { if (False(image.flags & ImageFlagBits::GpuModified)) {
// No need to waste time on an image that's synced with guest // No need to waste time on an image that's synced with guest
return NULL_IMAGE_ID; return NULL_IMAGE_ID;
@@ -1056,7 +1075,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::CpuModified;
TrackImage(image, image_id); TrackImage(image, image_id);
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { if (!HasMsaaUploadSupport(image.info)) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
runtime.TransitionImageLayout(image); runtime.TransitionImageLayout(image);
return; return;
@@ -1274,6 +1293,16 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
return fitted_size; return fitted_size;
} }
template <class P>
bool TextureCache<P>::HasMsaaUploadSupport(const ImageInfo& info) const noexcept {
return info.num_samples <= 1 || runtime.CanUploadMSAA();
}
template <class P>
bool TextureCache<P>::HasMsaaDownloadSupport(const ImageInfo& info) const noexcept {
return info.num_samples <= 1 || runtime.CanDownloadMSAA();
}
template <class P> template <class P>
void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted)); UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
@@ -1491,7 +1520,31 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
for (const ImageId overlap_id : join_ignore_textures) { for (const ImageId overlap_id : join_ignore_textures) {
Image& overlap = slot_images[overlap_id]; Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) { if (True(overlap.flags & ImageFlagBits::GpuModified)) {
UNIMPLEMENTED(); // Merge GPU-modified contents from the overlapping image into the newly
// created image to preserve guest-visible data. Compute shrink/scale
// copies and dispatch a GPU-side copy. This mirrors the behavior used
// for overlaps handled in join_copies_to_do above.
new_image.flags |= ImageFlagBits::GpuModified;
const auto& resolution = Settings::values.resolution_info;
const auto base_opt = new_image.TryFindBase(overlap.gpu_addr);
if (base_opt) {
const SubresourceBase base = base_opt.value();
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
if (overlap.info.num_samples != new_image.info.num_samples) {
runtime.CopyImageMSAA(new_image, overlap, FixSmallVectorADL(copies));
} else {
runtime.CopyImage(new_image, overlap, FixSmallVectorADL(copies));
}
new_image.modification_tick = overlap.modification_tick;
} else {
// If we cannot determine a base mapping, fall back to preserving the
// overlap (avoid deleting GPU-modified data) and log the event so it can be
// investigated; we are trying to pinpoint the cause of the texture flickering.
LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap", u64(overlap.gpu_addr));
continue;
}
} }
if (True(overlap.flags & ImageFlagBits::Tracked)) { if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id); UntrackImage(overlap, overlap_id);
@@ -1551,6 +1604,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
for (const auto& copy_object : join_copies_to_do) { for (const auto& copy_object : join_copies_to_do) {
Image& overlap = slot_images[copy_object.id]; Image& overlap = slot_images[copy_object.id];
if (copy_object.is_alias) { if (copy_object.is_alias) {
if (!HasMsaaDownloadSupport(overlap.info)) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
continue;
}
if (!overlap.IsSafeDownload()) { if (!overlap.IsSafeDownload()) {
continue; continue;
} }
@@ -2467,8 +2524,13 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (new_id) { if (new_id) {
const ImageViewBase& old_view = slot_image_views[new_id]; const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
const PendingDownload new_download{true, 0, old_view.image_id}; const ImageBase& image = slot_images[old_view.image_id];
uncommitted_downloads.emplace_back(new_download); if (!HasMsaaDownloadSupport(image.info)) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
} else {
const PendingDownload new_download{true, 0, old_view.image_id};
uncommitted_downloads.emplace_back(new_download);
}
} }
} }
*old_id = new_id; *old_id = new_id;

View File

@@ -426,6 +426,8 @@ private:
bool ScaleUp(Image& image); bool ScaleUp(Image& image);
bool ScaleDown(Image& image); bool ScaleDown(Image& image);
u64 GetScaledImageSizeBytes(const ImageBase& image); u64 GetScaledImageSizeBytes(const ImageBase& image);
[[nodiscard]] bool HasMsaaUploadSupport(const ImageInfo& info) const noexcept;
[[nodiscard]] bool HasMsaaDownloadSupport(const ImageInfo& info) const noexcept;
void QueueAsyncDecode(Image& image, ImageId image_id); void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode(); void TickAsyncDecode();

View File

@@ -22,6 +22,34 @@
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
#ifndef VK_KHR_MAINTENANCE_1_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_1_EXTENSION_NAME "VK_KHR_maintenance1"
#endif
#ifndef VK_KHR_MAINTENANCE_2_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_2_EXTENSION_NAME "VK_KHR_maintenance2"
#endif
#ifndef VK_KHR_MAINTENANCE_3_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_3_EXTENSION_NAME "VK_KHR_maintenance3"
#endif
#ifndef VK_KHR_MAINTENANCE_4_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_4_EXTENSION_NAME "VK_KHR_maintenance4"
#endif
#ifndef VK_KHR_MAINTENANCE_5_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_5_EXTENSION_NAME "VK_KHR_maintenance5"
#endif
#ifndef VK_KHR_MAINTENANCE_6_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_6_EXTENSION_NAME "VK_KHR_maintenance6"
#endif
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
#endif
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
#endif
#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9"
#endif
// Sanitize macros // Sanitize macros
#undef CreateEvent #undef CreateEvent
#undef CreateSemaphore #undef CreateSemaphore

View File

@@ -1,6 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@@ -90,6 +89,36 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED,
}; };
// B10G11R11_UFLOAT (R11G11B10F) - PRIMARY HDR format for Nintendo Switch
// Nintendo Switch hardware validation: FULL support (COLOR_ATTACHMENT + STORAGE_IMAGE + BLEND)
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - All required feature bits present
//
// Fallback strategy: Degrade to LDR instead of expensive HDR emulation
// - RGBA8 UNORM/SRGB: Universal support, 32-bit (same size as B10G11R11), acceptable quality
// - RGB10A2: Better precision if available, still 32-bit
// - RGBA16F: Last resort, only if the 32-bit fallbacks above fail (should never happen)
constexpr std::array B10G11R11_UFLOAT_PACK32{
#ifdef ANDROID
VK_FORMAT_A8B8G8R8_SRGB_PACK32, // sRGB variant (for gamma-correct fallback)
#else
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Primary fallback: RGBA8 LDR (32-bit, universal)
VK_FORMAT_A2B10G10R10_UNORM_PACK32, // Better precision: RGB10A2 (32-bit, common)
#endif
VK_FORMAT_R16G16B16A16_SFLOAT, // Emergency fallback: RGBA16F (64-bit, should never reach)
VK_FORMAT_UNDEFINED,
};
// E5B9G9R9_UFLOAT (RGB9E5) - INVALID for COLOR_ATTACHMENT on Nintendo Switch
// Nintendo Switch hardware validation: NO COLOR_ATTACHMENT_BIT (only SAMPLED_IMAGE)
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - Missing required attachment bits
// This format should NEVER be used as render target, only for texture sampling
constexpr std::array E5B9G9R9_UFLOAT_PACK32{
VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Upgrade to proper HDR format with attachment support
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Fallback: RGBA8 LDR
VK_FORMAT_R16G16B16A16_SFLOAT, // Last resort: RGBA16F
VK_FORMAT_UNDEFINED,
};
} // namespace Alternatives } // namespace Alternatives
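Each of these alternative lists is priority-ordered and terminated by VK_FORMAT_UNDEFINED; GetSupportedFormat (later in this diff) walks it and takes the first supported entry. A minimal sketch of that walk, with the free-standing helper and the public IsFormatSupported call assumed for illustration:

// Sketch only: how a VK_FORMAT_UNDEFINED-terminated alternatives chain is consumed.
VkFormat PickAlternative(const VkFormat* alternatives, VkFormatFeatureFlags wanted_usage,
                         FormatType format_type, const Device& device) {
    for (const VkFormat* alt = alternatives; *alt != VK_FORMAT_UNDEFINED; ++alt) {
        if (device.IsFormatSupported(*alt, wanted_usage, format_type)) {
            return *alt;  // array order encodes the fallback preference
        }
    }
    return VK_FORMAT_UNDEFINED;  // nothing in the chain is usable
}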
template <typename T> template <typename T>
@@ -122,6 +151,10 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data(); return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT: case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data(); return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return Alternatives::E5B9G9R9_UFLOAT_PACK32.data();
default: default:
return nullptr; return nullptr;
} }
@@ -209,7 +242,6 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_SNORM,
@@ -416,7 +448,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_suitable = GetSuitability(surface != nullptr); const bool is_suitable = GetSuitability(surface != nullptr);
const VkDriverId driver_id = properties.driver.driverID; const VkDriverId driver_id = properties.driver.driverID;
const auto device_id = properties.properties.deviceID; // uncomment this if you want per-device overrides :P
// const u32 device_id = properties.properties.deviceID;
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
const bool is_amd_driver = const bool is_amd_driver =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
@@ -427,7 +461,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK; const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP; const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
const bool is_s8gen2 = device_id == 0x43050a01;
const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) { if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) {
@@ -494,11 +527,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectPhysicalMemoryInfo(); CollectPhysicalMemoryInfo();
CollectToolingInfo(); CollectToolingInfo();
if (is_qualcomm || is_turnip) { // Driver-specific handling for VK_EXT_custom_border_color
LOG_WARNING(Render_Vulkan, // On some Qualcomm/Turnip/ARM drivers the extension may be partially implemented.
"Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color"); // Enable it if ANY useful feature bit is reported; otherwise, let the removal pass drop it.
//RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color, if (is_qualcomm || is_turnip || is_arm) {
//VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); const bool has_any_custom_border_color =
features.custom_border_color.customBorderColors ||
features.custom_border_color.customBorderColorWithoutFormat;
if (!has_any_custom_border_color) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_EXT_custom_border_color on '{}' — no usable custom border color features reported",
properties.driver.driverName);
// Do not clear here; final removal happens in RemoveUnsuitableExtensions based on bits.
} else {
LOG_INFO(Render_Vulkan,
"Partial VK_EXT_custom_border_color support detected on '{}' — enabling available features",
properties.driver.driverName);
}
} }
if (is_qualcomm) { if (is_qualcomm) {
@@ -506,9 +551,85 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
"Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
//RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); //RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
// Log Qualcomm-specific optimizations
if (extensions.render_pass_store_ops) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_store_ops: ENABLED (TBDR store optimization)");
}
if (extensions.tile_properties) {
LOG_INFO(Render_Vulkan, "VK_QCOM_tile_properties: ENABLED (tile size queries available)");
}
if (extensions.render_pass_shader_resolve) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_shader_resolve: ENABLED (HDR+MSAA shader resolve)");
}
#ifdef ANDROID
// Shader Float Controls handling for Qualcomm Adreno
// Default: DISABLED due to historical issues with binning precision causing visual glitches
const bool force_enable = Settings::values.shader_float_controls_force_enable.GetValue();
if (force_enable) {
// User explicitly enabled float controls - log detected capabilities and user config
LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)");
// Log driver capabilities
const auto& fc = properties.float_controls;
LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:");
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - RTE Rounding FP32: {}", fc.shaderRoundingModeRTEFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan FP32: {}", fc.shaderSignedZeroInfNanPreserveFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Independence: {}",
fc.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL ? "ALL" : "LIMITED");
// Log user selections
bool ftz = Settings::values.shader_float_ftz.GetValue();
bool preserve = Settings::values.shader_float_denorm_preserve.GetValue();
const bool rte = Settings::values.shader_float_rte.GetValue();
const bool signed_zero = Settings::values.shader_float_signed_zero_inf_nan.GetValue();
// Validate mutually exclusive options
if (ftz && preserve) {
LOG_WARNING(Render_Vulkan,
"CONFLICT: FTZ and DenormPreserve are mutually exclusive!");
LOG_WARNING(Render_Vulkan,
" -> DenormPreserve will take precedence (accuracy over speed)");
ftz = false; // Preserve takes priority for correctness
}
LOG_INFO(Render_Vulkan, "User Float Behavior Selection:");
LOG_INFO(Render_Vulkan, " - Flush To Zero (FTZ): {}", ftz ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Denorm Preserve: {}", preserve ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Round To Even (RTE): {}", rte ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan: {}", signed_zero ? "ENABLED" : "disabled");
// Analyze configuration vs Switch native behavior
const bool matches_switch = ftz && !preserve && rte && signed_zero;
if (matches_switch) {
LOG_INFO(Render_Vulkan, "Configuration MATCHES Switch/Maxwell native behavior (FTZ+RTE+SignedZero)");
} else if (!ftz && !preserve && !rte && !signed_zero) {
LOG_WARNING(Render_Vulkan, "No float behaviors selected - using driver default (may cause glitches)");
} else {
LOG_INFO(Render_Vulkan, "Configuration is CUSTOM - testing mode active");
}
// Extension stays enabled
LOG_INFO(Render_Vulkan, "VK_KHR_shader_float_controls: ENABLED");
} else {
// Default behavior - disable float controls
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls on Qualcomm (historical binning precision issues)");
LOG_INFO(Render_Vulkan,
"To enable: Eden Veil -> Extensions -> Shader Float Controls (Force Enable)");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
}
#else
// Non-Android: keep original behavior
LOG_WARNING(Render_Vulkan, LOG_WARNING(Render_Vulkan,
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers"); "Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
#endif
// Int64 atomics - genuinely broken, always disable
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
features.shader_atomic_int64.shaderBufferInt64Atomics = false; features.shader_atomic_int64.shaderBufferInt64Atomics = false;
@@ -681,9 +802,40 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
has_broken_compute = has_broken_compute =
CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) && CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
!Settings::values.enable_compute_pipelines.GetValue(); !Settings::values.enable_compute_pipelines.GetValue();
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { must_emulate_bgr565 = false; // Default: assume emulation isn't required
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
if (is_intel_anv) {
LOG_WARNING(Render_Vulkan, "Intel ANV driver does not support native BGR format");
must_emulate_bgr565 = true; must_emulate_bgr565 = true;
} else if (is_qualcomm) {
// Qualcomm driver version where VK_KHR_maintenance5 and A1B5G5R5 become reliable
constexpr uint32_t QUALCOMM_FIXED_DRIVER_VERSION = VK_MAKE_VERSION(512, 800, 1);
// Check if VK_KHR_maintenance5 is supported
if (extensions.maintenance5 && properties.properties.driverVersion >= QUALCOMM_FIXED_DRIVER_VERSION) {
LOG_INFO(Render_Vulkan, "Qualcomm driver supports VK_KHR_maintenance5, disabling BGR emulation");
must_emulate_bgr565 = false;
} else {
LOG_WARNING(Render_Vulkan, "Qualcomm driver doesn't support native BGR, emulating formats");
must_emulate_bgr565 = true;
}
} else if (is_turnip) {
// Mesa Turnip added support for maintenance5 in Mesa 25.0
if (extensions.maintenance5) {
LOG_INFO(Render_Vulkan, "Turnip driver supports VK_KHR_maintenance5, disabling BGR emulation");
must_emulate_bgr565 = false;
} else {
LOG_WARNING(Render_Vulkan, "Turnip driver doesn't support native BGR, emulating formats");
must_emulate_bgr565 = true;
}
} else if (is_arm) {
// ARM Mali: stop emulating BGR5 formats when VK_KHR_maintenance5 is available
if (extensions.maintenance5) {
LOG_INFO(Render_Vulkan, "ARM driver supports VK_KHR_maintenance5, disabling BGR emulation");
must_emulate_bgr565 = false;
} else {
LOG_WARNING(Render_Vulkan, "ARM driver doesn't support native BGR, emulating formats");
must_emulate_bgr565 = true;
}
} }
if (extensions.push_descriptor && is_intel_anv) { if (extensions.push_descriptor && is_intel_anv) {
const u32 version = (properties.properties.driverVersion << 3) >> 3; const u32 version = (properties.properties.driverVersion << 3) >> 3;
@@ -800,15 +952,32 @@ Device::~Device() {
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const { FormatType format_type) const {
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format; // CRITICAL FIX: Even if format is "supported", check for STORAGE + HDR + no MSAA support
// A driver may report STORAGE_IMAGE_BIT while shaderStorageImageMultisample is false, in which
// case the format fails at runtime when used with MSAA (CopyImageMSAA silently fails)
const bool requests_storage = (wanted_usage & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0;
const bool is_hdr_format = wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
// If driver doesn't support shader storage image with MSAA, and we're requesting storage
// for an HDR format (which will likely be used with MSAA), force fallback
if (requests_storage && is_hdr_format && !features.features.shaderStorageImageMultisample) {
LOG_WARNING(Render_Vulkan,
"Format {} reports STORAGE_IMAGE_BIT but driver doesn't support "
"shaderStorageImageMultisample. Forcing fallback for MSAA compatibility.",
wanted_format);
// Continue to alternatives search below
} else {
return wanted_format;
}
} }
// The wanted format is not supported by hardware, search for alternatives // The wanted format is not supported by hardware, search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format); const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) { if (alternatives == nullptr) {
LOG_ERROR(Render_Vulkan, LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host " "Format={} (0x{:X}) with usage={} and type={} has no defined alternatives and host "
"hardware does not support it", "hardware does not support it. Driver: {} Device: {}",
wanted_format, wanted_usage, format_type); wanted_format, static_cast<u32>(wanted_format), wanted_usage, format_type,
GetDriverName(), properties.properties.deviceName);
return wanted_format; return wanted_format;
} }
@@ -817,9 +986,17 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
if (!IsFormatSupported(alternative, wanted_usage, format_type)) { if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue; continue;
} }
LOG_DEBUG(Render_Vulkan, // Special logging for HDR formats (common across multiple engines) on problematic drivers
if (wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
LOG_WARNING(Render_Vulkan,
"B10G11R11_UFLOAT_PACK32 (R11G11B10F HDR format) not fully supported. "
"Falling back to {} on {}",
alternative, properties.properties.deviceName);
} else {
LOG_DEBUG(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}", "Emulating format={} with alternative format={} with usage={} and type={}",
wanted_format, alternative, wanted_usage, format_type); wanted_format, alternative, wanted_usage, format_type);
}
return alternative; return alternative;
} }
@@ -1267,6 +1444,43 @@ void Device::RemoveUnsuitableExtensions() {
VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
} }
// VK_KHR_shader_float16_int8
const bool float16_int8_requested = extensions.shader_float16_int8;
const bool float16_int8_usable =
features.shader_float16_int8.shaderFloat16 || features.shader_float16_int8.shaderInt8;
if (float16_int8_requested && !float16_int8_usable) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_KHR_shader_float16_int8 — no shaderFloat16/shaderInt8 features reported");
}
extensions.shader_float16_int8 = float16_int8_requested && float16_int8_usable;
RemoveExtensionFeatureIfUnsuitable(float16_int8_usable, features.shader_float16_int8,
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
// VK_EXT_shader_atomic_float
const bool atomic_float_requested = extensions.shader_atomic_float;
const auto& atomic_float_features = features.shader_atomic_float;
const bool supports_buffer_f32 = atomic_float_features.shaderBufferFloat32Atomics ||
atomic_float_features.shaderBufferFloat32AtomicAdd;
const bool supports_shared_f32 = atomic_float_features.shaderSharedFloat32Atomics ||
atomic_float_features.shaderSharedFloat32AtomicAdd;
const bool supports_image_f32 = atomic_float_features.shaderImageFloat32Atomics ||
atomic_float_features.shaderImageFloat32AtomicAdd;
const bool supports_sparse_f32 = atomic_float_features.sparseImageFloat32Atomics ||
atomic_float_features.sparseImageFloat32AtomicAdd;
const bool supports_buffer_f64 = atomic_float_features.shaderBufferFloat64Atomics ||
atomic_float_features.shaderBufferFloat64AtomicAdd;
const bool supports_shared_f64 = atomic_float_features.shaderSharedFloat64Atomics ||
atomic_float_features.shaderSharedFloat64AtomicAdd;
const bool atomic_float_usable = supports_buffer_f32 || supports_shared_f32 || supports_image_f32 ||
supports_sparse_f32 || supports_buffer_f64 || supports_shared_f64;
if (atomic_float_requested && !atomic_float_usable) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_EXT_shader_atomic_float — no usable atomic float feature bits reported");
}
extensions.shader_atomic_float = atomic_float_requested && atomic_float_usable;
RemoveExtensionFeatureIfUnsuitable(atomic_float_usable, features.shader_atomic_float,
VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME);
// VK_KHR_shader_atomic_int64 // VK_KHR_shader_atomic_int64
extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
features.shader_atomic_int64.shaderSharedInt64Atomics; features.shader_atomic_int64.shaderSharedInt64Atomics;
@@ -1300,6 +1514,21 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback, RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
// VK_EXT_robustness2
extensions.robustness_2 =
features.robustness2.robustBufferAccess2 && features.robustness2.robustImageAccess2;
RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// VK_EXT_image_robustness
extensions.image_robustness = features.image_robustness.robustImageAccess;
RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness,
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
// VK_EXT_swapchain_maintenance1
extensions.swapchain_maintenance1 = loaded_extensions.contains(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.swapchain_maintenance1, VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
// VK_EXT_vertex_input_dynamic_state // VK_EXT_vertex_input_dynamic_state
extensions.vertex_input_dynamic_state = extensions.vertex_input_dynamic_state =
features.vertex_input_dynamic_state.vertexInputDynamicState; features.vertex_input_dynamic_state.vertexInputDynamicState;

View File

@@ -49,9 +49,11 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
FEATURE(EXT, ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, shader_atomic_float) \
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \ FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \ FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
primitive_topology_list_restart) \ primitive_topology_list_restart) \
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \ FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
@@ -61,7 +63,8 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \ FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \ pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \ FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
workgroup_memory_explicit_layout) workgroup_memory_explicit_layout) \
FEATURE(QCOM, TileProperties, TILE_PROPERTIES, tile_properties_qcom)
// Define miscellaneous extensions which may be used by the implementation here. // Define miscellaneous extensions which may be used by the implementation here.
#define FOR_EACH_VK_EXTENSION(EXTENSION) \ #define FOR_EACH_VK_EXTENSION(EXTENSION) \
@@ -82,7 +85,9 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \ EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
EXTENSION(KHR, SWAPCHAIN, swapchain) \ EXTENSION(KHR, SWAPCHAIN, swapchain) \
EXTENSION(KHR, INCREMENTAL_PRESENT, incremental_present) \
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \ EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
EXTENSION(EXT, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \
EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \ EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
@@ -90,7 +95,19 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) \ EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) \
EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \ EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \ EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \
EXTENSION(QCOM, RENDER_PASS_STORE_OPS, render_pass_store_ops) \
EXTENSION(QCOM, TILE_PROPERTIES, tile_properties) \
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
EXTENSION(KHR, MAINTENANCE_4, maintenance4) \
EXTENSION(KHR, MAINTENANCE_5, maintenance5) \
EXTENSION(KHR, MAINTENANCE_6, maintenance6) \
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
EXTENSION(KHR, MAINTENANCE_9, maintenance9)
// Define extensions which must be supported. // Define extensions which must be supported.
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
@@ -365,6 +382,12 @@ public:
return properties.subgroup_properties.supportedOperations & feature; return properties.subgroup_properties.supportedOperations & feature;
} }
/// Returns true if subgroup operations are supported in the specified shader stage.
/// Mobile GPUs (Qualcomm Adreno) often only support subgroups in fragment/compute stages.
bool IsSubgroupSupportedForStage(VkShaderStageFlagBits stage) const {
return properties.subgroup_properties.supportedStages & stage;
}
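A typical call site for the new per-stage query, as a brief sketch (the fallback itself is assumed):

// Sketch only: gate subgroup-dependent shader generation on per-stage support.
if (!device.IsSubgroupSupportedForStage(VK_SHADER_STAGE_VERTEX_BIT)) {
    // e.g. have the shader recompiler emit a non-subgroup fallback for vertex shaders
}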
/// Returns the maximum number of push descriptors. /// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const { u32 MaxPushDescriptors() const {
return properties.push_descriptor.maxPushDescriptors; return properties.push_descriptor.maxPushDescriptors;
@@ -455,6 +478,11 @@ public:
return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2; return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
} }
/// Returns true if the device supports VK_KHR_incremental_present.
bool IsKhrIncrementalPresentSupported() const {
return extensions.incremental_present;
}
/// Returns true if the device supports VK_EXT_primitive_topology_list_restart. /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
bool IsTopologyListPrimitiveRestartSupported() const { bool IsTopologyListPrimitiveRestartSupported() const {
return features.primitive_topology_list_restart.primitiveTopologyListRestart; return features.primitive_topology_list_restart.primitiveTopologyListRestart;
@@ -564,6 +592,41 @@ public:
return extensions.filter_cubic_weights; return extensions.filter_cubic_weights;
} }
/// Returns true if the device supports VK_QCOM_render_pass_shader_resolve
bool IsQcomRenderPassShaderResolveSupported() const {
return extensions.render_pass_shader_resolve;
}
/// Returns true if the device supports VK_QCOM_render_pass_store_ops
bool IsQcomRenderPassStoreOpsSupported() const {
return extensions.render_pass_store_ops;
}
/// Returns true if the device supports VK_QCOM_tile_properties
bool IsQcomTilePropertiesSupported() const {
return extensions.tile_properties;
}
/// Returns Qualcomm tile size (width, height, depth). Returns {0,0,0} if not queried or unsupported
VkExtent3D GetQcomTileSize() const {
return properties.qcom_tile_size;
}
/// Returns Qualcomm tile apron size. Returns {0,0} if not queried or unsupported
VkExtent2D GetQcomApronSize() const {
return properties.qcom_apron_size;
}
/// Returns true if MSAA copy operations are supported via compute shader (upload/download).
/// Qualcomm proprietary drivers do not report shaderStorageImageMultisample, so this is
/// typically false there; they use render pass shader resolve instead.
bool CanUploadMSAA() const {
return IsStorageImageMultisampleSupported();
}
bool CanDownloadMSAA() const {
return CanUploadMSAA();
}
/// Returns true if the device supports VK_EXT_line_rasterization. /// Returns true if the device supports VK_EXT_line_rasterization.
bool IsExtLineRasterizationSupported() const { bool IsExtLineRasterizationSupported() const {
return extensions.line_rasterization; return extensions.line_rasterization;
@@ -594,6 +657,11 @@ public:
return extensions.shader_atomic_int64; return extensions.shader_atomic_int64;
} }
/// Returns true if the device supports VK_EXT_shader_atomic_float.
bool IsExtShaderAtomicFloatSupported() const {
return extensions.shader_atomic_float;
}
bool IsExtConditionalRendering() const { bool IsExtConditionalRendering() const {
return extensions.conditional_rendering; return extensions.conditional_rendering;
} }
@@ -817,8 +885,9 @@ private:
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{}; VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
VkPhysicalDeviceProperties properties{}; VkPhysicalDeviceProperties properties{};
VkExtent3D qcom_tile_size{}; // Qualcomm tile dimensions (0 if not queried)
VkExtent2D qcom_apron_size{}; // Qualcomm tile apron size
}; };
Extensions extensions{}; Extensions extensions{};

View File

@@ -226,11 +226,24 @@ namespace Vulkan {
vk::Buffer vk::Buffer
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
{ {
// Qualcomm GPUs use a unified memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE
// for zero-copy access without staging buffers
const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload ||
usage == MemoryUsage::Download ||
usage == MemoryUsage::Stream);
VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage);
if (prefer_unified) {
// Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures
preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
}
const VmaAllocationCreateInfo alloc_ci = { const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage), .usage = MemoryUsageVma(usage),
.requiredFlags = 0, .requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage), .preferredFlags = preferred_flags,
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE, .pool = VK_NULL_HANDLE,
.pUserData = nullptr, .pUserData = nullptr,
@@ -245,6 +258,13 @@ namespace Vulkan {
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
if (is_qualcomm && prefer_unified) {
const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}",
static_cast<u32>(usage), got_unified, property_flags);
}
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData); u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{}; const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
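When the preferred flags above are honored, the returned buffer is directly writable by the CPU. A short usage sketch; buffer_ci, src_data/src_size and the Mapped() accessor are assumptions used only for illustration:

// Sketch only: zero-copy upload when the allocation lands in DEVICE_LOCAL + HOST_VISIBLE memory.
const vk::Buffer buffer = allocator.CreateBuffer(buffer_ci, MemoryUsage::Upload);
const std::span<u8> mapped = buffer.Mapped();  // assumed accessor for the span built above
if (!mapped.empty()) {
    std::memcpy(mapped.data(), src_data, src_size);  // write straight into GPU-visible memory
    // Non-coherent memory would still need an explicit flush before the GPU reads it.
}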