Compare commits
217 Commits
v0.0.4
...
stuffmadef
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
96422c0896 | ||
|
|
8e8b5e9e63 | ||
|
|
6c58ff734a | ||
|
|
d3b9a48e8c | ||
|
|
8cfc2ce3c0 | ||
|
|
b76b1c6cf1 | ||
|
|
24341611e4 | ||
|
|
3798228275 | ||
|
|
6ff16e0282 | ||
|
|
3b95438059 | ||
|
|
2522b15034 | ||
|
|
d53225835a | ||
|
|
fccecc0664 | ||
|
|
334a440af8 | ||
|
|
8c22603f68 | ||
|
|
2208ffaec5 | ||
|
|
a35e761f82 | ||
|
|
37c38cb0c7 | ||
|
|
23204cfd1e | ||
|
|
c2301fd228 | ||
|
|
470291f787 | ||
|
|
fa94828ea7 | ||
|
|
05e8f2fd83 | ||
|
|
9e9d9716bf | ||
|
|
04cb0e4157 | ||
|
|
6d3c8a4577 | ||
|
|
9c2c83d429 | ||
|
|
4e8ce769b6 | ||
|
|
6922216c0f | ||
|
|
b85bf7d0ae | ||
|
|
5d9f379bd9 | ||
|
|
f3290428e2 | ||
|
|
1e0b300e85 | ||
|
|
cb8fb25126 | ||
|
|
37d0d43c94 | ||
|
|
5c205edcbd | ||
|
|
dbb33e903d | ||
|
|
37d407ab32 | ||
|
|
dc0ddce0fe | ||
|
|
a2a9045718 | ||
|
|
6f4672d8c8 | ||
|
|
481c1b0486 | ||
|
|
c39934aa65 | ||
|
|
e12c7f44cc | ||
|
|
c2412e53c0 | ||
|
|
56de1618b6 | ||
|
|
e0de78aaa3 | ||
|
|
4e36d2e468 | ||
|
|
4495724c2a | ||
|
|
f9e484fba6 | ||
|
|
07f07d8d39 | ||
|
|
54f92930e2 | ||
|
|
01658fc320 | ||
|
|
5845764ed4 | ||
|
|
cf06bd9a4b | ||
|
|
19ea6bb629 | ||
|
|
be245477af | ||
|
|
47e6072a29 | ||
|
|
4e8ec2551a | ||
|
|
f418c6c3b0 | ||
|
|
6bce124b28 | ||
|
|
65284939e6 | ||
|
|
98baa7c918 | ||
|
|
32c12852a8 | ||
|
|
f562b9032d | ||
|
|
f00031d421 | ||
|
|
86cd8a98c5 | ||
|
|
d5e7b51c97 | ||
|
|
3c114eeaaa | ||
|
|
dfb09191f3 | ||
|
|
63a9eb9d62 | ||
|
|
3fe1ec9322 | ||
|
|
60d4f84ad3 | ||
|
|
baf821f5a2 | ||
|
|
e3507c741e | ||
|
|
b1e984475f | ||
|
|
24238c215a | ||
|
|
0c1030e2cb | ||
|
|
ad45b41c48 | ||
|
|
eb4d094a56 | ||
|
|
b4cbc89596 | ||
|
|
7f587e952c | ||
|
|
522e17c865 | ||
|
|
de914eeaa0 | ||
|
|
9dd79109d5 | ||
|
|
0ce3fab047 | ||
|
|
3dd1c12b9f | ||
|
|
d097764008 | ||
|
|
21bfce5ab9 | ||
|
|
73f8b5ff42 | ||
|
|
d3f827868d | ||
|
|
ac581764ca | ||
|
|
2994723321 | ||
|
|
4726894c58 | ||
|
|
a147c45dfa | ||
|
|
3fab473f68 | ||
|
|
3ad77cf268 | ||
|
|
b9b2c84e03 | ||
|
|
a3a59aca91 | ||
|
|
5da42f728c | ||
|
|
c1c15277b1 | ||
|
|
08cfef6883 | ||
|
|
c2a1488529 | ||
|
|
aaf5fbc289 | ||
|
|
ecef42f2c5 | ||
|
|
d6debaa49c | ||
|
|
43a4ad946d | ||
|
|
045415b9bb | ||
|
|
34351e8c1a | ||
|
|
1f43d1f66c | ||
|
|
9628044990 | ||
|
|
1ed5af15b1 | ||
|
|
69bd1d5c8a | ||
|
|
c9739c7d57 | ||
|
|
76975ed544 | ||
|
|
eb7d25718c | ||
|
|
370b4c4109 | ||
|
|
4a8085a6cf | ||
|
|
aa85880902 | ||
|
|
69cc6c4b59 | ||
|
|
2d08578b1d | ||
|
|
d86cfbe5d0 | ||
|
|
b90cd12c7d | ||
|
|
53a827ac4f | ||
|
|
bba1904529 | ||
|
|
e33d2541d2 | ||
|
|
3628654a98 | ||
|
|
1f5def5756 | ||
|
|
ca479883ec | ||
|
|
300c913883 | ||
|
|
15b9a373cc | ||
|
|
d19229b179 | ||
|
|
0d999519a4 | ||
|
|
92e9b82f6e | ||
|
|
21575a7cc2 | ||
|
|
7d8c5dad97 | ||
|
|
674f552ff1 | ||
|
|
75d9236520 | ||
|
|
89926bce0b | ||
|
|
6c1fc4b4ed | ||
|
|
311f06047b | ||
|
|
46df717f7c | ||
|
|
dcf9483b0b | ||
|
|
2b828a9fee | ||
|
|
6fe1f86984 | ||
|
|
af073f13cf | ||
|
|
aea945b671 | ||
|
|
c52fda760a | ||
|
|
c168755c65 | ||
|
|
8a83cf0271 | ||
|
|
3db41fbce6 | ||
|
|
5e7fb6eead | ||
|
|
bcc5390943 | ||
|
|
a51d875d91 | ||
|
|
6134a57367 | ||
|
|
c845b6086f | ||
|
|
31c168efe1 | ||
|
|
8bd87204f5 | ||
|
|
e72a206aee | ||
|
|
6a62fa7ee3 | ||
|
|
52b630dfdc | ||
|
|
4860050358 | ||
|
|
47f0563c1b | ||
|
|
b1208f03ee | ||
|
|
0fd603c094 | ||
|
|
1ca19af7fb | ||
|
|
ddd78c3b37 | ||
|
|
2e68f8795d | ||
|
|
d3595fd2b1 | ||
|
|
033531509b | ||
|
|
b9954de1ca | ||
|
|
5f88deeebf | ||
|
|
d25da944ed | ||
|
|
ec274a855e | ||
|
|
8133d4a8b4 | ||
|
|
4f3e4bf9cb | ||
|
|
ec9e0f37ea | ||
|
|
b5f7735dba | ||
|
|
5f501d6ec0 | ||
|
|
e820f304a5 | ||
|
|
3527a33430 | ||
|
|
ee5565077c | ||
|
|
9085ff1229 | ||
|
|
6eff1779a2 | ||
|
|
3228cffd23 | ||
|
|
9d9530efe0 | ||
|
|
aaaa7c7601 | ||
|
|
7f8a507b79 | ||
|
|
c28ae059e8 | ||
|
|
7f1369f9a8 | ||
|
|
6b05c164a1 | ||
|
|
a3f9d3b59c | ||
|
|
b066a6ffa0 | ||
|
|
a14cba7f11 | ||
|
|
2d85b70373 | ||
|
|
aa8cc4da38 | ||
|
|
baddaf0040 | ||
|
|
35b4e34e09 | ||
|
|
b574e9c334 | ||
|
|
d6b5a3e181 | ||
|
|
a65a35432e | ||
|
|
6e575364eb | ||
|
|
71a1442ab6 | ||
|
|
4a17762ed7 | ||
|
|
447c4de73d | ||
|
|
cd2c4d8caf | ||
|
|
ee64c945fb | ||
|
|
eec5d48220 | ||
|
|
75cc43a57a | ||
|
|
0078094b86 | ||
|
|
3cd33fce44 | ||
|
|
ccafe0ed91 | ||
|
|
94af9ff51f | ||
|
|
d229fdca32 | ||
|
|
e636e940ed | ||
|
|
2798174b00 | ||
|
|
46f2084114 |
@@ -121,7 +121,7 @@ else()
|
||||
-Werror=unused
|
||||
|
||||
-Wno-attributes
|
||||
-Wno-invalid-offsetof
|
||||
$<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>
|
||||
-Wno-unused-parameter
|
||||
-Wno-missing-field-initializers
|
||||
)
|
||||
|
||||
@@ -29,6 +29,9 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
|
||||
SYNC_MEMORY_OPERATIONS("sync_memory_operations"),
|
||||
BUFFER_REORDER_DISABLE("disable_buffer_reorder"),
|
||||
RENDERER_DEBUG("debug"),
|
||||
RENDERER_VERTEX_INPUT_DYNAMIC_STATE("vertex_input_dynamic_state"),
|
||||
FORCE_IDENTITY_SWIZZLE("force_identity_swizzle"),
|
||||
FORCE_LDR_TO_SRGB("force_ldr_to_srgb"),
|
||||
RENDERER_PROVOKING_VERTEX("provoking_vertex"),
|
||||
RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"),
|
||||
RENDERER_SAMPLE_SHADING("sample_shading"),
|
||||
|
||||
@@ -19,6 +19,7 @@ enum class IntSetting(override val key: String) : AbstractIntSetting {
|
||||
RENDERER_NVDEC_EMULATION("nvdec_emulation"),
|
||||
RENDERER_ASTC_DECODE_METHOD("accelerate_astc"),
|
||||
RENDERER_ASTC_RECOMPRESSION("astc_recompression"),
|
||||
RENDERER_FORMAT_REINTERPRETATION("format_reinterpretation"),
|
||||
RENDERER_ACCURACY("gpu_accuracy"),
|
||||
RENDERER_RESOLUTION("resolution_setup"),
|
||||
RENDERER_VSYNC("use_vsync"),
|
||||
|
||||
@@ -146,6 +146,27 @@ abstract class SettingsItem(
|
||||
descriptionId = R.string.provoking_vertex_description
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE,
|
||||
titleId = R.string.vertex_input_dynamic_state,
|
||||
descriptionId = R.string.vertex_input_dynamic_state_description
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.FORCE_IDENTITY_SWIZZLE,
|
||||
titleId = R.string.force_identity_swizzle,
|
||||
descriptionId = R.string.force_identity_swizzle_description
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.FORCE_LDR_TO_SRGB,
|
||||
titleId = R.string.force_ldr_to_srgb,
|
||||
descriptionId = R.string.force_ldr_to_srgb_description
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.RENDERER_DESCRIPTOR_INDEXING,
|
||||
@@ -359,6 +380,14 @@ abstract class SettingsItem(
|
||||
valuesId = R.array.astcRecompressionMethodValues
|
||||
)
|
||||
)
|
||||
put(
|
||||
SingleChoiceSetting(
|
||||
IntSetting.RENDERER_FORMAT_REINTERPRETATION,
|
||||
titleId = R.string.format_reinterpretation,
|
||||
choicesId = R.array.formatReinterpretationNames,
|
||||
valuesId = R.array.formatReinterpretationValues
|
||||
)
|
||||
)
|
||||
put(
|
||||
SingleChoiceSetting(
|
||||
IntSetting.RENDERER_VRAM_USAGE_MODE,
|
||||
|
||||
@@ -469,6 +469,10 @@ class SettingsFragmentPresenter(
|
||||
add(IntSetting.RENDERER_NVDEC_EMULATION.key)
|
||||
add(IntSetting.RENDERER_ASTC_DECODE_METHOD.key)
|
||||
add(IntSetting.RENDERER_ASTC_RECOMPRESSION.key)
|
||||
add(IntSetting.RENDERER_FORMAT_REINTERPRETATION.key)
|
||||
add(BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE.key)
|
||||
add(BooleanSetting.FORCE_IDENTITY_SWIZZLE.key)
|
||||
add(BooleanSetting.FORCE_LDR_TO_SRGB.key)
|
||||
add(IntSetting.RENDERER_VRAM_USAGE_MODE.key)
|
||||
add(IntSetting.RENDERER_OPTIMIZE_SPIRV_OUTPUT.key)
|
||||
|
||||
|
||||
@@ -161,6 +161,22 @@
|
||||
<item>2</item> <!-- BC3 -->
|
||||
</integer-array>
|
||||
|
||||
<!-- Format Reinterpretation Choices -->
|
||||
<string-array name="formatReinterpretationNames">
|
||||
<item>@string/format_reinterpretation_disabled</item>
|
||||
<item>@string/format_reinterpretation_r32uint_to_r32sfloat</item>
|
||||
<item>@string/format_reinterpretation_r32sint_to_r32uint</item>
|
||||
<item>@string/format_reinterpretation_r32sfloat_to_r32sint</item>
|
||||
</string-array>
|
||||
|
||||
<!-- Format Reinterpretation Values -->
|
||||
<integer-array name="formatReinterpretationValues">
|
||||
<item>0</item>
|
||||
<item>1</item>
|
||||
<item>2</item>
|
||||
<item>3</item>
|
||||
</integer-array>
|
||||
|
||||
<!-- NVDEC Emulation Choices -->
|
||||
<string-array name="rendererNvdecNames">
|
||||
<item>@string/nvdec_emulation_none</item> <!-- Off -->
|
||||
|
||||
@@ -96,6 +96,8 @@
|
||||
<string name="disabled">Disabled</string>
|
||||
<string name="provoking_vertex">Provoking Vertex</string>
|
||||
<string name="provoking_vertex_description">Improves lighting and vertex handling in certain games. Only supported on Vulkan 1.0+ GPUs.</string>
|
||||
<string name="vertex_input_dynamic_state">Vertex Input Dynamic State</string>
|
||||
<string name="vertex_input_dynamic_state_description">Improves lighting and vertex handling in certain games. Only supported on Vulkan 1.0+ GPUs.</string>
|
||||
<string name="descriptor_indexing">Descriptor Indexing</string>
|
||||
<string name="descriptor_indexing_description">Improves texture and buffer handling, as well as the Maxwell translation layer. Supported by some Vulkan 1.1 GPUs and all Vulkan 1.2+ GPUs.</string>
|
||||
<string name="sample_shading">Sample Shading</string>
|
||||
@@ -943,7 +945,22 @@
|
||||
<string name="astc_recompression_bc1">BC1 (Low Quality)</string>
|
||||
<string name="astc_recompression_bc3">BC3 (Medium Quality)</string>
|
||||
|
||||
<!-- ASTC Recompression Method Choices -->
|
||||
<!-- Format Reinterpretation -->
|
||||
<string name="format_reinterpretation">Format Reinterpretation</string>
|
||||
<string name="format_reinterpretation_disabled">Disabled</string>
|
||||
<string name="format_reinterpretation_r32uint_to_r32sfloat">R32 UInt to R32 SFloat</string>
|
||||
<string name="format_reinterpretation_r32sint_to_r32uint">R32 SInt to R32 UInt</string>
|
||||
<string name="format_reinterpretation_r32sfloat_to_r32sint">R32 SFloat to R32 SInt</string>
|
||||
|
||||
<!-- Force Identity Swizzle -->
|
||||
<string name="force_identity_swizzle">Force Identity Swizzle</string>
|
||||
<string name="force_identity_swizzle_description">Forces identity component swizzle for storage and input attachment images. Required by Vulkan spec. Disable if graphical issues.</string>
|
||||
|
||||
<!-- Force LDR to sRGB -->
|
||||
<string name="force_ldr_to_srgb">Force LDR to sRGB</string>
|
||||
<string name="force_ldr_to_srgb_description">Converts LDR texture formats to sRGB for proper gamma correction. Fixes washed out colors on Adreno GPUs. Enable if textures look too bright or desaturated.</string>
|
||||
|
||||
<!-- VRAM Usage Mode -->
|
||||
<string name="vram_usage_mode">VRAM Usage Mode</string>
|
||||
<string name="vram_usage_mode_description">Control how aggressively the emulator allocates and frees GPU memory.</string>
|
||||
<string name="vram_usage_conservative">Conservative</string>
|
||||
|
||||
@@ -730,7 +730,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
|
||||
ASSERT(virtual_offset % PageAlignment == 0);
|
||||
ASSERT(host_offset % PageAlignment == 0);
|
||||
ASSERT(length % PageAlignment == 0);
|
||||
ASSERT(virtual_offset + length <= virtual_size);
|
||||
if (impl && virtual_base) {
|
||||
ASSERT(virtual_offset + length <= virtual_size);
|
||||
}
|
||||
ASSERT(host_offset + length <= backing_size);
|
||||
if (length == 0 || !virtual_base || !impl) {
|
||||
return;
|
||||
@@ -741,7 +743,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
|
||||
void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) {
|
||||
ASSERT(virtual_offset % PageAlignment == 0);
|
||||
ASSERT(length % PageAlignment == 0);
|
||||
ASSERT(virtual_offset + length <= virtual_size);
|
||||
if (impl && virtual_base) {
|
||||
ASSERT(virtual_offset + length <= virtual_size);
|
||||
}
|
||||
if (length == 0 || !virtual_base || !impl) {
|
||||
return;
|
||||
}
|
||||
@@ -751,7 +755,9 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap)
|
||||
void HostMemory::Protect(size_t virtual_offset, size_t length, MemoryPermission perm) {
|
||||
ASSERT(virtual_offset % PageAlignment == 0);
|
||||
ASSERT(length % PageAlignment == 0);
|
||||
ASSERT(virtual_offset + length <= virtual_size);
|
||||
if (impl && virtual_base) {
|
||||
ASSERT(virtual_offset + length <= virtual_size);
|
||||
}
|
||||
if (length == 0 || !virtual_base || !impl) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ SWITCHABLE(AstcRecompression, true);
|
||||
SWITCHABLE(AudioMode, true);
|
||||
SWITCHABLE(CpuBackend, true);
|
||||
SWITCHABLE(CpuAccuracy, true);
|
||||
SWITCHABLE(FormatReinterpretation, true);
|
||||
SWITCHABLE(FullscreenMode, true);
|
||||
SWITCHABLE(GpuAccuracy, true);
|
||||
SWITCHABLE(Language, true);
|
||||
|
||||
@@ -69,6 +69,7 @@ SWITCHABLE(AstcRecompression, true);
|
||||
SWITCHABLE(AudioMode, true);
|
||||
SWITCHABLE(CpuBackend, true);
|
||||
SWITCHABLE(CpuAccuracy, true);
|
||||
SWITCHABLE(FormatReinterpretation, true);
|
||||
SWITCHABLE(FullscreenMode, true);
|
||||
SWITCHABLE(GpuAccuracy, true);
|
||||
SWITCHABLE(Language, true);
|
||||
@@ -349,7 +350,7 @@ struct Values {
|
||||
linkage, true, "use_asynchronous_gpu_emulation", Category::Renderer};
|
||||
SwitchableSetting<AstcDecodeMode, true> accelerate_astc{linkage,
|
||||
#ifdef ANDROID
|
||||
AstcDecodeMode::Cpu,
|
||||
AstcDecodeMode::Gpu,
|
||||
#else
|
||||
AstcDecodeMode::Gpu,
|
||||
#endif
|
||||
@@ -400,7 +401,7 @@ struct Values {
|
||||
true,
|
||||
true};
|
||||
SwitchableSetting<int, true> fsr_sharpening_slider{linkage,
|
||||
25,
|
||||
0,
|
||||
0,
|
||||
200,
|
||||
"fsr_sharpening_slider",
|
||||
@@ -451,6 +452,19 @@ struct Values {
|
||||
AstcRecompression::Uncompressed,
|
||||
"astc_recompression",
|
||||
Category::RendererAdvanced};
|
||||
SwitchableSetting<FormatReinterpretation, true> format_reinterpretation{
|
||||
linkage,
|
||||
FormatReinterpretation::Disabled,
|
||||
"format_reinterpretation",
|
||||
Category::RendererAdvanced};
|
||||
SwitchableSetting<bool> force_identity_swizzle{linkage,
|
||||
false,
|
||||
"force_identity_swizzle",
|
||||
Category::RendererAdvanced};
|
||||
SwitchableSetting<bool> force_ldr_to_srgb{linkage,
|
||||
false,
|
||||
"force_ldr_to_srgb",
|
||||
Category::RendererAdvanced};
|
||||
SwitchableSetting<VramUsageMode, true> vram_usage_mode{linkage,
|
||||
VramUsageMode::Conservative,
|
||||
"vram_usage_mode",
|
||||
@@ -546,6 +560,15 @@ struct Values {
|
||||
Category::RendererExtensions,
|
||||
Specialization::Scalar};
|
||||
|
||||
SwitchableSetting<bool> vertex_input_dynamic_state{linkage,
|
||||
#if defined (ANDROID)
|
||||
false, // Disabled by default on Android (some drivers have issues)
|
||||
#else
|
||||
false, // Disabled by default on desktop (some drivers have issues)
|
||||
#endif
|
||||
"vertex_input_dynamic_state",
|
||||
Category::RendererExtensions};
|
||||
|
||||
SwitchableSetting<bool> provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions};
|
||||
SwitchableSetting<bool> descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions};
|
||||
SwitchableSetting<bool> sample_shading{linkage, false, "sample_shading", Category::RendererExtensions, Specialization::Paired};
|
||||
@@ -560,6 +583,60 @@ struct Values {
|
||||
false,
|
||||
&sample_shading};
|
||||
|
||||
#ifdef ANDROID
|
||||
// Shader Float Controls (Android only) - Eden Veil / Extensions
|
||||
// Force enable VK_KHR_shader_float_controls even if driver has known issues
|
||||
// Allows fine-tuning float behavior to match Switch/Maxwell or optimize performance
|
||||
SwitchableSetting<bool> shader_float_controls_force_enable{linkage,
|
||||
false,
|
||||
"shader_float_controls_force_enable",
|
||||
Category::RendererExtensions,
|
||||
Specialization::Paired};
|
||||
|
||||
// Individual float behavior controls (visible only when force_enable is true)
|
||||
// Multiple can be active simultaneously EXCEPT FTZ and DenormPreserve (mutually exclusive)
|
||||
//
|
||||
// Recommended configurations:
|
||||
// Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior)
|
||||
// Performance: FTZ=ON only (fastest)
|
||||
// Accuracy: DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision)
|
||||
SwitchableSetting<bool> shader_float_ftz{linkage,
|
||||
false,
|
||||
"shader_float_ftz",
|
||||
Category::RendererExtensions,
|
||||
Specialization::Default,
|
||||
true,
|
||||
false,
|
||||
&shader_float_controls_force_enable};
|
||||
|
||||
SwitchableSetting<bool> shader_float_denorm_preserve{linkage,
|
||||
false,
|
||||
"shader_float_denorm_preserve",
|
||||
Category::RendererExtensions,
|
||||
Specialization::Default,
|
||||
true,
|
||||
false,
|
||||
&shader_float_controls_force_enable};
|
||||
|
||||
SwitchableSetting<bool> shader_float_rte{linkage,
|
||||
false,
|
||||
"shader_float_rte",
|
||||
Category::RendererExtensions,
|
||||
Specialization::Default,
|
||||
true,
|
||||
false,
|
||||
&shader_float_controls_force_enable};
|
||||
|
||||
SwitchableSetting<bool> shader_float_signed_zero_inf_nan{linkage,
|
||||
false,
|
||||
"shader_float_signed_zero_inf_nan",
|
||||
Category::RendererExtensions,
|
||||
Specialization::Default,
|
||||
true,
|
||||
false,
|
||||
&shader_float_controls_force_enable};
|
||||
#endif
|
||||
|
||||
Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
|
||||
Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",
|
||||
Category::RendererDebug};
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
||||
@@ -151,6 +151,17 @@ ENUM(AppletMode, HLE, LLE);
|
||||
ENUM(SpirvOptimizeMode, Never, OnLoad, Always);
|
||||
ENUM(GpuOverclock, Low, Medium, High)
|
||||
ENUM(TemperatureUnits, Celsius, Fahrenheit)
|
||||
ENUM(FormatReinterpretation, Disabled, R32UintToR32Sfloat, R32SintToR32Uint, R32SfloatToR32Sint)
|
||||
|
||||
// Shader Float Controls behavior modes
|
||||
// These control how floating-point denormals and special values are handled in shaders
|
||||
ENUM(ShaderFloatBehavior,
|
||||
DriverDefault, // Let driver choose (safest, may not match Switch behavior)
|
||||
SwitchNative, // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero)
|
||||
FlushToZero, // FTZ only - flush denorms to zero (fastest, some precision loss)
|
||||
PreserveDenorms, // Preserve denorms (slowest, highest precision)
|
||||
RoundToEven, // RTE rounding mode (IEEE 754 compliant)
|
||||
SignedZeroInfNan); // Preserve signed zero, inf, nan (accuracy for edge cases)
|
||||
|
||||
template <typename Type>
|
||||
inline std::string_view CanonicalizeEnum(Type id) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -393,6 +396,24 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
|
||||
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
|
||||
BufferDescriptorB()[buffer_index].Size()};
|
||||
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
|
||||
|
||||
// Defensive check: if client didn't provide output buffer, log detailed error but don't crash
|
||||
if (buffer_size == 0) {
|
||||
LOG_ERROR(Core,
|
||||
"WriteBuffer called but client provided NO output buffer! "
|
||||
"Requested size: 0x{:X}, buffer_index: {}, is_buffer_b: {}, "
|
||||
"BufferB count: {}, BufferC count: {}",
|
||||
size, buffer_index, is_buffer_b, BufferDescriptorB().size(),
|
||||
BufferDescriptorC().size());
|
||||
|
||||
// Log command context for debugging
|
||||
LOG_ERROR(Core, "IPC Command: 0x{:X}, Type: {}", GetCommand(),
|
||||
static_cast<u32>(GetCommandType()));
|
||||
|
||||
// Return 0 instead of crashing - let service handle error
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size > buffer_size) {
|
||||
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
|
||||
buffer_size);
|
||||
|
||||
@@ -196,6 +196,21 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent)
|
||||
tr("Stretches the renderer to fit the specified aspect ratio.\nMost games only support "
|
||||
"16:9, so modifications are required to get other ratios.\nAlso controls the "
|
||||
"aspect ratio of captured screenshots."));
|
||||
INSERT(Settings,
|
||||
format_reinterpretation,
|
||||
tr("Format Reinterpretation:"),
|
||||
tr("Reinterprets certain texture formats for accuracy rendering.\nMay cause "
|
||||
"graphical issues in some games."));
|
||||
INSERT(Settings,
|
||||
force_identity_swizzle,
|
||||
tr("Force Identity Swizzle"),
|
||||
tr("Forces identity component swizzle for storage and input attachment images. "
|
||||
"Required by Vulkan spec. Disable only for debugging driver issues."));
|
||||
INSERT(Settings,
|
||||
force_ldr_to_srgb,
|
||||
tr("Force LDR Formats to sRGB"),
|
||||
tr("Converts LDR texture formats (RGBA8_UNORM, A2B10G10R10_UNORM) to sRGB variants. "
|
||||
"Fixes gamma correction issues on some games. Enable for correct colors on Adreno GPUs."));
|
||||
INSERT(Settings,
|
||||
use_disk_shader_cache,
|
||||
tr("Use persistent pipeline cache"),
|
||||
@@ -717,6 +732,13 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent)
|
||||
PAIR(GpuOverclock, Medium, tr("Medium (256)")),
|
||||
PAIR(GpuOverclock, High, tr("High (512)")),
|
||||
}});
|
||||
translations->insert({Settings::EnumMetadata<Settings::FormatReinterpretation>::Index(),
|
||||
{
|
||||
PAIR(FormatReinterpretation, Disabled, tr("Disabled")),
|
||||
PAIR(FormatReinterpretation, R32UintToR32Sfloat, tr("R32 Uint to R32 Float")),
|
||||
PAIR(FormatReinterpretation, R32SintToR32Uint, tr("R32 Sint to R32 Uint")),
|
||||
PAIR(FormatReinterpretation, R32SfloatToR32Sint, tr("R32 Float to R32 Sint")),
|
||||
}});
|
||||
|
||||
#undef PAIR
|
||||
#undef CTX_PAIR
|
||||
|
||||
@@ -55,6 +55,17 @@ static const std::map<Settings::ScalingFilter, QString> scaling_filter_texts_map
|
||||
{Settings::ScalingFilter::Mmpx, QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "MMPX"))},
|
||||
};
|
||||
|
||||
static const std::map<Settings::FormatReinterpretation, QString> format_reinterpretation_texts_map = {
|
||||
{Settings::FormatReinterpretation::Disabled,
|
||||
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "Disabled"))},
|
||||
{Settings::FormatReinterpretation::R32UintToR32Sfloat,
|
||||
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "R32 Uint to R32 Float"))},
|
||||
{Settings::FormatReinterpretation::R32SintToR32Uint,
|
||||
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "R32 Sint to R32 Uint"))},
|
||||
{Settings::FormatReinterpretation::R32SfloatToR32Sint,
|
||||
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "R32 Float to R32 Sint"))},
|
||||
};
|
||||
|
||||
static const std::map<Settings::ConsoleMode, QString> use_docked_mode_texts_map = {
|
||||
{Settings::ConsoleMode::Docked, QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "Docked"))},
|
||||
{Settings::ConsoleMode::Handheld, QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "Handheld"))},
|
||||
|
||||
@@ -380,13 +380,14 @@ void EmitContext::SetupExtensions() {
|
||||
if (info.uses_int64 && profile.support_int64) {
|
||||
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
|
||||
}
|
||||
if (info.uses_int64_bit_atomics) {
|
||||
if (info.uses_int64_bit_atomics && profile.support_gl_shader_atomic_int64) {
|
||||
header += "#extension GL_NV_shader_atomic_int64 : enable\n";
|
||||
}
|
||||
if (info.uses_atomic_f32_add) {
|
||||
if (info.uses_atomic_f32_add && profile.support_gl_shader_atomic_float) {
|
||||
header += "#extension GL_NV_shader_atomic_float : enable\n";
|
||||
}
|
||||
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
||||
if ((info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) &&
|
||||
profile.support_gl_shader_atomic_fp16_vector) {
|
||||
header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
|
||||
}
|
||||
if (info.uses_fp16) {
|
||||
|
||||
@@ -341,19 +341,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
|
||||
Id main_func) {
|
||||
const Info& info{program.info};
|
||||
|
||||
// User-forced behavior overrides (Android Eden Veil/Extensions)
|
||||
// When force flags are active, they take precedence over shader-declared behavior
|
||||
const bool force_flush = profile.force_fp32_denorm_flush;
|
||||
const bool force_preserve = profile.force_fp32_denorm_preserve;
|
||||
|
||||
if (force_flush && force_preserve) {
|
||||
LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence");
|
||||
}
|
||||
|
||||
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
|
||||
} else if (info.uses_fp32_denorms_flush) {
|
||||
} else if (force_flush || info.uses_fp32_denorms_flush) {
|
||||
if (profile.support_fp32_denorm_flush) {
|
||||
ctx.AddCapability(spv::Capability::DenormFlushToZero);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
|
||||
if (force_flush) {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting");
|
||||
}
|
||||
} else {
|
||||
// Drivers will most likely flush denorms by default, no need to warn
|
||||
}
|
||||
} else if (info.uses_fp32_denorms_preserve) {
|
||||
} else if (force_preserve || info.uses_fp32_denorms_preserve) {
|
||||
if (profile.support_fp32_denorm_preserve) {
|
||||
ctx.AddCapability(spv::Capability::DenormPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
|
||||
if (force_preserve) {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting");
|
||||
}
|
||||
} else {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
|
||||
}
|
||||
@@ -386,13 +402,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
|
||||
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
|
||||
return;
|
||||
}
|
||||
|
||||
// User-forced behavior (Android Eden Veil/Extensions)
|
||||
const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan;
|
||||
|
||||
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
|
||||
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
|
||||
}
|
||||
if (profile.support_fp32_signed_zero_nan_preserve) {
|
||||
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
|
||||
if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) {
|
||||
if (profile.support_fp32_signed_zero_nan_preserve) {
|
||||
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
|
||||
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
|
||||
if (force_signed_zero_inf_nan) {
|
||||
LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting");
|
||||
}
|
||||
} else if (force_signed_zero_inf_nan) {
|
||||
LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it");
|
||||
}
|
||||
}
|
||||
if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
|
||||
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -318,13 +321,23 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
|
||||
return;
|
||||
}
|
||||
|
||||
// Mobile GPUs: 1D textures emulated as 2D with height=1
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
Id result_type{};
|
||||
switch (info.type) {
|
||||
case TextureType::Buffer:
|
||||
case TextureType::Color1D: {
|
||||
result_type = ctx.U32[1];
|
||||
break;
|
||||
}
|
||||
case TextureType::Color1D:
|
||||
if (emulate_1d) {
|
||||
// Treat as 2D: offset needs Y component
|
||||
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
|
||||
result_type = ctx.U32[2];
|
||||
} else {
|
||||
result_type = ctx.U32[1];
|
||||
}
|
||||
break;
|
||||
case TextureType::ColorArray1D:
|
||||
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
|
||||
[[fallthrough]];
|
||||
@@ -348,6 +361,40 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
|
||||
}
|
||||
coords = ctx.OpIAdd(result_type, coords, offset);
|
||||
}
|
||||
|
||||
// Helper: Convert 1D coordinates to 2D when emulating 1D textures on mobile GPUs
|
||||
[[nodiscard]] Id AdjustCoordinatesForEmulation(EmitContext& ctx, const IR::TextureInstInfo& info,
|
||||
Id coords) {
|
||||
if (!ctx.profile.needs_1d_texture_emulation) {
|
||||
return coords;
|
||||
}
|
||||
|
||||
switch (info.type) {
|
||||
case TextureType::Color1D: {
|
||||
// Convert scalar → vec2(x, 0.0)
|
||||
return ctx.OpCompositeConstruct(ctx.F32[2], coords, ctx.f32_zero_value);
|
||||
}
|
||||
case TextureType::ColorArray1D: {
|
||||
// Convert vec2(x, layer) → vec3(x, 0.0, layer)
|
||||
// ColorArray1D coords are always vec2 in IR
|
||||
const Id x = ctx.OpCompositeExtract(ctx.F32[1], coords, 0);
|
||||
const Id layer = ctx.OpCompositeExtract(ctx.F32[1], coords, 1);
|
||||
return ctx.OpCompositeConstruct(ctx.F32[3], x, ctx.f32_zero_value, layer);
|
||||
}
|
||||
case TextureType::Color2D:
|
||||
case TextureType::ColorArray2D:
|
||||
case TextureType::Color3D:
|
||||
case TextureType::ColorCube:
|
||||
case TextureType::ColorArrayCube:
|
||||
case TextureType::Buffer:
|
||||
case TextureType::Color2DRect:
|
||||
// No adjustment needed for non-1D textures
|
||||
return coords;
|
||||
}
|
||||
|
||||
return coords; // Unreachable, but silences -Werror=return-type
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
||||
@@ -449,6 +496,7 @@ Id EmitBoundImageWrite(EmitContext&) {
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id bias_lc, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
if (ctx.stage == Stage::Fragment) {
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
|
||||
bias_lc, offset);
|
||||
@@ -470,6 +518,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id lod, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, false, true, false, lod, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
|
||||
@@ -479,6 +528,7 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
if (ctx.stage == Stage::Fragment) {
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
|
||||
bias_lc, offset);
|
||||
@@ -500,6 +550,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id lod, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, false, true, false, lod, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
|
||||
&EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
|
||||
@@ -509,6 +560,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, offset, offset2);
|
||||
if (ctx.profile.need_gather_subpixel_offset) {
|
||||
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
|
||||
@@ -521,6 +573,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, offset, offset2);
|
||||
if (ctx.profile.need_gather_subpixel_offset) {
|
||||
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
|
||||
@@ -533,6 +586,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
||||
Id lod, Id ms) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
AddOffsetToCoordinates(ctx, info, coords, offset);
|
||||
if (info.type == TextureType::Buffer) {
|
||||
lod = Id{};
|
||||
@@ -559,9 +613,20 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
|
||||
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
|
||||
: ctx.OpImageQuerySize(type, image);
|
||||
}};
|
||||
|
||||
// Mobile GPUs: 1D textures emulated as 2D, query returns vec2 instead of scalar
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
switch (info.type) {
|
||||
case TextureType::Color1D:
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
|
||||
if (emulate_1d) {
|
||||
// Query as 2D, extract only X component for 1D size
|
||||
const Id size_2d = query(ctx.U32[2]);
|
||||
const Id width = ctx.OpCompositeExtract(ctx.U32[1], size_2d, 0);
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], width, zero, zero, mips());
|
||||
} else {
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
|
||||
}
|
||||
case TextureType::ColorArray1D:
|
||||
case TextureType::Color2D:
|
||||
case TextureType::ColorCube:
|
||||
@@ -579,6 +644,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
|
||||
|
||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const Id zero{ctx.f32_zero_value};
|
||||
const Id sampler{Texture(ctx, info, index)};
|
||||
return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
|
||||
@@ -588,6 +654,7 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const auto operands = info.num_derivatives == 3
|
||||
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
|
||||
ctx.Def(offset), {}, lod_clamp)
|
||||
@@ -600,6 +667,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
||||
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
|
||||
LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
|
||||
return ctx.ConstantNull(ctx.U32[4]);
|
||||
@@ -616,6 +684,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
|
||||
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const auto [image, is_integer] = Image(ctx, index, info);
|
||||
if (!is_integer) {
|
||||
color = ctx.OpBitcast(ctx.F32[4], color);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
||||
@@ -33,11 +33,24 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
|
||||
const Id type{ctx.F32[1]};
|
||||
const bool depth{desc.is_depth};
|
||||
const bool ms{desc.is_multisample};
|
||||
|
||||
// Mobile GPUs lack Sampled1D SPIR-V capability - emulate 1D as 2D with array layer
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
// Debug log for 1D emulation
|
||||
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
|
||||
LOG_WARNING(Shader_SPIRV, "ImageType(texture): Creating {} texture, emulate_1d={}",
|
||||
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
|
||||
emulate_1d);
|
||||
}
|
||||
|
||||
switch (desc.type) {
|
||||
case TextureType::Color1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
|
||||
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format)
|
||||
: ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
|
||||
case TextureType::ColorArray1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
|
||||
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format)
|
||||
: ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
|
||||
case TextureType::Color2D:
|
||||
case TextureType::Color2DRect:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
|
||||
@@ -79,11 +92,22 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
|
||||
|
||||
Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
|
||||
const spv::ImageFormat format{GetImageFormat(desc.format)};
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
// Debug log for 1D emulation
|
||||
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
|
||||
LOG_WARNING(Shader_SPIRV, "ImageType: Creating {} image, emulate_1d={}",
|
||||
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
|
||||
emulate_1d);
|
||||
}
|
||||
|
||||
switch (desc.type) {
|
||||
case TextureType::Color1D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
|
||||
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format)
|
||||
: ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
|
||||
case TextureType::ColorArray1D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
|
||||
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 2, format)
|
||||
: ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
|
||||
case TextureType::Color2D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format);
|
||||
case TextureType::ColorArray2D:
|
||||
@@ -1442,6 +1466,15 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
|
||||
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
|
||||
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
|
||||
|
||||
// Vulkan spec: Fragment shader Input variables with integer/float type must have Flat decoration
|
||||
if (stage == Stage::Fragment) {
|
||||
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_lt, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_le, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_gt, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
|
||||
}
|
||||
}
|
||||
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
|
||||
(profile.warp_size_potentially_larger_than_guest &&
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -293,6 +296,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
|
||||
}
|
||||
// This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2
|
||||
IR::Inst* addr_inst{addr.InstRecursive()};
|
||||
// Unwrap Identity ops introduced by lowerings (e.g., PackUint2x32 -> Identity)
|
||||
while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
|
||||
const IR::Value id_arg{addr_inst->Arg(0)};
|
||||
if (id_arg.IsImmediate()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
addr_inst = id_arg.InstRecursive();
|
||||
}
|
||||
s32 imm_offset{0};
|
||||
if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
|
||||
// If it's an IAdd64, get the immediate offset it is applying and grab the address
|
||||
@@ -308,6 +319,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
|
||||
return std::nullopt;
|
||||
}
|
||||
addr_inst = iadd_addr.InstRecursive();
|
||||
// Unwrap Identity again if present after folding IAdd64
|
||||
while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
|
||||
const IR::Value id_arg{addr_inst->Arg(0)};
|
||||
if (id_arg.IsImmediate()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
addr_inst = id_arg.InstRecursive();
|
||||
}
|
||||
}
|
||||
// With IAdd64 handled, now PackUint2x32 is expected
|
||||
if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
|
||||
@@ -317,6 +336,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
|
||||
return std::nullopt;
|
||||
}
|
||||
addr_inst = vector.InstRecursive();
|
||||
// Unwrap Identity that may replace PackUint2x32
|
||||
while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
|
||||
const IR::Value id_arg{addr_inst->Arg(0)};
|
||||
if (id_arg.IsImmediate()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
addr_inst = id_arg.InstRecursive();
|
||||
}
|
||||
}
|
||||
// The vector is expected to be a CompositeConstructU32x2
|
||||
if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -25,6 +28,14 @@ struct Profile {
|
||||
bool support_fp16_signed_zero_nan_preserve{};
|
||||
bool support_fp32_signed_zero_nan_preserve{};
|
||||
bool support_fp64_signed_zero_nan_preserve{};
|
||||
|
||||
// User-forced float behavior overrides (Android Eden Veil/Extensions)
|
||||
// When shader_float_controls_force_enable is true, these override shader-declared behavior
|
||||
bool force_fp32_denorm_flush{}; // Force FTZ for all FP32 ops
|
||||
bool force_fp32_denorm_preserve{}; // Force denorm preservation for all FP32 ops
|
||||
bool force_fp32_rte_rounding{}; // Force Round-To-Even for all FP32 ops
|
||||
bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation
|
||||
|
||||
bool support_explicit_workgroup_layout{};
|
||||
bool support_vote{};
|
||||
bool support_viewport_index_layer_non_geometry{};
|
||||
@@ -38,6 +49,9 @@ struct Profile {
|
||||
bool support_gl_nv_gpu_shader_5{};
|
||||
bool support_gl_amd_gpu_shader_half_float{};
|
||||
bool support_gl_texture_shadow_lod{};
|
||||
bool support_gl_shader_atomic_float{};
|
||||
bool support_gl_shader_atomic_fp16_vector{};
|
||||
bool support_gl_shader_atomic_int64{};
|
||||
bool support_gl_warp_intrinsics{};
|
||||
bool support_gl_variable_aoffi{};
|
||||
bool support_gl_sparse_textures{};
|
||||
@@ -81,6 +95,8 @@ struct Profile {
|
||||
bool ignore_nan_fp_comparisons{};
|
||||
/// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
|
||||
bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
|
||||
/// Mobile GPUs lack Sampled1D capability - need to emulate 1D textures as 2D with height=1
|
||||
bool needs_1d_texture_emulation{};
|
||||
|
||||
u32 gl_max_compute_smem_size{};
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -130,7 +133,7 @@ enum class TexturePixelFormat {
|
||||
ASTC_2D_8X6_SRGB,
|
||||
ASTC_2D_6X5_UNORM,
|
||||
ASTC_2D_6X5_SRGB,
|
||||
E5B9G9R9_FLOAT,
|
||||
|
||||
D32_FLOAT,
|
||||
D16_UNORM,
|
||||
X8_D24_UNORM,
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -42,7 +45,7 @@ constexpr std::array VIEW_CLASS_32_BITS{
|
||||
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
|
||||
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
|
||||
PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
|
||||
PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM,
|
||||
PixelFormat::A8B8G8R8_SRGB, PixelFormat::B8G8R8A8_UNORM,
|
||||
PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT,
|
||||
PixelFormat::A2B10G10R10_UINT,
|
||||
};
|
||||
@@ -52,7 +55,7 @@ constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
|
||||
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
|
||||
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
|
||||
PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
|
||||
PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::A8B8G8R8_UINT,
|
||||
PixelFormat::A8B8G8R8_SRGB, PixelFormat::A8B8G8R8_UINT,
|
||||
PixelFormat::A8B8G8R8_SINT, PixelFormat::A2B10G10R10_UINT,
|
||||
};
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@ set(SHADER_FILES
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_turbo_mode.comp
|
||||
vulkan_uint8.comp
|
||||
vulkan_qcom_msaa_resolve.frag
|
||||
convert_rgba8_to_bgra8.frag
|
||||
convert_yuv420_to_rgb.comp
|
||||
convert_rgb_to_yuv420.comp
|
||||
|
||||
39
src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag
Normal file
39
src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag
Normal file
@@ -0,0 +1,39 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#version 450
|
||||
|
||||
// VK_QCOM_render_pass_shader_resolve fragment shader
|
||||
// Resolves MSAA attachment to single-sample within render pass
|
||||
// Requires VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM in subpass flags
|
||||
|
||||
// Use combined image sampler for MSAA texture instead of input attachment
|
||||
// This allows us to sample MSAA textures from previous rendering
|
||||
layout(set = 0, binding = 0) uniform sampler2DMS msaa_texture;
|
||||
|
||||
layout(location = 0) out vec4 color_output;
|
||||
|
||||
layout(push_constant) uniform PushConstants {
|
||||
vec2 tex_scale;
|
||||
vec2 tex_offset;
|
||||
} push_constants;
|
||||
|
||||
// Custom MSAA resolve using a box filter (unweighted average of every sample).
// The sample count is queried from the bound texture instead of being assumed,
// so the loop never fetches a sample index the texture does not have and never
// under-averages a texture with more samples.
void main() {
    // Clamp the fragment's integer position to the texture bounds so that
    // texelFetch is never issued outside the image.
    ivec2 tex_size = textureSize(msaa_texture);
    ivec2 coord = clamp(ivec2(gl_FragCoord.xy), ivec2(0), tex_size - ivec2(1));

    // textureSamples() is core in GLSL 4.50. The max() keeps the division
    // below well-defined even if the query were to report zero.
    int sample_count = max(textureSamples(msaa_texture), 1);

    // Box filter: accumulate every sample, then divide by the sample count.
    vec4 accumulated_color = vec4(0.0);
    for (int i = 0; i < sample_count; i++) {
        accumulated_color += texelFetch(msaa_texture, coord, i);
    }

    color_output = accumulated_color / float(sample_count);
}
|
||||
@@ -225,6 +225,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
|
||||
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
|
||||
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
|
||||
has_draw_texture = GLAD_GL_NV_draw_texture;
|
||||
has_shader_atomic_float = GLAD_GL_NV_shader_atomic_float;
|
||||
has_shader_atomic_fp16_vector = GLAD_GL_NV_shader_atomic_fp16_vector;
|
||||
has_shader_atomic_int64 = GLAD_GL_NV_shader_atomic_int64;
|
||||
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
|
||||
need_fastmath_off = is_nvidia;
|
||||
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -152,6 +155,18 @@ public:
|
||||
return has_draw_texture;
|
||||
}
|
||||
|
||||
bool HasShaderAtomicFloat() const {
|
||||
return has_shader_atomic_float;
|
||||
}
|
||||
|
||||
bool HasShaderAtomicFp16Vector() const {
|
||||
return has_shader_atomic_fp16_vector;
|
||||
}
|
||||
|
||||
bool HasShaderAtomicInt64() const {
|
||||
return has_shader_atomic_int64;
|
||||
}
|
||||
|
||||
bool IsWarpSizePotentiallyLargerThanGuest() const {
|
||||
return warp_size_potentially_larger_than_guest;
|
||||
}
|
||||
@@ -235,6 +250,9 @@ private:
|
||||
bool has_amd_shader_half_float{};
|
||||
bool has_sparse_texture_2{};
|
||||
bool has_draw_texture{};
|
||||
bool has_shader_atomic_float{};
|
||||
bool has_shader_atomic_fp16_vector{};
|
||||
bool has_shader_atomic_int64{};
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
bool need_fastmath_off{};
|
||||
bool has_cbuf_ftou_bug{};
|
||||
|
||||
@@ -215,6 +215,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
|
||||
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
|
||||
.support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
|
||||
.support_gl_shader_atomic_float = device.HasShaderAtomicFloat(),
|
||||
.support_gl_shader_atomic_fp16_vector = device.HasShaderAtomicFp16Vector(),
|
||||
.support_gl_shader_atomic_int64 = device.HasShaderAtomicInt64(),
|
||||
.support_gl_warp_intrinsics = false,
|
||||
.support_gl_variable_aoffi = device.HasVariableAoffi(),
|
||||
.support_gl_sparse_textures = device.HasSparseTexture2(),
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -97,6 +100,10 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Capability query; this runtime answers true unconditionally.
bool CanDownloadMSAA() const noexcept {
    return true;
}
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -113,7 +116,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
|
||||
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
|
||||
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
|
||||
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
|
||||
#include "video_core/host_shaders/dither_temporal_frag_spv.h"
|
||||
#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_qcom_msaa_resolve_frag_spv.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@@ -545,6 +546,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
|
||||
convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
|
||||
dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
|
||||
dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
|
||||
qcom_msaa_resolve_frag(BuildShader(device, VULKAN_QCOM_MSAA_RESOLVE_FRAG_SPV)),
|
||||
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
|
||||
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
|
||||
|
||||
@@ -1240,4 +1242,30 @@ void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebu
|
||||
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ResolveMSAAQcom(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
// VK_QCOM_render_pass_shader_resolve implementation
|
||||
// This must be used within a render pass with VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
|
||||
ConvertPipeline(qcom_msaa_resolve_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
|
||||
RecordShaderReadBarrier(scheduler, src_image_view);
|
||||
scheduler.RequestRenderpass(dst_framebuffer);
|
||||
|
||||
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
|
||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||
const VkPipeline pipeline = *qcom_msaa_resolve_pipeline;
|
||||
|
||||
scheduler.Record([this, src_view, layout, pipeline](vk::CommandBuffer cmdbuf) {
|
||||
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
|
||||
UpdateOneTextureDescriptorSet(device, descriptor_set, *nearest_sampler, src_view);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr);
|
||||
cmdbuf.Draw(3, 1, 0, 0);
|
||||
});
|
||||
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -95,6 +95,8 @@ public:
|
||||
void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ResolveMSAAQcom(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
private:
|
||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
@@ -159,6 +161,7 @@ private:
|
||||
vk::ShaderModule convert_rgba16f_to_rgba8_frag;
|
||||
vk::ShaderModule dither_temporal_frag;
|
||||
vk::ShaderModule dynamic_resolution_scale_comp;
|
||||
vk::ShaderModule qcom_msaa_resolve_frag;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
|
||||
@@ -188,6 +191,7 @@ private:
|
||||
vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
|
||||
vk::Pipeline dither_temporal_pipeline;
|
||||
vk::Pipeline dynamic_resolution_scale_pipeline;
|
||||
vk::Pipeline qcom_msaa_resolve_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -58,12 +58,42 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
|
||||
const auto topology_ = maxwell3d.draw_manager->GetDrawState().topology;
|
||||
|
||||
raw1 = 0;
|
||||
extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
|
||||
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
|
||||
extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
|
||||
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
|
||||
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
|
||||
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
|
||||
raw1_eds3_extended = 0;
|
||||
pad_align_u64 = 0;
|
||||
|
||||
// EDS1
|
||||
extended_dynamic_state.Assign(features.has_extended_dynamic_state);
|
||||
|
||||
// EDS2
|
||||
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2);
|
||||
extended_dynamic_state_2_logic_op.Assign(features.has_extended_dynamic_state_2_logic_op);
|
||||
extended_dynamic_state_2_patch_control_points.Assign(features.has_extended_dynamic_state_2_patch_control_points);
|
||||
|
||||
// EDS3 - Blending/Enables
|
||||
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend);
|
||||
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables);
|
||||
|
||||
// EDS3 - Granular features
|
||||
extended_dynamic_state_3_depth_clamp.Assign(features.has_extended_dynamic_state_3_depth_clamp);
|
||||
extended_dynamic_state_3_logic_op_enable.Assign(features.has_extended_dynamic_state_3_logic_op_enable);
|
||||
extended_dynamic_state_3_tessellation_domain_origin.Assign(features.has_extended_dynamic_state_3_tessellation_domain_origin);
|
||||
extended_dynamic_state_3_polygon_mode.Assign(features.has_extended_dynamic_state_3_polygon_mode);
|
||||
extended_dynamic_state_3_rasterization_samples.Assign(features.has_extended_dynamic_state_3_rasterization_samples);
|
||||
extended_dynamic_state_3_sample_mask.Assign(features.has_extended_dynamic_state_3_sample_mask);
|
||||
extended_dynamic_state_3_alpha_to_coverage_enable.Assign(features.has_extended_dynamic_state_3_alpha_to_coverage_enable);
|
||||
extended_dynamic_state_3_alpha_to_one_enable.Assign(features.has_extended_dynamic_state_3_alpha_to_one_enable);
|
||||
extended_dynamic_state_3_depth_clip_enable.Assign(features.has_extended_dynamic_state_3_depth_clip_enable);
|
||||
extended_dynamic_state_3_depth_clip_negative_one_to_one.Assign(features.has_extended_dynamic_state_3_depth_clip_negative_one_to_one);
|
||||
extended_dynamic_state_3_line_rasterization_mode.Assign(features.has_extended_dynamic_state_3_line_rasterization_mode);
|
||||
extended_dynamic_state_3_line_stipple_enable.Assign(features.has_extended_dynamic_state_3_line_stipple_enable);
|
||||
extended_dynamic_state_3_provoking_vertex_mode.Assign(features.has_extended_dynamic_state_3_provoking_vertex_mode);
|
||||
extended_dynamic_state_3_conservative_rasterization_mode.Assign(features.has_extended_dynamic_state_3_conservative_rasterization_mode);
|
||||
extended_dynamic_state_3_sample_locations_enable.Assign(features.has_extended_dynamic_state_3_sample_locations_enable);
|
||||
extended_dynamic_state_3_rasterization_stream.Assign(features.has_extended_dynamic_state_3_rasterization_stream);
|
||||
|
||||
// Vertex Input
|
||||
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input);
|
||||
|
||||
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
|
||||
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
|
||||
polygon_mode.Assign(PackPolygonMode(VideoCore::EffectivePolygonMode(regs)));
|
||||
@@ -158,7 +188,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
|
||||
return static_cast<u16>(array.stride.Value());
|
||||
});
|
||||
}
|
||||
if (!extended_dynamic_state_2_extra) {
|
||||
if (!extended_dynamic_state_2_logic_op) {
|
||||
dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
|
||||
}
|
||||
if (!extended_dynamic_state_3_blend) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -18,12 +21,35 @@ namespace Vulkan {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
// Set of flags recording which dynamic-state capabilities are in use.
// The PipelineCache constructor fills each flag from the matching device
// support query combined with the dynamic-state setting level (or, for
// has_dynamic_vertex_input, the vertex_input_dynamic_state setting), and
// FixedPipelineState::Refresh copies the flags into the pipeline key.
// NOTE(review): this span appears to come from a rendered diff - the first six
// plain `bool` members share names with bit-field members declared further
// down, so they are presumably the pre-change lines. Confirm before editing.
struct DynamicFeatures {
|
||||
bool has_extended_dynamic_state;
|
||||
bool has_extended_dynamic_state_2;
|
||||
bool has_extended_dynamic_state_2_extra;
|
||||
bool has_extended_dynamic_state_3_blend;
|
||||
bool has_extended_dynamic_state_3_enables;
|
||||
bool has_dynamic_vertex_input;
|
||||
// VK_EXT_extended_dynamic_state (EDS1) - Bit 0
|
||||
bool has_extended_dynamic_state : 1;
|
||||
// VK_EXT_extended_dynamic_state2 (EDS2) - Bits 1-3
|
||||
bool has_extended_dynamic_state_2 : 1; // Core EDS2
|
||||
bool has_extended_dynamic_state_2_logic_op : 1; // LogicOp
|
||||
bool has_extended_dynamic_state_2_patch_control_points : 1; // Tessellation
|
||||
// VK_EXT_extended_dynamic_state3 (EDS3) - Bits 4-5
|
||||
bool has_extended_dynamic_state_3_blend : 1; // Blending composite
|
||||
bool has_extended_dynamic_state_3_enables : 1; // Enables composite
|
||||
// VK_EXT_vertex_input_dynamic_state - Bit 6
|
||||
bool has_dynamic_vertex_input : 1;
|
||||
// EDS3 Granular Features - Bits 7-15
|
// NOTE(review): the next six members are declared without `: 1`, unlike every
// other flag here, so they are full-size bools and the "Bits 7-15" numbering
// above does not describe the actual layout. Confirm whether `: 1` was intended.
||||
bool has_extended_dynamic_state_3_depth_clamp;
|
||||
bool has_extended_dynamic_state_3_logic_op_enable;
|
||||
bool has_extended_dynamic_state_3_tessellation_domain_origin;
|
||||
bool has_extended_dynamic_state_3_polygon_mode;
|
||||
bool has_extended_dynamic_state_3_rasterization_samples;
|
||||
bool has_extended_dynamic_state_3_sample_mask;
|
||||
bool has_extended_dynamic_state_3_alpha_to_coverage_enable : 1;
|
||||
bool has_extended_dynamic_state_3_alpha_to_one_enable : 1;
|
||||
bool has_extended_dynamic_state_3_depth_clip_enable : 1;
|
||||
// EDS3 Additional Features - Bits 16-22
|
||||
bool has_extended_dynamic_state_3_depth_clip_negative_one_to_one : 1;
|
||||
bool has_extended_dynamic_state_3_line_rasterization_mode : 1;
|
||||
bool has_extended_dynamic_state_3_line_stipple_enable : 1;
|
||||
bool has_extended_dynamic_state_3_provoking_vertex_mode : 1;
|
||||
bool has_extended_dynamic_state_3_conservative_rasterization_mode : 1;
|
||||
bool has_extended_dynamic_state_3_sample_locations_enable : 1;
|
||||
bool has_extended_dynamic_state_3_rasterization_stream : 1;
|
||||
};
|
||||
|
||||
struct FixedPipelineState {
|
||||
@@ -184,23 +210,56 @@ struct FixedPipelineState {
|
||||
|
||||
union {
|
||||
u32 raw1;
|
||||
// EDS1 - Bit 0
|
||||
BitField<0, 1, u32> extended_dynamic_state;
|
||||
|
||||
// EDS2 - Bits 1-3
|
||||
BitField<1, 1, u32> extended_dynamic_state_2;
|
||||
BitField<2, 1, u32> extended_dynamic_state_2_extra;
|
||||
BitField<3, 1, u32> extended_dynamic_state_3_blend;
|
||||
BitField<4, 1, u32> extended_dynamic_state_3_enables;
|
||||
BitField<5, 1, u32> dynamic_vertex_input;
|
||||
BitField<6, 1, u32> xfb_enabled;
|
||||
BitField<7, 1, u32> ndc_minus_one_to_one;
|
||||
BitField<8, 2, u32> polygon_mode;
|
||||
BitField<10, 2, u32> tessellation_primitive;
|
||||
BitField<12, 2, u32> tessellation_spacing;
|
||||
BitField<14, 1, u32> tessellation_clockwise;
|
||||
BitField<15, 5, u32> patch_control_points_minus_one;
|
||||
BitField<2, 1, u32> extended_dynamic_state_2_logic_op;
|
||||
BitField<3, 1, u32> extended_dynamic_state_2_patch_control_points;
|
||||
|
||||
// EDS3 Blending/Enables - Bits 4-5
|
||||
BitField<4, 1, u32> extended_dynamic_state_3_blend;
|
||||
BitField<5, 1, u32> extended_dynamic_state_3_enables;
|
||||
|
||||
// Vertex Input - Bit 6
|
||||
BitField<6, 1, u32> dynamic_vertex_input;
|
||||
|
||||
// Other state - Bits 7-20
|
||||
BitField<7, 1, u32> xfb_enabled;
|
||||
BitField<8, 1, u32> ndc_minus_one_to_one;
|
||||
BitField<9, 2, u32> polygon_mode;
|
||||
BitField<11, 2, u32> tessellation_primitive;
|
||||
BitField<13, 2, u32> tessellation_spacing;
|
||||
BitField<15, 1, u32> tessellation_clockwise;
|
||||
BitField<16, 5, u32> patch_control_points_minus_one;
|
||||
|
||||
// Topology and MSAA - Bits 24-31
|
||||
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
|
||||
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
|
||||
};
|
||||
|
||||
union {
|
||||
u32 raw1_eds3_extended;
|
||||
// EDS3 Additional Features - Bits 0-15
|
||||
BitField<0, 1, u32> extended_dynamic_state_3_depth_clamp;
|
||||
BitField<1, 1, u32> extended_dynamic_state_3_logic_op_enable;
|
||||
BitField<2, 1, u32> extended_dynamic_state_3_tessellation_domain_origin;
|
||||
BitField<3, 1, u32> extended_dynamic_state_3_polygon_mode;
|
||||
BitField<4, 1, u32> extended_dynamic_state_3_rasterization_samples;
|
||||
BitField<5, 1, u32> extended_dynamic_state_3_sample_mask;
|
||||
BitField<6, 1, u32> extended_dynamic_state_3_alpha_to_coverage_enable;
|
||||
BitField<7, 1, u32> extended_dynamic_state_3_alpha_to_one_enable;
|
||||
BitField<8, 1, u32> extended_dynamic_state_3_depth_clip_enable;
|
||||
BitField<9, 1, u32> extended_dynamic_state_3_depth_clip_negative_one_to_one;
|
||||
BitField<10, 1, u32> extended_dynamic_state_3_line_rasterization_mode;
|
||||
BitField<11, 1, u32> extended_dynamic_state_3_line_stipple_enable;
|
||||
BitField<12, 1, u32> extended_dynamic_state_3_provoking_vertex_mode;
|
||||
BitField<13, 1, u32> extended_dynamic_state_3_conservative_rasterization_mode;
|
||||
BitField<14, 1, u32> extended_dynamic_state_3_sample_locations_enable;
|
||||
BitField<15, 1, u32> extended_dynamic_state_3_rasterization_stream;
|
||||
};
|
||||
|
||||
union {
|
||||
u32 raw2;
|
||||
BitField<1, 3, u32> alpha_test_func;
|
||||
@@ -215,12 +274,15 @@ struct FixedPipelineState {
|
||||
BitField<16, 1, u32> alpha_to_one_enabled;
|
||||
BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
|
||||
};
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
|
||||
|
||||
u32 alpha_test_ref;
|
||||
u32 point_size;
|
||||
|
||||
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
|
||||
|
||||
u32 pad_align_u64;
|
||||
|
||||
union {
|
||||
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
|
||||
u64 enabled_divisors;
|
||||
|
||||
@@ -214,7 +214,6 @@ struct FormatTuple {
|
||||
{VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
|
||||
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
|
||||
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
|
||||
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
|
||||
|
||||
// Depth formats
|
||||
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
|
||||
@@ -242,6 +241,18 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
|
||||
PixelFormat pixel_format) {
|
||||
ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
|
||||
FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
|
||||
|
||||
// Force LDR formats to sRGB when toggle is enabled (fixes gamma on Adreno GPUs)
|
||||
if (Settings::values.force_ldr_to_srgb.GetValue() && !with_srgb) {
|
||||
if (pixel_format == PixelFormat::A8B8G8R8_UNORM) {
|
||||
tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
|
||||
with_srgb = true; // Ensure we use sRGB variant
|
||||
} else if (pixel_format == PixelFormat::A2B10G10R10_UNORM) {
|
||||
// A2B10G10R10 doesn't have sRGB variant in Vulkan, keep as UNORM
|
||||
// The gamma correction will be handled by shaders if needed
|
||||
}
|
||||
}
|
||||
|
||||
// Transcode on hardware that doesn't support ASTC natively
|
||||
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
|
||||
const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -24,8 +27,13 @@ public:
|
||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||
|
||||
// Reports whether the layout being built may use push descriptors.
// Returns true only when device->IsKhrPushDescriptorSupported() holds and
// num_descriptors does not exceed device->MaxPushDescriptors().
// NOTE(review): this span appears to come from a rendered diff - the
// single-expression return and the guard-clause form after it look like the
// before/after versions of the same check. Confirm which one is current.
bool CanUsePushDescriptor() const noexcept {
|
||||
return device->IsKhrPushDescriptorSupported() &&
|
||||
num_descriptors <= device->MaxPushDescriptors();
|
||||
// Guard-clause form: reject when the device reports no push-descriptor support.
if (!device->IsKhrPushDescriptorSupported()) {
|
||||
return false;
|
||||
}
|
||||
// Reject when the layout holds more descriptors than the device's push limit.
if (num_descriptors > device->MaxPushDescriptors()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO(crueter): utilize layout binding flags
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include <ranges>
|
||||
#include "video_core/renderer_vulkan/present/util.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@@ -837,18 +837,42 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
.pAttachments = cb_attachments.data(),
|
||||
.blendConstants = {}
|
||||
};
|
||||
static_vector<VkDynamicState, 34> dynamic_states{
|
||||
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
|
||||
VK_DYNAMIC_STATE_LINE_WIDTH,
|
||||
};
|
||||
// Base Vulkan Dynamic States - Always active (independent of EDS)
|
||||
// Granular fallback: Each state added only if device supports it (protection against broken drivers)
|
||||
static_vector<VkDynamicState, 34> dynamic_states;
|
||||
if (device.SupportsDynamicViewport()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VIEWPORT);
|
||||
}
|
||||
if (device.SupportsDynamicScissor()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_SCISSOR);
|
||||
}
|
||||
if (device.SupportsDynamicLineWidth()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_WIDTH);
|
||||
}
|
||||
if (device.SupportsDynamicDepthBias()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
|
||||
}
|
||||
if (device.SupportsDynamicBlendConstants()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS);
|
||||
}
|
||||
if (device.SupportsDynamicDepthBounds()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS);
|
||||
}
|
||||
if (device.SupportsDynamicStencilCompareMask()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK);
|
||||
}
|
||||
if (device.SupportsDynamicStencilWriteMask()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK);
|
||||
}
|
||||
if (device.SupportsDynamicStencilReference()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
|
||||
}
|
||||
|
||||
// EDS1 - Extended Dynamic State
|
||||
if (key.state.extended_dynamic_state) {
|
||||
static constexpr std::array extended{
|
||||
VK_DYNAMIC_STATE_CULL_MODE_EXT,
|
||||
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
|
||||
//VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, //Disabled for VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME
|
||||
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
|
||||
@@ -856,49 +880,65 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
|
||||
};
|
||||
if (key.state.dynamic_vertex_input) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
|
||||
}
|
||||
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
|
||||
if (key.state.extended_dynamic_state_2) {
|
||||
static constexpr std::array extended2{
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
|
||||
|
||||
// Note: VERTEX_INPUT_BINDING_STRIDE is part of EDS1, not VIDS
|
||||
// When VIDS is disabled, we still need dynamic stride with BindVertexBuffers2EXT
|
||||
if (!key.state.dynamic_vertex_input) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
|
||||
}
|
||||
if (key.state.extended_dynamic_state_2_extra) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
|
||||
}
|
||||
if (key.state.extended_dynamic_state_3_blend) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
|
||||
}
|
||||
|
||||
// VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
}
|
||||
if (key.state.extended_dynamic_state_3_enables) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
|
||||
// Vertex Input Dynamic State (independent toggle, replaces VERTEX_INPUT_BINDING_STRIDE when enabled)
|
||||
if (key.state.dynamic_vertex_input) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
|
||||
}
|
||||
|
||||
// additional state3 extensions
|
||||
VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT,
|
||||
// EDS2 - Extended Dynamic State 2 Core (3 states)
|
||||
if (key.state.extended_dynamic_state_2) {
|
||||
static constexpr std::array extended2{
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
|
||||
}
|
||||
|
||||
VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT,
|
||||
// EDS2 - Logic Op (granular feature)
|
||||
if (key.state.extended_dynamic_state_2_logic_op) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
|
||||
}
|
||||
|
||||
VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
}
|
||||
// EDS3 - Blending (composite: ColorBlendEnable + Equation + WriteMask)
|
||||
if (key.state.extended_dynamic_state_3_blend) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
|
||||
|
||||
// VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
}
|
||||
|
||||
// EDS3 - Enables (granular: DepthClamp + LogicOpEnable + ...)
|
||||
if (key.state.extended_dynamic_state_3_enables) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
|
||||
|
||||
// additional state3 extensions
|
||||
VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT,
|
||||
|
||||
VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT,
|
||||
|
||||
VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
}
|
||||
|
||||
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
|
||||
|
||||
@@ -341,6 +341,17 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.support_fp64_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
||||
|
||||
// Switch/Maxwell native float behavior - ONLY for Turnip Mesa (Stock Qualcomm broken)
|
||||
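// Stock Adreno drivers have broken float controls disabled in vulkan_device.cpp
// NOTE(review): the force_fp32_* conditions below compare driver_id against
// VK_DRIVER_ID_QUALCOMM_PROPRIETARY (the stock driver), not VK_DRIVER_ID_MESA_TURNIP,
// which contradicts "ONLY for Turnip" - confirm which driver is intended.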
|
||||
.force_fp32_denorm_flush = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
|
||||
device.IsKhrShaderFloatControlsSupported(), // false on Stock, true on Turnip
|
||||
.force_fp32_denorm_preserve = false, // FTZ dominates
|
||||
.force_fp32_rte_rounding = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
|
||||
device.IsKhrShaderFloatControlsSupported(), // false on Stock, true on Turnip
|
||||
.force_fp32_signed_zero_inf_nan = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
|
||||
device.IsKhrShaderFloatControlsSupported(), // false on Stock, true on Turnip
|
||||
|
||||
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
|
||||
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
|
||||
.support_viewport_index_layer_non_geometry =
|
||||
@@ -371,10 +382,17 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
|
||||
.has_broken_unsigned_image_offsets = false,
|
||||
.has_broken_signed_operations = false,
|
||||
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
|
||||
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
|
||||
.ignore_nan_fp_comparisons = false,
|
||||
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
|
||||
.needs_1d_texture_emulation =
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY,
|
||||
.has_broken_robust =
|
||||
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
@@ -408,14 +426,34 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
|
||||
LOG_INFO(Render_Vulkan, "DynamicState value is set to {}", (u32) dynamic_state);
|
||||
|
||||
dynamic_features = DynamicFeatures{
|
||||
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0,
|
||||
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1,
|
||||
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1,
|
||||
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2,
|
||||
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2,
|
||||
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported() && dynamic_state > 0,
|
||||
};
|
||||
dynamic_features = {};
|
||||
// EDS1 - All-or-nothing (enabled if driver supports AND setting > 0)
|
||||
dynamic_features.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0;
|
||||
// EDS2 - Core features (enabled if driver supports AND setting > 1)
|
||||
dynamic_features.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1;
|
||||
dynamic_features.has_extended_dynamic_state_2_logic_op = device.IsExtExtendedDynamicState2LogicOpSupported() && dynamic_state > 1;
|
||||
dynamic_features.has_extended_dynamic_state_2_patch_control_points = device.IsExtExtendedDynamicState2PatchControlPointsSupported() && dynamic_state > 1;
|
||||
// EDS3 - Granular features (enabled if driver supports AND setting > 2)
|
||||
dynamic_features.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_depth_clamp = device.IsExtExtendedDynamicState3DepthClampEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_logic_op_enable = device.IsExtExtendedDynamicState3LogicOpEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_tessellation_domain_origin = device.IsExtExtendedDynamicState3TessellationDomainOriginSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_polygon_mode = device.IsExtExtendedDynamicState3PolygonModeSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_rasterization_samples = device.IsExtExtendedDynamicState3RasterizationSamplesSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_sample_mask = device.IsExtExtendedDynamicState3SampleMaskSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_alpha_to_coverage_enable = device.IsExtExtendedDynamicState3AlphaToCoverageEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_alpha_to_one_enable = device.IsExtExtendedDynamicState3AlphaToOneEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_depth_clip_enable = device.IsExtExtendedDynamicState3DepthClipEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_depth_clip_negative_one_to_one = device.IsExtExtendedDynamicState3DepthClipNegativeOneToOneSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_line_rasterization_mode = device.IsExtExtendedDynamicState3LineRasterizationModeSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_line_stipple_enable = device.IsExtExtendedDynamicState3LineStippleEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_provoking_vertex_mode = device.IsExtExtendedDynamicState3ProvokingVertexModeSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_conservative_rasterization_mode = device.IsExtExtendedDynamicState3ConservativeRasterizationModeSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_sample_locations_enable = device.IsExtExtendedDynamicState3SampleLocationsEnableSupported() && dynamic_state > 2;
|
||||
dynamic_features.has_extended_dynamic_state_3_rasterization_stream = device.IsExtExtendedDynamicState3RasterizationStreamSupported() && dynamic_state > 2;
|
||||
// Vertex input dynamic state (independent toggle)
|
||||
dynamic_features.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported() && Settings::values.vertex_input_dynamic_state.GetValue();
|
||||
}
|
||||
|
||||
PipelineCache::~PipelineCache() {
|
||||
@@ -516,17 +554,30 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
|
||||
GraphicsPipelineCacheKey key;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key));
|
||||
|
||||
if ((key.state.extended_dynamic_state != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state ||
|
||||
(key.state.extended_dynamic_state_2 != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_2 ||
|
||||
(key.state.extended_dynamic_state_2_extra != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_2_extra ||
|
||||
(key.state.extended_dynamic_state_3_blend != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_3_blend ||
|
||||
(key.state.extended_dynamic_state_3_enables != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_3_enables ||
|
||||
(key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) {
|
||||
// Validate dynamic features compatibility - granular per-feature check
|
||||
if ((key.state.extended_dynamic_state != 0) != dynamic_features.has_extended_dynamic_state
|
||||
|| (key.state.extended_dynamic_state_2 != 0) != dynamic_features.has_extended_dynamic_state_2
|
||||
|| (key.state.extended_dynamic_state_2_logic_op != 0) != dynamic_features.has_extended_dynamic_state_2_logic_op
|
||||
|| (key.state.extended_dynamic_state_2_patch_control_points != 0) != dynamic_features.has_extended_dynamic_state_2_patch_control_points
|
||||
|| (key.state.extended_dynamic_state_3_blend != 0) != dynamic_features.has_extended_dynamic_state_3_blend
|
||||
|| (key.state.extended_dynamic_state_3_enables != 0) != dynamic_features.has_extended_dynamic_state_3_enables
|
||||
|| (key.state.extended_dynamic_state_3_depth_clamp != 0) != dynamic_features.has_extended_dynamic_state_3_depth_clamp
|
||||
|| (key.state.extended_dynamic_state_3_logic_op_enable != 0) != dynamic_features.has_extended_dynamic_state_3_logic_op_enable
|
||||
|| (key.state.extended_dynamic_state_3_tessellation_domain_origin != 0) != dynamic_features.has_extended_dynamic_state_3_tessellation_domain_origin
|
||||
|| (key.state.extended_dynamic_state_3_polygon_mode != 0) != dynamic_features.has_extended_dynamic_state_3_polygon_mode
|
||||
|| (key.state.extended_dynamic_state_3_rasterization_samples != 0) != dynamic_features.has_extended_dynamic_state_3_rasterization_samples
|
||||
|| (key.state.extended_dynamic_state_3_sample_mask != 0) != dynamic_features.has_extended_dynamic_state_3_sample_mask
|
||||
|| (key.state.extended_dynamic_state_3_alpha_to_coverage_enable != 0) != dynamic_features.has_extended_dynamic_state_3_alpha_to_coverage_enable
|
||||
|| (key.state.extended_dynamic_state_3_alpha_to_one_enable != 0) != dynamic_features.has_extended_dynamic_state_3_alpha_to_one_enable
|
||||
|| (key.state.extended_dynamic_state_3_depth_clip_enable != 0) != dynamic_features.has_extended_dynamic_state_3_depth_clip_enable
|
||||
|| (key.state.extended_dynamic_state_3_depth_clip_negative_one_to_one != 0) != dynamic_features.has_extended_dynamic_state_3_depth_clip_negative_one_to_one
|
||||
|| (key.state.extended_dynamic_state_3_line_rasterization_mode != 0) != dynamic_features.has_extended_dynamic_state_3_line_rasterization_mode
|
||||
|| (key.state.extended_dynamic_state_3_line_stipple_enable != 0) != dynamic_features.has_extended_dynamic_state_3_line_stipple_enable
|
||||
|| (key.state.extended_dynamic_state_3_provoking_vertex_mode != 0) != dynamic_features.has_extended_dynamic_state_3_provoking_vertex_mode
|
||||
|| (key.state.extended_dynamic_state_3_conservative_rasterization_mode != 0) != dynamic_features.has_extended_dynamic_state_3_conservative_rasterization_mode
|
||||
|| (key.state.extended_dynamic_state_3_sample_locations_enable != 0) != dynamic_features.has_extended_dynamic_state_3_sample_locations_enable
|
||||
|| (key.state.extended_dynamic_state_3_rasterization_stream != 0) != dynamic_features.has_extended_dynamic_state_3_rasterization_stream
|
||||
|| (key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) {
|
||||
return;
|
||||
}
|
||||
workers.QueueWork([this, key, envs_ = std::move(envs), &state, &callback]() mutable {
|
||||
@@ -677,7 +728,17 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||
|
||||
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
|
||||
ConvertLegacyToGeneric(program, runtime_info);
|
||||
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
|
||||
|
||||
// Adreno doesn't support subgroup operations in vertex stages
|
||||
// Disable subgroup features for vertex shaders if not supported by the device
|
||||
Shader::Profile stage_profile = profile;
|
||||
if (program.stage == Shader::Stage::VertexA || program.stage == Shader::Stage::VertexB) {
|
||||
if (!device.IsSubgroupSupportedForStage(VK_SHADER_STAGE_VERTEX_BIT)) {
|
||||
stage_profile.support_vote = false;
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<u32> code{EmitSPIRV(stage_profile, runtime_info, program, binding, this->optimize_spirv_output)};
|
||||
device.SaveShader(code);
|
||||
modules[stage_index] = BuildShader(device, code);
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
@@ -771,6 +832,26 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||
}
|
||||
|
||||
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||
|
||||
// Mobile GPUs (Adreno, Mali, etc.) have lower shared memory limits (32KB vs Switch's 48KB)
|
||||
// Clamp shared memory usage to device maximum to prevent missing textures/effects
|
||||
const auto driver_id = device.GetDriverID();
|
||||
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
|
||||
|
||||
if (is_mobile) {
|
||||
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
|
||||
if (program.shared_memory_size > max_shared_memory) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - "
|
||||
"clamping to device limit (may cause artifacts if shader accesses out of bounds)",
|
||||
key.unique_hash, program.shared_memory_size / 1024, max_shared_memory / 1024);
|
||||
program.shared_memory_size = max_shared_memory;
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
|
||||
device.SaveShader(code);
|
||||
vk::ShaderModule spv_module{BuildShader(device, code)};
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -113,6 +116,10 @@ public:
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
// Read-only accessor for dynamic_features, the DynamicFeatures flags that the
// PipelineCache constructor computes. RasterizerVulkan::UpdateDynamicStates
// reads these flags to decide which dynamic states to update.
[[nodiscard]] const DynamicFeatures& GetDynamicFeatures() const noexcept {
|
||||
return dynamic_features;
|
||||
}
|
||||
|
||||
private:
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
|
||||
|
||||
|
||||
@@ -954,9 +954,10 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||
UpdateStencilFaces(regs);
|
||||
UpdateLineWidth(regs);
|
||||
|
||||
const u8 dynamic_state = Settings::values.dyna_state.GetValue();
|
||||
const auto& dynamic_features = pipeline_cache.GetDynamicFeatures();
|
||||
|
||||
if (device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0) {
|
||||
// EDS1 - Extended Dynamic State 1
|
||||
if (dynamic_features.has_extended_dynamic_state) {
|
||||
UpdateCullMode(regs);
|
||||
UpdateDepthCompareOp(regs);
|
||||
UpdateFrontFace(regs);
|
||||
@@ -966,42 +967,82 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||
UpdateDepthTestEnable(regs);
|
||||
UpdateDepthWriteEnable(regs);
|
||||
UpdateStencilTestEnable(regs);
|
||||
if (device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1) {
|
||||
UpdatePrimitiveRestartEnable(regs);
|
||||
UpdateRasterizerDiscardEnable(regs);
|
||||
UpdateDepthBiasEnable(regs);
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2) {
|
||||
using namespace Tegra::Engines;
|
||||
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
|
||||
struct In {
|
||||
const Maxwell3D::Regs::VertexAttribute::Type d;
|
||||
In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
|
||||
bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
|
||||
return n.type == d;
|
||||
}
|
||||
};
|
||||
auto has_float = std::any_of(regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(), In(Maxwell3D::Regs::VertexAttribute::Type::Float));
|
||||
if (regs.logic_op.enable) {
|
||||
regs.logic_op.enable = static_cast<u32>(!has_float);
|
||||
}
|
||||
}
|
||||
UpdateLogicOpEnable(regs);
|
||||
UpdateDepthClampEnable(regs);
|
||||
UpdateLineStippleEnable(regs);
|
||||
UpdateConservativeRasterizationMode(regs);
|
||||
}
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1) {
|
||||
UpdateLogicOp(regs);
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2) {
|
||||
UpdateBlending(regs);
|
||||
}
|
||||
}
|
||||
if (device.IsExtVertexInputDynamicStateSupported() && dynamic_state > 0)
|
||||
if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput())
|
||||
|
||||
// EDS2 - Extended Dynamic State 2 Core
|
||||
if (dynamic_features.has_extended_dynamic_state_2) {
|
||||
if (state_tracker.TouchStateEnable()) {
|
||||
UpdatePrimitiveRestartEnable(regs);
|
||||
UpdateRasterizerDiscardEnable(regs);
|
||||
UpdateDepthBiasEnable(regs);
|
||||
}
|
||||
}
|
||||
|
||||
// EDS2 - LogicOp (granular feature)
|
||||
if (dynamic_features.has_extended_dynamic_state_2_logic_op) {
|
||||
UpdateLogicOp(regs);
|
||||
}
|
||||
|
||||
// EDS3 - Depth Clamp Enable (granular)
|
||||
if (dynamic_features.has_extended_dynamic_state_3_depth_clamp ||
|
||||
dynamic_features.has_extended_dynamic_state_3_enables) {
|
||||
if (state_tracker.TouchStateEnable()) {
|
||||
UpdateDepthClampEnable(regs);
|
||||
}
|
||||
}
|
||||
|
||||
// EDS3 - Logic Op Enable (granular)
|
||||
if (dynamic_features.has_extended_dynamic_state_3_logic_op_enable ||
|
||||
dynamic_features.has_extended_dynamic_state_3_enables) {
|
||||
if (state_tracker.TouchStateEnable()) {
|
||||
using namespace Tegra::Engines;
|
||||
// AMD workaround for logic op with float vertex attributes
|
||||
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
|
||||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
|
||||
struct In {
|
||||
const Maxwell3D::Regs::VertexAttribute::Type d;
|
||||
In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
|
||||
bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
|
||||
return n.type == d;
|
||||
}
|
||||
};
|
||||
auto has_float = std::any_of(regs.vertex_attrib_format.begin(),
|
||||
regs.vertex_attrib_format.end(),
|
||||
In(Maxwell3D::Regs::VertexAttribute::Type::Float));
|
||||
if (regs.logic_op.enable) {
|
||||
regs.logic_op.enable = static_cast<u32>(!has_float);
|
||||
}
|
||||
}
|
||||
UpdateLogicOpEnable(regs);
|
||||
}
|
||||
}
|
||||
|
||||
// EDS3 - Line Stipple Enable (granular)
|
||||
if (dynamic_features.has_extended_dynamic_state_3_line_stipple_enable) {
|
||||
if (state_tracker.TouchStateEnable()) {
|
||||
UpdateLineStippleEnable(regs);
|
||||
}
|
||||
}
|
||||
|
||||
// EDS3 - Conservative Rasterization Mode (granular)
|
||||
if (dynamic_features.has_extended_dynamic_state_3_conservative_rasterization_mode) {
|
||||
if (state_tracker.TouchStateEnable()) {
|
||||
UpdateConservativeRasterizationMode(regs);
|
||||
}
|
||||
}
|
||||
|
||||
// EDS3 - Blending (composite feature: ColorBlendEnable + ColorBlendEquation + ColorWriteMask)
|
||||
if (dynamic_features.has_extended_dynamic_state_3_blend) {
|
||||
UpdateBlending(regs);
|
||||
}
|
||||
|
||||
// Vertex Input Dynamic State
|
||||
if (dynamic_features.has_dynamic_vertex_input) {
|
||||
if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput()) {
|
||||
UpdateVertexInput(regs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::HandleTransformFeedback() {
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/surface.h"
|
||||
@@ -19,6 +20,23 @@ namespace {
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
// Check if the driver uses tile-based deferred rendering (TBDR) architecture
|
||||
// These GPUs benefit from optimized load/store operations to keep data on-chip
|
||||
//
|
||||
// TBDR GPUs supported in Eden:
|
||||
// - Qualcomm Adreno (Snapdragon): Most Android flagship/midrange devices
|
||||
// - ARM Mali: Android devices (Samsung Exynos, MediaTek, etc.)
|
||||
// - Imagination PowerVR: Older iOS devices, some Android tablets
|
||||
// - Samsung Xclipse: Galaxy S22+ (AMD RDNA2-based, but uses TBDR mode)
|
||||
// - Broadcom VideoCore: Raspberry Pi
|
||||
[[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) {
|
||||
return driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY;
|
||||
}
|
||||
|
||||
constexpr SurfaceType GetSurfaceType(PixelFormat format) {
|
||||
switch (format) {
|
||||
// Depth formats
|
||||
@@ -44,23 +62,57 @@ using VideoCore::Surface::SurfaceType;
|
||||
}
|
||||
|
||||
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
|
||||
VkSampleCountFlagBits samples) {
|
||||
VkSampleCountFlagBits samples,
|
||||
bool tbdr_will_clear,
|
||||
bool tbdr_discard_after,
|
||||
bool tbdr_read_only = false) {
|
||||
using MaxwellToVK::SurfaceFormat;
|
||||
|
||||
const SurfaceType surface_type = GetSurfaceType(format);
|
||||
const bool has_stencil = surface_type == SurfaceType::DepthStencil ||
|
||||
surface_type == SurfaceType::Stencil;
|
||||
|
||||
// TBDR optimization: Apply hints only on tile-based GPUs
|
||||
// Desktop GPUs (NVIDIA/AMD/Intel) ignore these hints and use standard behavior
|
||||
const bool is_tbdr = IsTBDRGPU(device.GetDriverID());
|
||||
|
||||
// On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory)
|
||||
// On Desktop: Always LOAD to preserve existing content (safer default)
|
||||
VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
if (is_tbdr && tbdr_will_clear) {
|
||||
load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
}
|
||||
|
||||
// On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory)
|
||||
// On Desktop: Always STORE (safer default)
|
||||
// VK_QCOM_render_pass_store_ops: Use NONE_QCOM for read-only attachments (preserves outside render area)
|
||||
VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
if (is_tbdr && tbdr_discard_after) {
|
||||
store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
} else if (is_tbdr && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
|
||||
store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
|
||||
}
|
||||
|
||||
// Stencil operations follow same logic
|
||||
VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
if (has_stencil && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
|
||||
stencil_store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
|
||||
} else if (has_stencil) {
|
||||
stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
|
||||
: VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE
|
||||
: VK_ATTACHMENT_STORE_OP_STORE;
|
||||
}
|
||||
|
||||
return {
|
||||
.flags = {},
|
||||
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
|
||||
.samples = samples,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD
|
||||
: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
||||
.stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE
|
||||
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
|
||||
.loadOp = load_op,
|
||||
.storeOp = store_op,
|
||||
.stencilLoadOp = stencil_load_op,
|
||||
.stencilStoreOp = stencil_store_op,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
@@ -75,6 +127,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
|
||||
if (!is_new) {
|
||||
return *pair->second;
|
||||
}
|
||||
|
||||
const bool is_tbdr = IsTBDRGPU(device->GetDriverID());
|
||||
if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) {
|
||||
LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})",
|
||||
static_cast<u32>(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after);
|
||||
}
|
||||
|
||||
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
|
||||
std::array<VkAttachmentReference, 8> references{};
|
||||
u32 num_attachments{};
|
||||
@@ -87,7 +146,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
if (is_valid) {
|
||||
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
|
||||
descriptions.push_back(AttachmentDescription(*device, format, key.samples,
|
||||
key.tbdr_will_clear, key.tbdr_discard_after));
|
||||
num_attachments = static_cast<u32>(index + 1);
|
||||
++num_colors;
|
||||
}
|
||||
@@ -99,10 +159,19 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
|
||||
.attachment = num_colors,
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
|
||||
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
|
||||
key.tbdr_will_clear, key.tbdr_discard_after, key.tbdr_read_only));
|
||||
}
|
||||
VkSubpassDescriptionFlags subpass_flags = 0;
|
||||
if (key.qcom_shader_resolve) {
|
||||
// VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader
|
||||
// This flag allows using a programmable fragment shader for MSAA resolve instead of
|
||||
// fixed-function hardware resolve, enabling better quality and HDR format support
|
||||
subpass_flags |= 0x00000004; // VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
|
||||
}
|
||||
|
||||
const VkSubpassDescription subpass{
|
||||
.flags = 0,
|
||||
.flags = subpass_flags,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.inputAttachmentCount = 0,
|
||||
.pInputAttachments = nullptr,
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -17,6 +20,15 @@ struct RenderPassKey {
|
||||
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
|
||||
VideoCore::Surface::PixelFormat depth_format;
|
||||
VkSampleCountFlagBits samples;
|
||||
|
||||
// TBDR optimization hints - only affect tile-based GPUs (Qualcomm, ARM, Imagination)
|
||||
// These flags indicate the expected usage pattern to optimize load/store operations
|
||||
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
|
||||
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
|
||||
bool tbdr_read_only{false}; // Attachment is read-only (input attachment, depth test without writes)
|
||||
|
||||
// VK_QCOM_render_pass_shader_resolve support
|
||||
bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass)
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -27,6 +39,8 @@ struct hash<Vulkan::RenderPassKey> {
|
||||
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
|
||||
size_t value = static_cast<size_t>(key.depth_format) << 48;
|
||||
value ^= static_cast<size_t>(key.samples) << 52;
|
||||
value ^= (static_cast<size_t>(key.tbdr_will_clear) << 56);
|
||||
value ^= (static_cast<size_t>(key.tbdr_discard_after) << 57);
|
||||
for (size_t i = 0; i < key.color_formats.size(); ++i) {
|
||||
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
|
||||
}
|
||||
|
||||
@@ -171,6 +171,10 @@ void Swapchain::Create(
|
||||
|
||||
resource_ticks.clear();
|
||||
resource_ticks.resize(image_count);
|
||||
|
||||
// Initialize incremental-present probe flags for this swapchain.
|
||||
incremental_present_usable = device.IsKhrIncrementalPresentSupported();
|
||||
incremental_present_probed = false;
|
||||
}
|
||||
|
||||
bool Swapchain::AcquireNextImage() {
|
||||
@@ -202,7 +206,13 @@ bool Swapchain::AcquireNextImage() {
|
||||
|
||||
void Swapchain::Present(VkSemaphore render_semaphore) {
|
||||
const auto present_queue{device.GetPresentQueue()};
|
||||
const VkPresentInfoKHR present_info{
|
||||
// If the device advertises VK_KHR_incremental_present, we attempt a one-time probe
|
||||
// on the first present to validate the driver/compositor accepts present-region info.
|
||||
VkPresentRegionsKHR present_regions{};
|
||||
VkPresentRegionKHR region{};
|
||||
VkRectLayerKHR layer{};
|
||||
|
||||
VkPresentInfoKHR present_info{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
|
||||
@@ -212,6 +222,20 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
|
||||
.pImageIndices = &image_index,
|
||||
.pResults = nullptr,
|
||||
};
|
||||
|
||||
if (incremental_present_usable && !incremental_present_probed) {
|
||||
// Build a minimal present-region describing a single 1x1 dirty rect at (0,0).
|
||||
layer.offset = {0, 0};
|
||||
layer.extent = {1, 1};
|
||||
region.rectangleCount = 1;
|
||||
region.pRectangles = &layer;
|
||||
present_regions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
|
||||
present_regions.pNext = nullptr;
|
||||
present_regions.swapchainCount = 1;
|
||||
present_regions.pRegions = &region;
|
||||
|
||||
present_info.pNext = &present_regions;
|
||||
}
|
||||
std::scoped_lock lock{scheduler.submit_mutex};
|
||||
switch (const VkResult result = present_queue.Present(present_info)) {
|
||||
case VK_SUCCESS:
|
||||
@@ -227,8 +251,18 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", string_VkResult(result));
|
||||
// If the first present with incremental-present pNext failed, disable future use.
|
||||
if (incremental_present_usable && !incremental_present_probed) {
|
||||
incremental_present_usable = false;
|
||||
LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_incremental_present for this swapchain due to present failure: {}", string_VkResult(result));
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (incremental_present_usable && !incremental_present_probed) {
|
||||
// Mark probe as completed if we reached here (success or handled failure above).
|
||||
incremental_present_probed = true;
|
||||
LOG_INFO(Render_Vulkan, "VK_KHR_incremental_present probe completed: usable={}", incremental_present_usable);
|
||||
}
|
||||
++frame_index;
|
||||
if (frame_index >= image_count) {
|
||||
frame_index = 0;
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -158,6 +161,8 @@ private:
|
||||
|
||||
bool is_outdated{};
|
||||
bool is_suboptimal{};
|
||||
bool incremental_present_usable{};
|
||||
bool incremental_present_probed{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -66,10 +66,20 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
|
||||
[[nodiscard]] VkImageType ConvertImageType(const ImageType type, const Device& device) {
|
||||
switch (type) {
|
||||
case ImageType::e1D:
|
||||
return VK_IMAGE_TYPE_1D;
|
||||
// Mobile Vulkan (Adreno, Mali, PowerVR, IMG) lacks Sampled1D SPIR-V capability
|
||||
// Emulate as 2D texture with height=1 on mobile, use native 1D on desktop
|
||||
{
|
||||
const auto driver_id = device.GetDriverID();
|
||||
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
|
||||
return is_mobile ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
|
||||
}
|
||||
case ImageType::e2D:
|
||||
case ImageType::Linear:
|
||||
return VK_IMAGE_TYPE_2D;
|
||||
@@ -141,7 +151,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = flags,
|
||||
.imageType = ConvertImageType(info.type),
|
||||
.imageType = ConvertImageType(info.type, device),
|
||||
.format = format_info.format,
|
||||
.extent{
|
||||
.width = info.size.width >> samples_x,
|
||||
@@ -160,6 +170,40 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
};
|
||||
}
|
||||
|
||||
/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists
|
||||
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
|
||||
if (info.num_samples <= 1) {
|
||||
return info;
|
||||
}
|
||||
|
||||
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal,
|
||||
false, info.format).format;
|
||||
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
|
||||
|
||||
if (!is_hdr_format) {
|
||||
return info;
|
||||
}
|
||||
|
||||
// Qualcomm: VK_QCOM_render_pass_shader_resolve handles HDR+MSAA
|
||||
if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
|
||||
if (device.IsQcomRenderPassShaderResolveSupported()) {
|
||||
return info;
|
||||
}
|
||||
}
|
||||
|
||||
// Other vendors: shaderStorageImageMultisample handles HDR+MSAA
|
||||
if (device.IsStorageImageMultisampleSupported()) {
|
||||
return info;
|
||||
}
|
||||
|
||||
// No suitable resolve method - degrade to non-MSAA
|
||||
LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples",
|
||||
vk_format);
|
||||
info.num_samples = 1;
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
|
||||
const ImageInfo& info, std::span<const VkFormat> view_formats) {
|
||||
if (info.type == ImageType::Buffer) {
|
||||
@@ -272,10 +316,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
return VK_COMPONENT_SWIZZLE_ZERO;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
|
||||
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type, const Device& device) {
|
||||
const auto driver_id = device.GetDriverID();
|
||||
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
|
||||
|
||||
switch (type) {
|
||||
case Shader::TextureType::Color1D:
|
||||
return VK_IMAGE_VIEW_TYPE_1D;
|
||||
// Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
|
||||
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
|
||||
case Shader::TextureType::Color2D:
|
||||
case Shader::TextureType::Color2DRect:
|
||||
return VK_IMAGE_VIEW_TYPE_2D;
|
||||
@@ -284,7 +336,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
case Shader::TextureType::Color3D:
|
||||
return VK_IMAGE_VIEW_TYPE_3D;
|
||||
case Shader::TextureType::ColorArray1D:
|
||||
return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
|
||||
// Emulate 1D array as 2D array with height=1 on mobile
|
||||
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
|
||||
case Shader::TextureType::ColorArray2D:
|
||||
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
|
||||
case Shader::TextureType::ColorArrayCube:
|
||||
@@ -297,10 +350,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
return VK_IMAGE_VIEW_TYPE_2D;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
|
||||
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type, const Device& device) {
|
||||
const auto driver_id = device.GetDriverID();
|
||||
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
|
||||
|
||||
switch (type) {
|
||||
case VideoCommon::ImageViewType::e1D:
|
||||
return VK_IMAGE_VIEW_TYPE_1D;
|
||||
// Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
|
||||
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
|
||||
case VideoCommon::ImageViewType::e2D:
|
||||
case VideoCommon::ImageViewType::Rect:
|
||||
return VK_IMAGE_VIEW_TYPE_2D;
|
||||
@@ -309,7 +370,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
case VideoCommon::ImageViewType::e3D:
|
||||
return VK_IMAGE_VIEW_TYPE_3D;
|
||||
case VideoCommon::ImageViewType::e1DArray:
|
||||
return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
|
||||
// Emulate 1D array as 2D array with height=1 on mobile
|
||||
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
|
||||
case VideoCommon::ImageViewType::e2DArray:
|
||||
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
|
||||
case VideoCommon::ImageViewType::CubeArray:
|
||||
@@ -857,6 +919,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
|
||||
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
|
||||
compute_pass_descriptor_queue, memory_allocator);
|
||||
}
|
||||
|
||||
// MSAA copy support via compute shader (only for non-Qualcomm with shaderStorageImageMultisample)
|
||||
// Qualcomm uses VK_QCOM_render_pass_shader_resolve (fragment shader in render pass)
|
||||
if (device.IsStorageImageMultisampleSupported()) {
|
||||
msaa_copy_pass = std::make_unique<MSAACopyPass>(
|
||||
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
|
||||
@@ -1323,7 +1388,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
|
||||
case PixelFormat::ASTC_2D_8X6_SRGB:
|
||||
case PixelFormat::ASTC_2D_6X5_UNORM:
|
||||
case PixelFormat::ASTC_2D_6X5_SRGB:
|
||||
case PixelFormat::E5B9G9R9_FLOAT:
|
||||
case PixelFormat::D32_FLOAT:
|
||||
case PixelFormat::D16_UNORM:
|
||||
case PixelFormat::X8_D24_UNORM:
|
||||
@@ -1487,6 +1551,23 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
|
||||
std::span<const VideoCommon::ImageCopy> copies) {
|
||||
const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1;
|
||||
|
||||
// Use VK_QCOM_render_pass_shader_resolve for HDR formats on Qualcomm
|
||||
// This is more efficient than compute shader (stays on-chip in TBDR)
|
||||
const bool is_hdr_format = src.info.format == PixelFormat::B10G11R11_FLOAT ||
|
||||
dst.info.format == PixelFormat::B10G11R11_FLOAT;
|
||||
const bool use_qcom_resolve = msaa_to_non_msaa &&
|
||||
device.IsQcomRenderPassShaderResolveSupported() &&
|
||||
is_hdr_format &&
|
||||
copies.size() == 1; // QCOM resolve works best with single full copy
|
||||
|
||||
if (use_qcom_resolve) {
|
||||
// Create temporary framebuffer with resolve target
|
||||
// TODO Camille: Implement QCOM shader resolve path with proper framebuffer setup
|
||||
// For now, fall through to standard path
|
||||
LOG_DEBUG(Render_Vulkan, "QCOM shader resolve opportunity detected but not yet implemented");
|
||||
}
|
||||
|
||||
if (msaa_copy_pass) {
|
||||
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
|
||||
}
|
||||
@@ -1510,10 +1591,20 @@ void TextureCacheRuntime::TickFrame() {}
|
||||
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||
VAddr cpu_addr_)
|
||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
|
||||
runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info,
|
||||
runtime->ViewFormats(info.format))),
|
||||
aspect_mask(ImageAspectMask(info.format)) {
|
||||
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
|
||||
runtime{&runtime_} {
|
||||
// Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample
|
||||
// This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail
|
||||
const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_);
|
||||
|
||||
// Update our stored info with adjusted values (may have num_samples=1 now)
|
||||
info = adjusted_info;
|
||||
|
||||
// Create image with adjusted info
|
||||
original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info,
|
||||
runtime->ViewFormats(adjusted_info.format));
|
||||
aspect_mask = ImageAspectMask(adjusted_info.format);
|
||||
|
||||
if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) {
|
||||
switch (Settings::values.accelerate_astc.GetValue()) {
|
||||
case Settings::AstcDecodeMode::Gpu:
|
||||
if (Settings::values.astc_recompression.GetValue() ==
|
||||
@@ -2029,6 +2120,41 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
||||
}
|
||||
}
|
||||
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
|
||||
VkFormat view_format = format_info.format;
|
||||
|
||||
// Format reinterpretation for games with incorrect format usage
|
||||
// Only apply to sampled images (not render targets)
|
||||
// NOTE: Storage images use separate views created via StorageView()/MakeView(),
|
||||
// so reinterpretation here only affects sampled texture reads, not storage writes
|
||||
const auto reinterpretation_mode = Settings::values.format_reinterpretation.GetValue();
|
||||
if (reinterpretation_mode != Settings::FormatReinterpretation::Disabled &&
|
||||
!info.IsRenderTarget() &&
|
||||
(ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_SAMPLED_BIT)) {
|
||||
|
||||
switch (reinterpretation_mode) {
|
||||
case Settings::FormatReinterpretation::R32UintToR32Sfloat:
|
||||
if (view_format == VK_FORMAT_R32_UINT) {
|
||||
view_format = VK_FORMAT_R32_SFLOAT;
|
||||
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT -> R32_SFLOAT for sampled image");
|
||||
}
|
||||
break;
|
||||
case Settings::FormatReinterpretation::R32SintToR32Uint:
|
||||
if (view_format == VK_FORMAT_R32_SINT) {
|
||||
view_format = VK_FORMAT_R32_UINT;
|
||||
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_SINT -> R32_UINT for sampled image");
|
||||
}
|
||||
break;
|
||||
case Settings::FormatReinterpretation::R32SfloatToR32Sint:
|
||||
if (view_format == VK_FORMAT_R32_SFLOAT) {
|
||||
view_format = VK_FORMAT_R32_SINT;
|
||||
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_SFLOAT -> R32_SINT for sampled image");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ImageUsageFlags(format_info, format) != image.UsageFlags()) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Image view format {} has different usage flags than image format {}", format,
|
||||
@@ -2039,24 +2165,37 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
||||
.pNext = nullptr,
|
||||
.usage = ImageUsageFlags(format_info, format),
|
||||
};
|
||||
|
||||
// Vulkan spec: STORAGE_IMAGE and INPUT_ATTACHMENT descriptors MUST use identity swizzle
|
||||
// Using non-identity swizzle causes validation error and undefined behavior
|
||||
// IMPORTANT: Only force identity swizzle for render targets OR input attachments.
|
||||
// For sampled textures (even if they have storage capability), use the shader-specified
|
||||
// swizzle to avoid breaking UE4 lighting and other games. The actual storage writes happen
|
||||
// through StorageView() which uses MakeView() with hardcoded identity swizzle, so that
|
||||
// path is already spec-compliant.
|
||||
const bool is_input_attachment =
|
||||
(image_view_usage.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) != 0;
|
||||
const bool requires_identity_swizzle = Settings::values.force_identity_swizzle.GetValue() &&
|
||||
(info.IsRenderTarget() || is_input_attachment);
|
||||
|
||||
const VkImageViewCreateInfo create_info{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = &image_view_usage,
|
||||
.flags = 0,
|
||||
.image = image.Handle(),
|
||||
.viewType = VkImageViewType{},
|
||||
.format = format_info.format,
|
||||
.format = view_format,
|
||||
.components{
|
||||
.r = ComponentSwizzle(swizzle[0]),
|
||||
.g = ComponentSwizzle(swizzle[1]),
|
||||
.b = ComponentSwizzle(swizzle[2]),
|
||||
.a = ComponentSwizzle(swizzle[3]),
|
||||
.r = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[0]),
|
||||
.g = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[1]),
|
||||
.b = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[2]),
|
||||
.a = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[3]),
|
||||
},
|
||||
.subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
|
||||
};
|
||||
const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
|
||||
VkImageViewCreateInfo ci{create_info};
|
||||
ci.viewType = ImageViewType(tex_type);
|
||||
ci.viewType = ImageViewType(tex_type, *device);
|
||||
if (num_layers) {
|
||||
ci.subresourceRange.layerCount = *num_layers;
|
||||
}
|
||||
@@ -2197,7 +2336,7 @@ vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = image_handle,
|
||||
.viewType = ImageViewType(type),
|
||||
.viewType = ImageViewType(type, *device),
|
||||
.format = vk_format,
|
||||
.components{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
@@ -2214,15 +2353,27 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
|
||||
const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported();
|
||||
const auto color = tsc.BorderColor();
|
||||
|
||||
// VK_EXT_custom_border_color has two features:
|
||||
// - customBorderColors: Enables VK_BORDER_COLOR_*_CUSTOM_EXT, requires format OR customBorderColorWithoutFormat
|
||||
// - customBorderColorWithoutFormat: Allows VK_FORMAT_UNDEFINED (format-agnostic custom borders)
|
||||
//
|
||||
// Configuration logic:
|
||||
// 1. If BOTH features available: Use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT + VK_FORMAT_UNDEFINED (optimal)
|
||||
// 2. If only customBorderColors: Use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT + specific format (spec compliant)
|
||||
// 3. If only customBorderColorWithoutFormat: Shouldn't happen per spec; custom borders stay disabled (same as case 4)
|
||||
// 4. If neither: Use standard border colors (fallback)
|
||||
const bool has_custom_colors = device.HasCustomBorderColorFeature();
|
||||
const bool has_without_format = device.HasCustomBorderColorWithoutFormatFeature();
|
||||
const bool use_custom_border = arbitrary_borders && has_custom_colors;
|
||||
|
||||
const VkSamplerCustomBorderColorCreateInfoEXT border_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
// Reinterpret the sampler's border color as a VkClearColorValue via std::bit_cast.
|
||||
.customBorderColor = std::bit_cast<VkClearColorValue>(color),
|
||||
.format = VK_FORMAT_UNDEFINED,
|
||||
.format = has_without_format ? VK_FORMAT_UNDEFINED : VK_FORMAT_R8G8B8A8_UNORM,
|
||||
};
|
||||
const void* pnext = nullptr;
|
||||
if (arbitrary_borders) {
|
||||
if (use_custom_border) {
|
||||
pnext = &border_ci;
|
||||
}
|
||||
const VkSamplerReductionModeCreateInfoEXT reduction_ci{
|
||||
@@ -2257,7 +2408,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
|
||||
.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
|
||||
.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
|
||||
.borderColor =
|
||||
arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
|
||||
use_custom_border ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
|
||||
.unnormalizedCoordinates = VK_FALSE,
|
||||
});
|
||||
};
|
||||
@@ -2343,6 +2494,26 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||
}
|
||||
renderpass_key.samples = samples;
|
||||
|
||||
// Enable VK_QCOM_render_pass_shader_resolve for HDR+MSAA on Qualcomm
|
||||
// This performs MSAA resolve using fragment shader IN the render pass (on-chip)
|
||||
// Benefits: ~70% bandwidth reduction, better performance on TBDR architectures
|
||||
// Requirements: pResolveAttachments configured + explicit shader execution
|
||||
if (samples > VK_SAMPLE_COUNT_1_BIT && runtime.device.IsQcomRenderPassShaderResolveSupported()) {
|
||||
// Check if any color attachment is HDR format that benefits from shader resolve
|
||||
bool has_hdr_attachment = false;
|
||||
for (size_t index = 0; index < NUM_RT && !has_hdr_attachment; ++index) {
|
||||
const auto format = renderpass_key.color_formats[index];
|
||||
// B10G11R11_FLOAT benefits most: compute shader limited, fixed-function slower
|
||||
if (format == PixelFormat::B10G11R11_FLOAT) {
|
||||
has_hdr_attachment = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_hdr_attachment) {
|
||||
renderpass_key.qcom_shader_resolve = true;
|
||||
}
|
||||
}
|
||||
|
||||
renderpass = runtime.render_pass_cache.Get(renderpass_key);
|
||||
render_area.width = (std::min)(render_area.width, width);
|
||||
render_area.height = (std::min)(render_area.height, height);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
@@ -85,6 +88,10 @@ public:
|
||||
return msaa_copy_pass.operator bool();
|
||||
}
|
||||
|
||||
bool CanDownloadMSAA() const noexcept {
|
||||
return msaa_copy_pass.operator bool();
|
||||
}
|
||||
|
||||
void AccelerateImageUpload(Image&, const StagingBufferRef&,
|
||||
std::span<const VideoCommon::SwizzleParameters>);
|
||||
|
||||
|
||||
@@ -277,7 +277,19 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
|
||||
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
|
||||
bool via_header_index, u32 raw) {
|
||||
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
|
||||
ASSERT(handle.first <= tic_limit);
|
||||
|
||||
// Some games (especially on updates) use invalid texture handles beyond tic_limit
|
||||
// Clamp to limit instead of asserting to prevent crashes
|
||||
if (handle.first > tic_limit) {
|
||||
LOG_WARNING(HW_GPU, "Texture handle {} exceeds TIC limit {}, clamping to limit",
|
||||
handle.first, tic_limit);
|
||||
const u32 clamped_handle = std::min(handle.first, tic_limit);
|
||||
const GPUVAddr descriptor_addr{tic_addr + clamped_handle * sizeof(Tegra::Texture::TICEntry)};
|
||||
Tegra::Texture::TICEntry entry;
|
||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||
return entry;
|
||||
}
|
||||
|
||||
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
|
||||
Tegra::Texture::TICEntry entry;
|
||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -108,7 +111,6 @@ enum class PixelFormat {
|
||||
ASTC_2D_8X6_SRGB,
|
||||
ASTC_2D_6X5_UNORM,
|
||||
ASTC_2D_6X5_SRGB,
|
||||
E5B9G9R9_FLOAT,
|
||||
|
||||
MaxColorFormat,
|
||||
|
||||
@@ -249,7 +251,6 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
|
||||
8, // ASTC_2D_8X6_SRGB
|
||||
6, // ASTC_2D_6X5_UNORM
|
||||
6, // ASTC_2D_6X5_SRGB
|
||||
1, // E5B9G9R9_FLOAT
|
||||
1, // D32_FLOAT
|
||||
1, // D16_UNORM
|
||||
1, // X8_D24_UNORM
|
||||
@@ -359,7 +360,6 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
|
||||
6, // ASTC_2D_8X6_SRGB
|
||||
5, // ASTC_2D_6X5_UNORM
|
||||
5, // ASTC_2D_6X5_SRGB
|
||||
1, // E5B9G9R9_FLOAT
|
||||
1, // D32_FLOAT
|
||||
1, // D16_UNORM
|
||||
1, // X8_D24_UNORM
|
||||
@@ -469,7 +469,6 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
|
||||
128, // ASTC_2D_8X6_SRGB
|
||||
128, // ASTC_2D_6X5_UNORM
|
||||
128, // ASTC_2D_6X5_SRGB
|
||||
32, // E5B9G9R9_FLOAT
|
||||
32, // D32_FLOAT
|
||||
16, // D16_UNORM
|
||||
32, // X8_D24_UNORM
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -135,7 +138,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
|
||||
case Hash(TextureFormat::R32, SINT):
|
||||
return PixelFormat::R32_SINT;
|
||||
case Hash(TextureFormat::E5B9G9R9, FLOAT):
|
||||
return PixelFormat::E5B9G9R9_FLOAT;
|
||||
return PixelFormat::B10G11R11_FLOAT;
|
||||
case Hash(TextureFormat::Z32, FLOAT):
|
||||
return PixelFormat::D32_FLOAT;
|
||||
case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR):
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -205,8 +208,7 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
|
||||
return "ASTC_2D_6X5_UNORM";
|
||||
case PixelFormat::ASTC_2D_6X5_SRGB:
|
||||
return "ASTC_2D_6X5_SRGB";
|
||||
case PixelFormat::E5B9G9R9_FLOAT:
|
||||
return "E5B9G9R9_FLOAT";
|
||||
|
||||
case PixelFormat::D32_FLOAT:
|
||||
return "D32_FLOAT";
|
||||
case PixelFormat::D16_UNORM:
|
||||
|
||||
@@ -131,10 +131,6 @@ bool ImageBase::IsSafeDownload() const noexcept {
|
||||
if (True(flags & ImageFlagBits::CpuModified)) {
|
||||
return false;
|
||||
}
|
||||
if (info.num_samples > 1) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -101,8 +101,12 @@ void TextureCache<P>::RunGarbageCollector() {
|
||||
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
|
||||
return false;
|
||||
}
|
||||
const bool must_download =
|
||||
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
|
||||
const bool supports_msaa_download = HasMsaaDownloadSupport(image.info);
|
||||
if (!supports_msaa_download && image.info.num_samples > 1) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
}
|
||||
const bool must_download = supports_msaa_download && image.IsSafeDownload() &&
|
||||
False(image.flags & ImageFlagBits::BadOverlap);
|
||||
if (!high_priority_mode && must_download) {
|
||||
return false;
|
||||
}
|
||||
@@ -548,10 +552,14 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
|
||||
template <class P>
|
||||
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
|
||||
boost::container::small_vector<ImageId, 16> images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
|
||||
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
|
||||
if (!image.IsSafeDownload()) {
|
||||
return;
|
||||
}
|
||||
if (!HasMsaaDownloadSupport(image.info)) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
return;
|
||||
}
|
||||
image.flags &= ~ImageFlagBits::GpuModified;
|
||||
images.push_back(image_id);
|
||||
});
|
||||
@@ -930,6 +938,17 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
|
||||
return NULL_IMAGE_ID;
|
||||
}
|
||||
auto& image = slot_images[dst_id];
|
||||
if (image.info.num_samples > 1) {
|
||||
if (is_upload) {
|
||||
if (!HasMsaaUploadSupport(image.info)) {
|
||||
return NULL_IMAGE_ID;
|
||||
}
|
||||
} else {
|
||||
if (!HasMsaaDownloadSupport(image.info)) {
|
||||
return NULL_IMAGE_ID;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (False(image.flags & ImageFlagBits::GpuModified)) {
|
||||
// No need to waste time on an image that's synced with guest
|
||||
return NULL_IMAGE_ID;
|
||||
@@ -1056,7 +1075,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
TrackImage(image, image_id);
|
||||
|
||||
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
|
||||
if (!HasMsaaUploadSupport(image.info)) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
||||
runtime.TransitionImageLayout(image);
|
||||
return;
|
||||
@@ -1274,6 +1293,16 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
|
||||
return fitted_size;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool TextureCache<P>::HasMsaaUploadSupport(const ImageInfo& info) const noexcept {
|
||||
return info.num_samples <= 1 || runtime.CanUploadMSAA();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool TextureCache<P>::HasMsaaDownloadSupport(const ImageInfo& info) const noexcept {
|
||||
return info.num_samples <= 1 || runtime.CanDownloadMSAA();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
|
||||
UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
|
||||
@@ -1491,7 +1520,31 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
|
||||
for (const ImageId overlap_id : join_ignore_textures) {
|
||||
Image& overlap = slot_images[overlap_id];
|
||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||
UNIMPLEMENTED();
|
||||
// Merge GPU-modified contents from the overlapping image into the newly
|
||||
// created image to preserve guest-visible data. Compute shrink/scale
|
||||
// copies and dispatch a GPU-side copy. This mirrors the behavior used
|
||||
// for overlaps handled in join_copies_to_do above.
|
||||
new_image.flags |= ImageFlagBits::GpuModified;
|
||||
const auto& resolution = Settings::values.resolution_info;
|
||||
const auto base_opt = new_image.TryFindBase(overlap.gpu_addr);
|
||||
if (base_opt) {
|
||||
const SubresourceBase base = base_opt.value();
|
||||
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
|
||||
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
|
||||
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
|
||||
if (overlap.info.num_samples != new_image.info.num_samples) {
|
||||
runtime.CopyImageMSAA(new_image, overlap, FixSmallVectorADL(copies));
|
||||
} else {
|
||||
runtime.CopyImage(new_image, overlap, FixSmallVectorADL(copies));
|
||||
}
|
||||
new_image.modification_tick = overlap.modification_tick;
|
||||
} else {
|
||||
// If we cannot determine a base mapping, fallback to preserving the
|
||||
// overlap (avoid deleting GPU-modified data) and log the event so
|
||||
// it can be investigated, we're trying to pinpoint the issue of texture flickering.
|
||||
LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap", u64(overlap.gpu_addr));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(overlap, overlap_id);
|
||||
@@ -1551,6 +1604,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
|
||||
for (const auto& copy_object : join_copies_to_do) {
|
||||
Image& overlap = slot_images[copy_object.id];
|
||||
if (copy_object.is_alias) {
|
||||
if (!HasMsaaDownloadSupport(overlap.info)) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
continue;
|
||||
}
|
||||
if (!overlap.IsSafeDownload()) {
|
||||
continue;
|
||||
}
|
||||
@@ -2467,8 +2524,13 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
|
||||
if (new_id) {
|
||||
const ImageViewBase& old_view = slot_image_views[new_id];
|
||||
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
|
||||
const PendingDownload new_download{true, 0, old_view.image_id};
|
||||
uncommitted_downloads.emplace_back(new_download);
|
||||
const ImageBase& image = slot_images[old_view.image_id];
|
||||
if (!HasMsaaDownloadSupport(image.info)) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
} else {
|
||||
const PendingDownload new_download{true, 0, old_view.image_id};
|
||||
uncommitted_downloads.emplace_back(new_download);
|
||||
}
|
||||
}
|
||||
}
|
||||
*old_id = new_id;
|
||||
|
||||
@@ -426,6 +426,8 @@ private:
|
||||
bool ScaleUp(Image& image);
|
||||
bool ScaleDown(Image& image);
|
||||
u64 GetScaledImageSizeBytes(const ImageBase& image);
|
||||
[[nodiscard]] bool HasMsaaUploadSupport(const ImageInfo& info) const noexcept;
|
||||
[[nodiscard]] bool HasMsaaDownloadSupport(const ImageInfo& info) const noexcept;
|
||||
|
||||
void QueueAsyncDecode(Image& image, ImageId image_id);
|
||||
void TickAsyncDecode();
|
||||
|
||||
@@ -22,6 +22,34 @@
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
// Fallback definitions for the VK_KHR_maintenance1..9 extension-name macros.
// Each one is guarded by #ifndef, so it only takes effect when the Vulkan
// header included above has not already defined that macro.
#ifndef VK_KHR_MAINTENANCE_1_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_1_EXTENSION_NAME "VK_KHR_maintenance1"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_2_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_2_EXTENSION_NAME "VK_KHR_maintenance2"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_3_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_3_EXTENSION_NAME "VK_KHR_maintenance3"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_4_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_4_EXTENSION_NAME "VK_KHR_maintenance4"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_5_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_5_EXTENSION_NAME "VK_KHR_maintenance5"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_6_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_6_EXTENSION_NAME "VK_KHR_maintenance6"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME
|
||||
# define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9"
|
||||
#endif
|
||||
|
||||
// Sanitize macros
|
||||
#undef CreateEvent
|
||||
#undef CreateSemaphore
|
||||
|
||||
@@ -90,6 +90,25 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
// B10G11R11_UFLOAT (R11G11B10F) - PRIMARY HDR format for Nintendo Switch
|
||||
// Nintendo Switch hardware validation: FULL support (COLOR_ATTACHMENT + STORAGE_IMAGE + BLEND)
|
||||
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - All required feature bits present
|
||||
//
|
||||
// Fallback strategy: Degrade to LDR instead of expensive HDR emulation
|
||||
// - RGBA8 UNORM/SRGB: Universal support, 32-bit (same size as B10G11R11), acceptable quality
|
||||
// - RGB10A2: Better precision if available, still 32-bit
|
||||
// - RGBA16F: Last resort only if RGB8 variants fail (should never happen)
|
||||
// Candidate replacement formats for VK_FORMAT_B10G11R11_UFLOAT_PACK32. The list
// ends with VK_FORMAT_UNDEFINED; presumably callers walk it in order and stop at
// that sentinel (verify against the consumer of GetFormatAlternatives).
// NOTE(review): the ANDROID branch offers only the sRGB RGBA8 variant before
// RGBA16F, while the non-Android branch offers UNORM RGBA8 and RGB10A2 -
// confirm this asymmetry is intended.
constexpr std::array B10G11R11_UFLOAT_PACK32{
|
||||
#ifdef ANDROID
|
||||
VK_FORMAT_A8B8G8R8_SRGB_PACK32, // sRGB variant (for gamma-correct fallback)
|
||||
#else
|
||||
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Primary fallback: RGBA8 LDR (32-bit, universal)
|
||||
VK_FORMAT_A2B10G10R10_UNORM_PACK32, // Better precision: RGB10A2 (32-bit, common)
|
||||
#endif
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT, // Emergency fallback: RGBA16F (64-bit, should never reach)
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
template <typename T>
|
||||
@@ -122,6 +141,9 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
|
||||
return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
|
||||
case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
|
||||
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
|
||||
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
|
||||
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
|
||||
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
@@ -209,7 +231,6 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
VK_FORMAT_R16G16B16A16_SINT,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
@@ -416,7 +437,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
const bool is_suitable = GetSuitability(surface != nullptr);
|
||||
|
||||
const VkDriverId driver_id = properties.driver.driverID;
|
||||
const auto device_id = properties.properties.deviceID;
|
||||
// uncomment this if you want per-device overrides :P
|
||||
// const u32 device_id = properties.properties.deviceID;
|
||||
|
||||
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
|
||||
const bool is_amd_driver =
|
||||
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
|
||||
@@ -427,7 +450,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
|
||||
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
|
||||
const bool is_s8gen2 = device_id == 0x43050a01;
|
||||
const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
|
||||
|
||||
if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) {
|
||||
@@ -480,10 +502,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
is_warp_potentially_bigger = !extensions.subgroup_size_control ||
|
||||
properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize;
|
||||
|
||||
is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
|
||||
is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
|
||||
is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
|
||||
properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
|
||||
//const bool is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
|
||||
//const bool is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
|
||||
// properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
|
||||
|
||||
supports_d24_depth =
|
||||
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
@@ -494,11 +515,24 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
CollectPhysicalMemoryInfo();
|
||||
CollectToolingInfo();
|
||||
|
||||
if (is_qualcomm || is_turnip) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color");
|
||||
//RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
|
||||
//VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
// Driver-specific handling for VK_EXT_custom_border_color
|
||||
// On some Qualcomm/Turnip/ARM drivers the extension may be partially implemented.
|
||||
// Disable completely if no feature bits are reported to avoid crashes/undefined behavior.
|
||||
if (is_qualcomm || is_turnip || is_arm) {
|
||||
const bool has_any_custom_border_color =
|
||||
features.custom_border_color.customBorderColors ||
|
||||
features.custom_border_color.customBorderColorWithoutFormat;
|
||||
if (!has_any_custom_border_color) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Disabling VK_EXT_custom_border_color on '{}' — no usable features reported",
|
||||
properties.driver.driverName);
|
||||
RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
|
||||
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
} else {
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"VK_EXT_custom_border_color enabled on '{}' (partial support detected)",
|
||||
properties.driver.driverName);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_qualcomm) {
|
||||
@@ -506,11 +540,36 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
"Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
|
||||
//RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
|
||||
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
|
||||
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
|
||||
// Log Qualcomm-specific optimizations
|
||||
if (extensions.render_pass_store_ops) {
|
||||
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_store_ops: Enabled");
|
||||
}
|
||||
if (extensions.tile_properties) {
|
||||
LOG_INFO(Render_Vulkan, "VK_QCOM_tile_properties: Enabled");
|
||||
}
|
||||
if (extensions.render_pass_shader_resolve) {
|
||||
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_shader_resolve: Enabled");
|
||||
}
|
||||
if (extensions.render_pass_transform) {
|
||||
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_transform: Enabled");
|
||||
}
|
||||
if (extensions.rotated_copy_commands) {
|
||||
LOG_INFO(Render_Vulkan, "VK_QCOM_rotated_copy_commands: Enabled");
|
||||
}
|
||||
if (extensions.image_processing) {
|
||||
LOG_INFO(Render_Vulkan, "VK_QCOM_image_processing: Enabled");
|
||||
}
|
||||
|
||||
// Shader Float Controls: Completely broken on Stock Qualcomm
|
||||
// The extension causes rendering issues regardless of FP16/FP32 mode
|
||||
// Turnip Mesa: Works correctly, keep enabled
|
||||
if (!is_turnip) {
|
||||
LOG_WARNING(Render_Vulkan, "Disabling Shader Float Controls for Stock Qualcomm (broken implementation)");
|
||||
extensions.shader_float_controls = false; // Just a feature not an extension
|
||||
}
|
||||
|
||||
// Int64 atomics - genuinely broken, always disable
|
||||
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
|
||||
features.shader_atomic_int64.shaderBufferInt64Atomics = false;
|
||||
features.shader_atomic_int64.shaderSharedInt64Atomics = false;
|
||||
features.features.shaderInt64 = false;
|
||||
@@ -555,83 +614,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
cant_blit_msaa = true;
|
||||
}
|
||||
}
|
||||
if (extensions.extended_dynamic_state && is_radv) {
|
||||
// Mask driver version variant
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
|
||||
//RemoveExtensionFeature(extensions.extended_dynamic_state,
|
||||
//features.extended_dynamic_state,
|
||||
//VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
if (extensions.extended_dynamic_state2 && is_radv) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
|
||||
LOG_WARNING(
|
||||
Render_Vulkan,
|
||||
"RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2");
|
||||
// RemoveExtensionFeature(extensions.extended_dynamic_state2,
|
||||
// features.extended_dynamic_state2,
|
||||
// VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
if (extensions.extended_dynamic_state2 && is_qualcomm) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) &&
|
||||
version < VK_MAKE_API_VERSION(0, 0, 680, 0)) {
|
||||
// Qualcomm Adreno 7xx drivers do not properly support extended_dynamic_state2.
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm Adreno 7xx drivers have broken VK_EXT_extended_dynamic_state2");
|
||||
//RemoveExtensionFeature(extensions.extended_dynamic_state2,
|
||||
//features.extended_dynamic_state2,
|
||||
//VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
if (extensions.extended_dynamic_state3 && is_radv) {
|
||||
LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = true;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = true;
|
||||
dynamic_state3_blending = true;
|
||||
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 23, 1, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"RADV versions older than 23.1.0 have broken depth clamp dynamic state");
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = true;
|
||||
dynamic_state3_enables = true;
|
||||
}
|
||||
}
|
||||
if (extensions.extended_dynamic_state3 && (is_amd_driver || driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
|
||||
// AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation");
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = true;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = true;
|
||||
dynamic_state3_blending = true;
|
||||
}
|
||||
if (extensions.vertex_input_dynamic_state && is_radv) {
|
||||
// TODO(ameerj): Blacklist only offending driver versions
|
||||
// TODO(ameerj): Confirm if RDNA1 is affected
|
||||
const bool is_rdna2 =
|
||||
supported_extensions.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
|
||||
if (is_rdna2) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware");
|
||||
// RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
||||
// features.vertex_input_dynamic_state,
|
||||
// VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
if (extensions.vertex_input_dynamic_state && is_qualcomm) {
|
||||
// Qualcomm drivers do not properly support vertex_input_dynamic_state.
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm drivers have broken VK_EXT_vertex_input_dynamic_state");
|
||||
//RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
||||
// features.vertex_input_dynamic_state,
|
||||
// VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
}
|
||||
// Dynamic state blacklists moved to GetSuitability() for proper ordering
|
||||
|
||||
sets_per_pool = 64;
|
||||
if (is_amd_driver) {
|
||||
@@ -660,15 +643,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
}
|
||||
}
|
||||
|
||||
if (extensions.vertex_input_dynamic_state && is_intel_windows) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
|
||||
LOG_WARNING(Render_Vulkan, "Intel has broken VK_EXT_vertex_input_dynamic_state");
|
||||
//RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
||||
//features.vertex_input_dynamic_state,
|
||||
//VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
// VertexInputDynamicState blacklist moved to GetSuitability() for proper ordering
|
||||
if (features.shader_float16_int8.shaderFloat16 && is_intel_windows) {
|
||||
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
|
||||
LOG_WARNING(Render_Vulkan, "Intel has broken float16 math");
|
||||
@@ -681,9 +656,40 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
has_broken_compute =
|
||||
CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
|
||||
!Settings::values.enable_compute_pipelines.GetValue();
|
||||
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
|
||||
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
|
||||
must_emulate_bgr565 = false; // Default: assume emulation isn't required
|
||||
|
||||
if (is_intel_anv) {
|
||||
LOG_WARNING(Render_Vulkan, "Intel ANV driver does not support native BGR format");
|
||||
must_emulate_bgr565 = true;
|
||||
} else if (is_qualcomm) {
|
||||
// Qualcomm driver version where VK_KHR_maintenance5 and A1B5G5R5 become reliable
|
||||
constexpr uint32_t QUALCOMM_FIXED_DRIVER_VERSION = VK_MAKE_VERSION(512, 800, 1);
|
||||
// Check if VK_KHR_maintenance5 is supported
|
||||
if (extensions.maintenance5 && properties.properties.driverVersion >= QUALCOMM_FIXED_DRIVER_VERSION) {
|
||||
LOG_INFO(Render_Vulkan, "Qualcomm driver supports VK_KHR_maintenance5, disabling BGR emulation");
|
||||
must_emulate_bgr565 = false;
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Qualcomm driver doesn't support native BGR, emulating formats");
|
||||
must_emulate_bgr565 = true;
|
||||
}
|
||||
} else if (is_turnip) {
|
||||
// Mesa Turnip added support for maintenance5 in Mesa 25.0
|
||||
if (extensions.maintenance5) {
|
||||
LOG_INFO(Render_Vulkan, "Turnip driver supports VK_KHR_maintenance5, disabling BGR emulation");
|
||||
must_emulate_bgr565 = false;
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Turnip driver doesn't support native BGR, emulating formats");
|
||||
must_emulate_bgr565 = true;
|
||||
}
|
||||
} else if (is_arm) {
|
||||
// ARM Mali: stop emulating BGR5 formats when VK_KHR_maintenance5 is available
|
||||
if (extensions.maintenance5) {
|
||||
LOG_INFO(Render_Vulkan, "ARM driver supports VK_KHR_maintenance5, disabling BGR emulation");
|
||||
must_emulate_bgr565 = false;
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "ARM driver doesn't support native BGR, emulating formats");
|
||||
must_emulate_bgr565 = true;
|
||||
}
|
||||
}
|
||||
if (extensions.push_descriptor && is_intel_anv) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
@@ -713,8 +719,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
(std::min)(properties.properties.limits.maxVertexInputBindings, 16U);
|
||||
}
|
||||
|
||||
if (is_turnip) {
|
||||
LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits");
|
||||
if (is_turnip || is_qualcomm) {
|
||||
// Ensure proper vertex input bindings limit for Qualcomm hardware
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"{}: Ensuring maxVertexInputBindings = 32",
|
||||
is_turnip ? "Turnip" : "Qualcomm");
|
||||
properties.properties.limits.maxVertexInputBindings = 32;
|
||||
}
|
||||
|
||||
@@ -725,41 +734,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (!extensions.extended_dynamic_state2 && extensions.extended_dynamic_state3) {
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"Removing extendedDynamicState3 due to missing extendedDynamicState2");
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
dynamic_state3_blending = true;
|
||||
dynamic_state3_enables = true;
|
||||
}
|
||||
// Intel iGPU/MoltenVK blacklist moved to GetSuitability() for proper ordering
|
||||
|
||||
// Mesa Intel drivers on UHD 620 have broken EDS causing extreme flickering - unknown if it affects other iGPUs
|
||||
// ALSO affects ALL versions of UHD drivers on Windows 10+, seems to cause even worse issues like straight up crashing
|
||||
// So... Yeah, UHD drivers fucking suck -- maybe one day we can work past this, maybe; some driver hacking?
|
||||
// And then we can rest in peace by doing `< VK_MAKE_API_VERSION(26, 0, 0)` for our beloved mesa drivers... one day
|
||||
if ((is_mvk || (is_integrated && is_intel_anv) || (is_integrated && is_intel_windows)) && Settings::values.dyna_state.GetValue() != 0) {
|
||||
LOG_WARNING(Render_Vulkan, "Driver has broken dynamic state, forcing to 0 to prevent graphical issues");
|
||||
Settings::values.dyna_state.SetValue(0);
|
||||
}
|
||||
|
||||
if (Settings::values.dyna_state.GetValue() == 0) {
|
||||
#ifdef ANDROID
|
||||
// Stock Qualcomm and ARM Mali drivers don't report VK_FORMAT_*_SSCALED/USCALED formats
|
||||
// Turnip implements them in software, so only force emulation for stock drivers
|
||||
if ((is_qualcomm && !is_turnip) || is_arm) {
|
||||
must_emulate_scaled_formats = true;
|
||||
LOG_INFO(Render_Vulkan, "Extended dynamic state is fully disabled, scaled format emulation is ON");
|
||||
|
||||
RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
dynamic_state3_blending = false;
|
||||
dynamic_state3_enables = false;
|
||||
|
||||
LOG_INFO(Render_Vulkan, "All dynamic state extensions and features have been disabled");
|
||||
LOG_INFO(Render_Vulkan, "Mobile GPU detected: forcing scaled format emulation (hardware limitation)");
|
||||
} else {
|
||||
must_emulate_scaled_formats = false;
|
||||
LOG_INFO(Render_Vulkan, "Extended dynamic state is enabled, scaled format emulation is OFF");
|
||||
}
|
||||
#else
|
||||
// Desktop GPUs support scaled formats natively
|
||||
must_emulate_scaled_formats = false;
|
||||
#endif
|
||||
|
||||
logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), first_next, dld);
|
||||
|
||||
@@ -774,13 +763,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
if (extensions.memory_budget) {
|
||||
flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
|
||||
}
|
||||
const bool is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
|
||||
const VmaAllocatorCreateInfo allocator_info{
|
||||
.flags = flags,
|
||||
.physicalDevice = physical,
|
||||
.device = *logical,
|
||||
.preferredLargeHeapBlockSize = is_integrated
|
||||
? (64u * 1024u * 1024u)
|
||||
: (256u * 1024u * 1024u),
|
||||
.preferredLargeHeapBlockSize = (is_integrated ? 64u : 256u) * 1024u * 1024u,
|
||||
.pAllocationCallbacks = nullptr,
|
||||
.pDeviceMemoryCallbacks = nullptr,
|
||||
.pHeapSizeLimit = nullptr,
|
||||
@@ -800,15 +788,32 @@ Device::~Device() {
|
||||
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
|
||||
FormatType format_type) const {
|
||||
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
|
||||
return wanted_format;
|
||||
// Critical: Even if format is "supported", check for STORAGE + HDR + no MSAA support
|
||||
// Driver may report STORAGE_IMAGE_BIT but shaderStorageImageMultisample=false means
|
||||
// it will fail at runtime when used with MSAA (CopyImageMSAA silently fails)
|
||||
const bool requests_storage = (wanted_usage & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0;
|
||||
const bool is_hdr_format = wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
|
||||
|
||||
// If driver doesn't support shader storage image with MSAA, and we're requesting storage
|
||||
// for an HDR format (which will likely be used with MSAA), force fallback
|
||||
if (requests_storage && is_hdr_format && !features.features.shaderStorageImageMultisample) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Format {} reports STORAGE_IMAGE_BIT but driver doesn't support "
|
||||
"shaderStorageImageMultisample. Forcing fallback for MSAA compatibility.",
|
||||
wanted_format);
|
||||
// Continue to alternatives search below
|
||||
} else {
|
||||
return wanted_format;
|
||||
}
|
||||
}
|
||||
// The wanted format is not supported by hardware, search for alternatives
|
||||
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
|
||||
if (alternatives == nullptr) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Format={} with usage={} and type={} has no defined alternatives and host "
|
||||
"hardware does not support it",
|
||||
wanted_format, wanted_usage, format_type);
|
||||
"Format={} (0x{:X}) with usage={} and type={} has no defined alternatives and host "
|
||||
"hardware does not support it. Driver: {} Device: {}",
|
||||
wanted_format, static_cast<u32>(wanted_format), wanted_usage, format_type,
|
||||
GetDriverName(), properties.properties.deviceName);
|
||||
return wanted_format;
|
||||
}
|
||||
|
||||
@@ -817,9 +822,17 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
|
||||
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
|
||||
continue;
|
||||
}
|
||||
LOG_DEBUG(Render_Vulkan,
|
||||
// Special logging for HDR formats (common across multiple engines) on problematic drivers
|
||||
if (wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"B10G11R11_UFLOAT_PACK32 (R11G11B10F HDR format) not fully supported. "
|
||||
"Falling back to {} on {}",
|
||||
alternative, properties.properties.deviceName);
|
||||
} else {
|
||||
LOG_DEBUG(Render_Vulkan,
|
||||
"Emulating format={} with alternative format={} with usage={} and type={}",
|
||||
wanted_format, alternative, wanted_usage, format_type);
|
||||
}
|
||||
return alternative;
|
||||
}
|
||||
|
||||
@@ -1202,14 +1215,133 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||
}
|
||||
}
|
||||
|
||||
// CRITICAL: Apply driver-specific feature workarounds BEFORE validation
|
||||
// These blacklists disable broken features on specific drivers to prevent
|
||||
// rendering issues and crashes.
|
||||
// MUST execute before RemoveUnsuitableExtensions() calculates feature flags.
|
||||
|
||||
const VkDriverId driver_id = properties.driver.driverID;
|
||||
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
|
||||
const bool is_amd_driver =
|
||||
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
|
||||
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
|
||||
const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
|
||||
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
|
||||
const bool is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
|
||||
|
||||
// VK_DYNAMIC_STATE
|
||||
|
||||
// Mesa Intel drivers on UHD 620 have broken EDS causing extreme flickering - unknown if it affects other iGPUs
|
||||
// ALSO affects ALL versions of UHD drivers on Windows 10+, seems to cause even worse issues like straight up crashing
|
||||
// So... Yeah, UHD drivers fucking suck -- maybe one day we can work past this, maybe; some driver hacking?
|
||||
// And then we can rest in peace by doing `< VK_MAKE_API_VERSION(26, 0, 0)` for our beloved mesa drivers... one day
|
||||
// Disable dynamic state on affected drivers
|
||||
if ((is_mvk || (is_integrated && is_intel_anv) || (is_integrated && is_intel_windows)) &&
|
||||
Settings::values.dyna_state.GetValue() != 0) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Intel iGPU/MoltenVK: Forcing dyna_state=0 due to broken dynamic state implementation");
|
||||
Settings::values.dyna_state.SetValue(0);
|
||||
}
|
||||
|
||||
// VK_EXT_extended_dynamic_state
|
||||
|
||||
// RADV < 21.2.0: Broken ExtendedDynamicState implementation
|
||||
// Disable entire extension on old drivers
|
||||
if (extensions.extended_dynamic_state && is_radv) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"RADV < 21.2.0: Disabling broken VK_EXT_extended_dynamic_state");
|
||||
features.extended_dynamic_state.extendedDynamicState = false;
|
||||
}
|
||||
}
|
||||
|
||||
// VK_EXT_extended_dynamic_state2
|
||||
|
||||
// RADV < 22.3.1: Broken ExtendedDynamicState2 implementation
|
||||
// Disable entire extension on old drivers
|
||||
if (extensions.extended_dynamic_state2 && is_radv) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"RADV < 22.3.1: Disabling broken VK_EXT_extended_dynamic_state2");
|
||||
features.extended_dynamic_state2.extendedDynamicState2 = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Qualcomm Adreno 7xx (drivers 676.0 - 679.x): Broken ExtendedDynamicState2
|
||||
// Disable ExtendedDynamicState2 on affected driver versions
|
||||
if (extensions.extended_dynamic_state2 && is_qualcomm) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) &&
|
||||
version < VK_MAKE_API_VERSION(0, 0, 680, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm Adreno 7xx (676-679): Disabling broken VK_EXT_extended_dynamic_state2");
|
||||
features.extended_dynamic_state2.extendedDynamicState2 = false;
|
||||
}
|
||||
}
|
||||
|
||||
// VK_EXT_extended_dynamic_state3
|
||||
|
||||
// AMD/Samsung/RADV: Broken extendedDynamicState3ColorBlendEquation
|
||||
// Disable blend equation dynamic state, force static pipeline state
|
||||
if (extensions.extended_dynamic_state3 &&
|
||||
(is_amd_driver || is_radv || driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"AMD/Samsung/RADV: Disabling broken extendedDynamicState3ColorBlendEquation");
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
|
||||
}
|
||||
|
||||
// VK_EXT_vertex_input_dynamic_state
|
||||
// No RADV workarounds - assume modern drivers
|
||||
|
||||
// Qualcomm: Broken VertexInputDynamicState implementation
|
||||
// Disable VertexInputDynamicState on all Qualcomm drivers
|
||||
if (extensions.vertex_input_dynamic_state && is_qualcomm) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm: Disabling broken VK_EXT_vertex_input_dynamic_state");
|
||||
features.vertex_input_dynamic_state.vertexInputDynamicState = false;
|
||||
}
|
||||
|
||||
// Intel Windows < 27.20.100.0: Broken VertexInputDynamicState
|
||||
// Disable VertexInputDynamicState on old Intel Windows drivers
|
||||
if (extensions.vertex_input_dynamic_state && is_intel_windows) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Intel Windows < 27.20.100.0: Disabling broken VK_EXT_vertex_input_dynamic_state");
|
||||
features.vertex_input_dynamic_state.vertexInputDynamicState = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If user setting is dyna_state=0, disable all dynamic state features
|
||||
if (Settings::values.dyna_state.GetValue() == 0) {
|
||||
LOG_INFO(Render_Vulkan, "Dynamic state disabled by user setting, clearing all EDS features");
|
||||
features.custom_border_color.customBorderColors = false;
|
||||
features.custom_border_color.customBorderColorWithoutFormat = false;
|
||||
features.extended_dynamic_state.extendedDynamicState = false;
|
||||
features.extended_dynamic_state2.extendedDynamicState2 = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
|
||||
// Note: vertex_input_dynamic_state has independent toggle, NOT affected by dyna_state=0
|
||||
}
|
||||
|
||||
// Return whether we were suitable.
|
||||
return suitable;
|
||||
}
|
||||
|
||||
void Device::RemoveUnsuitableExtensions() {
|
||||
// VK_EXT_custom_border_color
|
||||
extensions.custom_border_color = features.custom_border_color.customBorderColors &&
|
||||
features.custom_border_color.customBorderColorWithoutFormat;
|
||||
// Enable if at least one feature is available (customBorderColors OR customBorderColorWithoutFormat)
|
||||
const bool has_any_custom_border_color =
|
||||
features.custom_border_color.customBorderColors ||
|
||||
features.custom_border_color.customBorderColorWithoutFormat;
|
||||
extensions.custom_border_color = has_any_custom_border_color;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.custom_border_color, features.custom_border_color,
|
||||
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
|
||||
@@ -1267,6 +1399,43 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
// VK_KHR_shader_float16_int8
|
||||
const bool float16_int8_requested = extensions.shader_float16_int8;
|
||||
const bool float16_int8_usable =
|
||||
features.shader_float16_int8.shaderFloat16 || features.shader_float16_int8.shaderInt8;
|
||||
if (float16_int8_requested && !float16_int8_usable) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Disabling VK_KHR_shader_float16_int8 — no shaderFloat16/shaderInt8 features reported");
|
||||
}
|
||||
extensions.shader_float16_int8 = float16_int8_requested && float16_int8_usable;
|
||||
RemoveExtensionFeatureIfUnsuitable(float16_int8_usable, features.shader_float16_int8,
|
||||
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_shader_atomic_float
|
||||
const bool atomic_float_requested = extensions.shader_atomic_float;
|
||||
const auto& atomic_float_features = features.shader_atomic_float;
|
||||
const bool supports_buffer_f32 = atomic_float_features.shaderBufferFloat32Atomics ||
|
||||
atomic_float_features.shaderBufferFloat32AtomicAdd;
|
||||
const bool supports_shared_f32 = atomic_float_features.shaderSharedFloat32Atomics ||
|
||||
atomic_float_features.shaderSharedFloat32AtomicAdd;
|
||||
const bool supports_image_f32 = atomic_float_features.shaderImageFloat32Atomics ||
|
||||
atomic_float_features.shaderImageFloat32AtomicAdd;
|
||||
const bool supports_sparse_f32 = atomic_float_features.sparseImageFloat32Atomics ||
|
||||
atomic_float_features.sparseImageFloat32AtomicAdd;
|
||||
const bool supports_buffer_f64 = atomic_float_features.shaderBufferFloat64Atomics ||
|
||||
atomic_float_features.shaderBufferFloat64AtomicAdd;
|
||||
const bool supports_shared_f64 = atomic_float_features.shaderSharedFloat64Atomics ||
|
||||
atomic_float_features.shaderSharedFloat64AtomicAdd;
|
||||
const bool atomic_float_usable = supports_buffer_f32 || supports_shared_f32 || supports_image_f32 ||
|
||||
supports_sparse_f32 || supports_buffer_f64 || supports_shared_f64;
|
||||
if (atomic_float_requested && !atomic_float_usable) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Disabling VK_EXT_shader_atomic_float — no usable atomic float feature bits reported");
|
||||
}
|
||||
extensions.shader_atomic_float = atomic_float_requested && atomic_float_usable;
|
||||
RemoveExtensionFeatureIfUnsuitable(atomic_float_usable, features.shader_atomic_float,
|
||||
VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_shader_atomic_int64
|
||||
extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
|
||||
features.shader_atomic_int64.shaderSharedInt64Atomics;
|
||||
@@ -1300,12 +1469,34 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
|
||||
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_robustness2
|
||||
extensions.robustness_2 =
|
||||
features.robustness2.robustBufferAccess2 && features.robustness2.robustImageAccess2;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2,
|
||||
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_image_robustness
|
||||
extensions.image_robustness = features.image_robustness.robustImageAccess;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness,
|
||||
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_swapchain_maintenance1
|
||||
extensions.swapchain_maintenance1 = loaded_extensions.contains(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
|
||||
RemoveExtensionIfUnsuitable(extensions.swapchain_maintenance1, VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_vertex_input_dynamic_state
|
||||
extensions.vertex_input_dynamic_state =
|
||||
features.vertex_input_dynamic_state.vertexInputDynamicState;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.vertex_input_dynamic_state,
|
||||
features.vertex_input_dynamic_state,
|
||||
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
if (Settings::values.vertex_input_dynamic_state.GetValue()) {
|
||||
extensions.vertex_input_dynamic_state =
|
||||
features.vertex_input_dynamic_state.vertexInputDynamicState;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.vertex_input_dynamic_state,
|
||||
features.vertex_input_dynamic_state,
|
||||
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
} else {
|
||||
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
||||
features.vertex_input_dynamic_state,
|
||||
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
LOG_INFO(Render_Vulkan, "Vertex Input Dynamic State disabled by user setting");
|
||||
}
|
||||
|
||||
// VK_KHR_pipeline_executable_properties
|
||||
if (Settings::values.renderer_shader_feedback.GetValue()) {
|
||||
@@ -1383,8 +1574,8 @@ void Device::CollectPhysicalMemoryInfo() {
|
||||
// Calculate limits using memory budget
|
||||
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
|
||||
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
|
||||
const auto mem_info =
|
||||
physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr);
|
||||
const bool is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
|
||||
const auto mem_info = physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr);
|
||||
const auto& mem_properties = mem_info.memoryProperties;
|
||||
const size_t num_properties = mem_properties.memoryHeapCount;
|
||||
device_access_memory = 0;
|
||||
|
||||
@@ -49,9 +49,11 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
|
||||
FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
|
||||
FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
|
||||
FEATURE(EXT, ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, shader_atomic_float) \
|
||||
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
|
||||
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
|
||||
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
|
||||
FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \
|
||||
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
|
||||
primitive_topology_list_restart) \
|
||||
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
|
||||
@@ -61,7 +63,9 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
|
||||
pipeline_executable_properties) \
|
||||
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
|
||||
workgroup_memory_explicit_layout)
|
||||
workgroup_memory_explicit_layout) \
|
||||
FEATURE(QCOM, ImageProcessing, IMAGE_PROCESSING, image_processing_qcom) \
|
||||
FEATURE(QCOM, TileProperties, TILE_PROPERTIES, tile_properties_qcom)
|
||||
|
||||
// Define miscellaneous extensions which may be used by the implementation here.
|
||||
#define FOR_EACH_VK_EXTENSION(EXTENSION) \
|
||||
@@ -82,7 +86,9 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
|
||||
EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
|
||||
EXTENSION(KHR, SWAPCHAIN, swapchain) \
|
||||
EXTENSION(KHR, INCREMENTAL_PRESENT, incremental_present) \
|
||||
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
|
||||
EXTENSION(EXT, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \
|
||||
EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
|
||||
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
|
||||
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
|
||||
@@ -90,7 +96,22 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) \
|
||||
EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \
|
||||
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
|
||||
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights)
|
||||
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
|
||||
EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \
|
||||
EXTENSION(QCOM, RENDER_PASS_STORE_OPS, render_pass_store_ops) \
|
||||
EXTENSION(QCOM, RENDER_PASS_TRANSFORM, render_pass_transform) \
|
||||
EXTENSION(QCOM, ROTATED_COPY_COMMANDS, rotated_copy_commands) \
|
||||
EXTENSION(QCOM, IMAGE_PROCESSING, image_processing) \
|
||||
EXTENSION(QCOM, TILE_PROPERTIES, tile_properties) \
|
||||
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
|
||||
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
|
||||
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
|
||||
EXTENSION(KHR, MAINTENANCE_4, maintenance4) \
|
||||
EXTENSION(KHR, MAINTENANCE_5, maintenance5) \
|
||||
EXTENSION(KHR, MAINTENANCE_6, maintenance6) \
|
||||
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
|
||||
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
|
||||
EXTENSION(KHR, MAINTENANCE_9, maintenance9)
|
||||
|
||||
// Define extensions which must be supported.
|
||||
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
|
||||
@@ -365,6 +386,12 @@ public:
|
||||
return properties.subgroup_properties.supportedOperations & feature;
|
||||
}
|
||||
|
||||
/// Returns true if subgroup operations are supported in the specified shader stage.
|
||||
/// Mobile GPUs (Qualcomm Adreno) often only support subgroups in fragment/compute stages.
|
||||
bool IsSubgroupSupportedForStage(VkShaderStageFlagBits stage) const {
|
||||
return properties.subgroup_properties.supportedStages & stage;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of push descriptors.
|
||||
u32 MaxPushDescriptors() const {
|
||||
return properties.push_descriptor.maxPushDescriptors;
|
||||
@@ -455,6 +482,11 @@ public:
|
||||
return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_incremental_present.
|
||||
bool IsKhrIncrementalPresentSupported() const {
|
||||
return extensions.incremental_present;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
|
||||
bool IsTopologyListPrimitiveRestartSupported() const {
|
||||
return features.primitive_topology_list_restart.primitiveTopologyListRestart;
|
||||
@@ -520,6 +552,31 @@ public:
|
||||
return extensions.custom_border_color;
|
||||
}
|
||||
|
||||
/// Returns true if customBorderColors feature is enabled (allows VK_BORDER_COLOR_*_CUSTOM_EXT).
|
||||
bool HasCustomBorderColorFeature() const {
|
||||
return features.custom_border_color.customBorderColors;
|
||||
}
|
||||
|
||||
/// Returns true if customBorderColorWithoutFormat feature is enabled (allows VK_FORMAT_UNDEFINED).
|
||||
bool HasCustomBorderColorWithoutFormatFeature() const {
|
||||
return features.custom_border_color.customBorderColorWithoutFormat;
|
||||
}
|
||||
|
||||
/// Base Vulkan Dynamic State support checks.
|
||||
/// These provide granular control over each base dynamic state, allowing individual states
|
||||
/// to be disabled if broken driver implementations are detected at device initialization.
|
||||
/// By default all states are enabled. If a specific driver has issues with certain states,
|
||||
/// they can be disabled in vulkan_device.cpp constructor (see has_broken_compute pattern).
|
||||
bool SupportsDynamicViewport() const { return supports_dynamic_viewport; }
|
||||
bool SupportsDynamicScissor() const { return supports_dynamic_scissor; }
|
||||
bool SupportsDynamicLineWidth() const { return supports_dynamic_line_width; }
|
||||
bool SupportsDynamicDepthBias() const { return supports_dynamic_depth_bias; }
|
||||
bool SupportsDynamicBlendConstants() const { return supports_dynamic_blend_constants; }
|
||||
bool SupportsDynamicDepthBounds() const { return supports_dynamic_depth_bounds; }
|
||||
bool SupportsDynamicStencilCompareMask() const { return supports_dynamic_stencil_compare; }
|
||||
bool SupportsDynamicStencilWriteMask() const { return supports_dynamic_stencil_write; }
|
||||
bool SupportsDynamicStencilReference() const { return supports_dynamic_stencil_reference; }
|
||||
|
||||
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
|
||||
bool IsExtExtendedDynamicStateSupported() const {
|
||||
return extensions.extended_dynamic_state;
|
||||
@@ -554,6 +611,98 @@ public:
|
||||
return dynamic_state3_enables;
|
||||
}
|
||||
|
||||
// EDS2 granular feature checks
|
||||
bool IsExtExtendedDynamicState2LogicOpSupported() const {
|
||||
return extensions.extended_dynamic_state2 &&
|
||||
features.extended_dynamic_state2.extendedDynamicState2LogicOp;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState2PatchControlPointsSupported() const {
|
||||
return extensions.extended_dynamic_state2 &&
|
||||
features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints;
|
||||
}
|
||||
|
||||
// EDS3 granular feature checks
|
||||
bool IsExtExtendedDynamicState3DepthClampEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3LogicOpEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3TessellationDomainOriginSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3TessellationDomainOrigin;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3PolygonModeSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3PolygonMode;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3RasterizationSamplesSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3RasterizationSamples;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3SampleMaskSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3SampleMask;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3AlphaToCoverageEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3AlphaToCoverageEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3AlphaToOneEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3AlphaToOneEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3DepthClipEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClipEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3DepthClipNegativeOneToOneSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClipNegativeOneToOne;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3LineRasterizationModeSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3LineRasterizationMode;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3LineStippleEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3LineStippleEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3ProvokingVertexModeSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3ProvokingVertexMode;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3ConservativeRasterizationModeSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3ConservativeRasterizationMode;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3SampleLocationsEnableSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3SampleLocationsEnable;
|
||||
}
|
||||
|
||||
bool IsExtExtendedDynamicState3RasterizationStreamSupported() const {
|
||||
return extensions.extended_dynamic_state3 &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3RasterizationStream;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_filter_cubic
|
||||
bool IsExtFilterCubicSupported() const {
|
||||
return extensions.filter_cubic;
|
||||
@@ -564,6 +713,56 @@ public:
|
||||
return extensions.filter_cubic_weights;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_QCOM_render_pass_shader_resolve
|
||||
bool IsQcomRenderPassShaderResolveSupported() const {
|
||||
return extensions.render_pass_shader_resolve;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_QCOM_render_pass_store_ops
|
||||
bool IsQcomRenderPassStoreOpsSupported() const {
|
||||
return extensions.render_pass_store_ops;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_QCOM_tile_properties
|
||||
bool IsQcomTilePropertiesSupported() const {
|
||||
return extensions.tile_properties;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_QCOM_render_pass_transform
|
||||
bool IsQcomRenderPassTransformSupported() const {
|
||||
return extensions.render_pass_transform;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_QCOM_rotated_copy_commands
|
||||
bool IsQcomRotatedCopyCommandsSupported() const {
|
||||
return extensions.rotated_copy_commands;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_QCOM_image_processing
|
||||
bool IsQcomImageProcessingSupported() const {
|
||||
return extensions.image_processing;
|
||||
}
|
||||
|
||||
/// Returns Qualcomm tile size (width, height, depth). Returns {0,0,0} if not queried or unsupported
|
||||
VkExtent3D GetQcomTileSize() const {
|
||||
return properties.qcom_tile_size;
|
||||
}
|
||||
|
||||
/// Returns Qualcomm tile apron size. Returns {0,0} if not queried or unsupported
|
||||
VkExtent2D GetQcomApronSize() const {
|
||||
return properties.qcom_apron_size;
|
||||
}
|
||||
|
||||
/// Returns true if MSAA copy operations are supported via compute shader (upload/download)
|
||||
/// Qualcomm uses render pass shader resolve instead, so this returns false for Qualcomm
|
||||
bool CanUploadMSAA() const {
|
||||
return IsStorageImageMultisampleSupported();
|
||||
}
|
||||
|
||||
bool CanDownloadMSAA() const {
|
||||
return CanUploadMSAA();
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_line_rasterization.
|
||||
bool IsExtLineRasterizationSupported() const {
|
||||
return extensions.line_rasterization;
|
||||
@@ -594,6 +793,11 @@ public:
|
||||
return extensions.shader_atomic_int64;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_atomic_float.
|
||||
bool IsExtShaderAtomicFloatSupported() const {
|
||||
return extensions.shader_atomic_float;
|
||||
}
|
||||
|
||||
bool IsExtConditionalRendering() const {
|
||||
return extensions.conditional_rendering;
|
||||
}
|
||||
@@ -817,8 +1021,9 @@ private:
|
||||
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
|
||||
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
|
||||
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
|
||||
|
||||
VkPhysicalDeviceProperties properties{};
|
||||
VkExtent3D qcom_tile_size{}; // Qualcomm tile dimensions (0 if not queried)
|
||||
VkExtent2D qcom_apron_size{}; // Qualcomm tile apron size
|
||||
};
|
||||
|
||||
Extensions extensions{};
|
||||
@@ -833,9 +1038,6 @@ private:
|
||||
bool is_blit_depth24_stencil8_supported{}; ///< Support for blitting from and to D24S8.
|
||||
bool is_blit_depth32_stencil8_supported{}; ///< Support for blitting from and to D32S8.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
bool is_integrated{}; ///< Is GPU an iGPU.
|
||||
bool is_virtual{}; ///< Is GPU a virtual GPU.
|
||||
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
|
||||
bool has_broken_compute{}; ///< Compute shaders can cause crashes
|
||||
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
|
||||
bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling.
|
||||
@@ -849,6 +1051,22 @@ private:
|
||||
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
|
||||
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
|
||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||
|
||||
/// Base Vulkan Dynamic State support flags (granular fallback for broken drivers).
|
||||
/// All default to true. These can be individually disabled in vulkan_device.cpp
|
||||
/// if specific broken driver implementations are detected during initialization.
|
||||
/// This provides emergency protection against drivers that report support but crash/misbehave.
|
||||
/// Pattern: Check driver/device and set to false in vulkan_device.cpp constructor.
|
||||
bool supports_dynamic_viewport{true}; ///< VK_DYNAMIC_STATE_VIEWPORT
|
||||
bool supports_dynamic_scissor{true}; ///< VK_DYNAMIC_STATE_SCISSOR
|
||||
bool supports_dynamic_line_width{true}; ///< VK_DYNAMIC_STATE_LINE_WIDTH
|
||||
bool supports_dynamic_depth_bias{true}; ///< VK_DYNAMIC_STATE_DEPTH_BIAS
|
||||
bool supports_dynamic_blend_constants{true}; ///< VK_DYNAMIC_STATE_BLEND_CONSTANTS
|
||||
bool supports_dynamic_depth_bounds{true}; ///< VK_DYNAMIC_STATE_DEPTH_BOUNDS
|
||||
bool supports_dynamic_stencil_compare{true}; ///< VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
|
||||
bool supports_dynamic_stencil_write{true}; ///< VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
|
||||
bool supports_dynamic_stencil_reference{true};///< VK_DYNAMIC_STATE_STENCIL_REFERENCE
|
||||
|
||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||
u32 sets_per_pool{}; ///< Sets per descriptor pool.
|
||||
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
||||
|
||||
@@ -226,11 +226,24 @@ namespace Vulkan {
|
||||
vk::Buffer
|
||||
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
|
||||
{
|
||||
// Qualcomm uses unified memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE
|
||||
// for zero-copy access without staging buffers
|
||||
const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload ||
|
||||
usage == MemoryUsage::Download ||
|
||||
usage == MemoryUsage::Stream);
|
||||
|
||||
VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage);
|
||||
if (prefer_unified) {
|
||||
// Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures
|
||||
preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
|
||||
}
|
||||
|
||||
const VmaAllocationCreateInfo alloc_ci = {
|
||||
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
|
||||
.usage = MemoryUsageVma(usage),
|
||||
.requiredFlags = 0,
|
||||
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
|
||||
.preferredFlags = preferred_flags,
|
||||
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
|
||||
.pool = VK_NULL_HANDLE,
|
||||
.pUserData = nullptr,
|
||||
@@ -245,6 +258,13 @@ namespace Vulkan {
|
||||
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
|
||||
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
|
||||
|
||||
if (is_qualcomm && prefer_unified) {
|
||||
const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
|
||||
(property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}",
|
||||
static_cast<u32>(usage), got_unified, property_flags);
|
||||
}
|
||||
|
||||
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
|
||||
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
|
||||
const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
|
||||
|
||||
Reference in New Issue
Block a user