Compare commits

..

209 Commits

Author SHA1 Message Date
MaranBr
18704cc65f Reduce synchronization overhead and improve performance in DMA operations 2025-12-21 23:49:16 -03:00
PavelBARABANOV
b40f91974b disabling for Android "Vulkan Asynchronous Presentation" 2025-12-21 23:48:43 -03:00
Caio Oliveira
5f6ee64dc3 DONOTMERGE 2025-12-21 23:46:09 -03:00
CamilleLaVey
722a8e4ede [shader_enviroment] TICEntry limits to Empty 2025-12-21 21:16:37 -03:00
CamilleLaVey
9caa1d5b1f [revert] StreamingByteCount conditional to TFB Active 2025-12-21 21:16:37 -03:00
CamilleLaVey
c0ea48e440 [revert] TIC entry + Kill Switch for TFC on TFB 2025-12-21 21:16:37 -03:00
CamilleLaVey
03e872f69c [shader_enviroment, vk] TIC entry + Kill Switch for TFC on TFB 2025-12-21 21:16:37 -03:00
CamilleLaVey
61ff781e0a [vk, rasterizer] StreamingByteCount conditional to TFB Active 2025-12-21 21:16:37 -03:00
CamilleLaVey
ee2f1a62de [vk] Start of a downhill 2025-12-21 21:16:36 -03:00
crueter
aee49e4877 Revert "[vk] Correct polygon draw topology mapping for line and point modes (#2834)"
This reverts commit 6ba25b6cc0.
2025-12-21 21:16:36 -03:00
CamilleLaVey
c5f30776b6 [vk] sRGB format handling for Storage. 2025-12-21 21:16:36 -03:00
CamilleLaVey
ae1c974c2e [vk] Ordering UnsupportedFormatKey 2025-12-21 21:16:36 -03:00
CamilleLaVey
1aec0c0c50 [vk] Image Remaining Layers 3D 2025-12-21 21:16:36 -03:00
CamilleLaVey
c15cd06711 [vk] Another try with Depth/Stencil handling 2025-12-21 21:16:36 -03:00
CamilleLaVey
1067a69daf [vk, qcom] Code clean-up. 2025-12-21 21:16:35 -03:00
CamilleLaVey
d6a8c0b013 [revert] Added linear filtering in texture blitting operations 2025-12-21 21:16:35 -03:00
CamilleLaVey
59a2fdba90 Fix building 2025-12-21 21:16:35 -03:00
CamilleLaVey
7e875e9072 [vk] SL complete removal 2025-12-21 21:16:35 -03:00
CamilleLaVey
fc6081d3b9 [revert] Allocate data transfer from Maxwell to VK using Sample Locations as channel for MSAA 2025-12-21 21:16:35 -03:00
CamilleLaVey
ff36a072e3 [revert] SL Sample Count Clamp 2025-12-21 21:16:35 -03:00
CamilleLaVey
7fccd7573a [revert] SL Table order 2025-12-21 21:16:35 -03:00
CamilleLaVey
3e5ba87a4a [vk] SL Table order 2025-12-21 21:16:34 -03:00
CamilleLaVey
731965ac8f [vk] SL Sample Count Clamp 2025-12-21 21:16:34 -03:00
CamilleLaVey
094b2a3274 [vk] DrainPendingBuild 2025-12-21 21:16:34 -03:00
CamilleLaVey
2ce3e09d62 [vk, qcom[ Removed parallel compilling from qcom entirely 2025-12-21 21:16:34 -03:00
CamilleLaVey
cc47aa473a [vk, qcom, turnip] TimelineSemaphore removal 2025-12-21 21:16:34 -03:00
CamilleLaVey
b0cd2e8cbe [vk] Adjusted Texel Block View for Depth/Stencil attachment images 2025-12-21 21:16:34 -03:00
CamilleLaVey
2ccf9811b7 [vk, qcom] SFC disabled 2025-12-21 21:16:33 -03:00
CamilleLaVey
583a9fd320 [maxwell, vk] Allocate data transfer from Maxwell to VK using Sample Locations as channel for MSAA 2025-12-21 21:16:33 -03:00
CamilleLaVey
6df9d06e65 [vk] Removed unused extensions 2025-12-21 21:16:33 -03:00
CamilleLaVey
d7a120fe74 [revert] Adjusting unused features 2025-12-21 21:16:33 -03:00
CamilleLaVey
6f6ce56528 [revert] fix building 2025-12-21 21:16:33 -03:00
CamilleLaVey
dc8c6ce6d5 [vk] TEST: SFC + WGML 2025-12-21 21:16:33 -03:00
CamilleLaVey
22e8b24361 fix building 2025-12-21 21:16:32 -03:00
CamilleLaVey
f63226ff10 [vk, qcom] Adjusting unused features 2025-12-21 21:16:32 -03:00
CamilleLaVey
9f3803c986 [vk] Hotfix for DepthBounds and StencilMask. 2025-12-21 21:16:32 -03:00
CamilleLaVey
5899816f7c [revert] Update Gradle to 8.13.1 2025-12-21 21:16:32 -03:00
CamilleLaVey
280d05452a [Android] Core 1++ 2025-12-21 21:16:32 -03:00
CamilleLaVey
ccc31edfbb another one 2025-12-21 21:16:32 -03:00
CamilleLaVey
7ce570f420 Missing header 2025-12-21 21:16:32 -03:00
CamilleLaVey
fbf023fa88 [vk] Exception modified & logged 2025-12-21 21:16:31 -03:00
CamilleLaVey
98b4bfb461 [hle] BufferDescritorC 2025-12-21 21:16:31 -03:00
CamilleLaVey
4e6c978574 [vk, qcom] TimelineSemaphore syncs to GPUTick. 2025-12-21 21:16:31 -03:00
CamilleLaVey
09b897dbf2 [gl] WaitTick 2025-12-21 21:16:31 -03:00
CamilleLaVey
3fc9f0efe3 [gl, vk] Access Tracking Synchronization 2025-12-21 21:16:31 -03:00
CamilleLaVey
b17b70e09d [vk] SurfaceType Depth/Stencil 2025-12-21 21:16:31 -03:00
CamilleLaVey
65083b10d8 [vk] SamplerComponentType upgraded DepthCompareSampling 2025-12-21 21:16:30 -03:00
CamilleLaVey
f2227af36c [Vk] Sample Locations Adjustments for Depth/Stencil 2025-12-21 21:16:30 -03:00
CamilleLaVey
f1a5f9137b [vk] Sample Location Depth Bit 2025-12-21 21:16:30 -03:00
CamilleLaVey
f933828749 [vk] Increased PipelineWorkers for testing purposes 2025-12-21 21:16:30 -03:00
CamilleLaVey
05e8e1d494 [vk] KeepAliveTick in Scheduler 2025-12-21 21:16:30 -03:00
CamilleLaVey
5937d19750 [vk] Sample Locations ordering 2025-12-21 21:16:30 -03:00
CamilleLaVey
313e885f7a Hotfix 2025-12-21 21:16:29 -03:00
CamilleLaVey
6b0b72e034 [vk] Re-introduction to MSAA - Sample Locations 2025-12-21 21:16:29 -03:00
CamilleLaVey
c6d85b7589 [vk, mobile, vendor] MegaBuffer removal 2025-12-21 21:16:29 -03:00
CamilleLaVey
005b1dd3be [vk, pipeline] Added In-flight conditional for multiple pipeline compilations 2025-12-21 21:16:29 -03:00
CamilleLaVey
2c4748cc3d [vk, qcom] Extending GetTotalPipelineWorker resources. 2025-12-21 21:16:29 -03:00
CamilleLaVey
a2dbff0bc3 [vk] SanitizeComponent Adjustment 2025-12-21 21:16:29 -03:00
CamilleLaVey
1574e7e804 [vk] DualBlendFactor 2025-12-21 21:16:28 -03:00
CamilleLaVey
ef0061f72b [vk] CounterStreamer 2025-12-21 21:16:28 -03:00
CamilleLaVey
b76b74dcb7 [vk, qcom] Re-enabling CBC for further testing 2025-12-21 21:16:28 -03:00
CamilleLaVey
f6ecb812ca [vk] Sanitize Format/Component 2025-12-21 21:16:28 -03:00
CamilleLaVey
54d6a2015d [vk] Extended PixelFormat for Depth/Stencil 2025-12-21 21:16:28 -03:00
CamilleLaVey
9a2e2f922f [vk] Reordering PixelFormatNumeric 2025-12-21 21:16:28 -03:00
CamilleLaVey
1f74ed2272 Added missing headers 2025-12-21 21:16:27 -03:00
CamilleLaVey
1307614ad0 [gl, vk] Corroborating new helpers order 2025-12-21 21:16:27 -03:00
CamilleLaVey
989a6b7870 Meow 2025-12-21 21:16:27 -03:00
CamilleLaVey
1cf65cba2a [vk] Extending TF handling 2025-12-21 21:16:27 -03:00
CamilleLaVey
3fd7821fc8 [gl, vk, spv] Added component type handling for texture buffers and resolve pixel format variants 2025-12-21 21:16:27 -03:00
CamilleLaVey
59521d7e96 Saving Private Windows 2025-12-21 21:16:27 -03:00
CamilleLaVey
1fccea8db1 [vk] Formatting missing formats 2025-12-21 21:16:27 -03:00
CamilleLaVey
94ac08fcd7 another missing brace 2025-12-21 21:16:26 -03:00
CamilleLaVey
992ea70ce7 If this get builded i'll be able to actually, truly and sincely fly 2025-12-21 21:16:26 -03:00
CamilleLaVey
9de6c125a7 missing brace 2025-12-21 21:16:26 -03:00
CamilleLaVey
5447ed0d60 quick fix 2025-12-21 21:16:26 -03:00
CamilleLaVey
8b9792855f [vk] TextureType extended 2025-12-21 21:16:26 -03:00
CamilleLaVey
419be467ee [vk] Adjustment BitScaleHelper 2025-12-21 21:16:26 -03:00
CamilleLaVey
6df35d859a [vk] BufferCache NullBuffer handling 2025-12-21 21:16:25 -03:00
CamilleLaVey
bd28d4269b [vk] Runtime to change image layout 2025-12-21 21:16:25 -03:00
CamilleLaVey
0baf482214 [vk] TextureFilter 2025-12-21 21:16:25 -03:00
CamilleLaVey
690dfc66f5 [gl, vk] SupportLinearFilter patch 2025-12-21 21:16:25 -03:00
CamilleLaVey
ef7e43bc30 [vk] Wrapper for Sampler Image Filter 2025-12-21 21:16:25 -03:00
CamilleLaVey
d4d704cc26 [vk, rasterizer] Re-order post Sample Locations removal 2025-12-21 21:16:25 -03:00
CamilleLaVey
1c9f603947 [revert] Opcode Promotion path emulation 2025-12-21 21:16:24 -03:00
CamilleLaVey
3873b3fe6c [revert] lambda enemy of da world 2025-12-21 21:16:24 -03:00
CamilleLaVey
fb7d5086b7 [revert] The next step of the human kind before it's doom 2025-12-21 21:16:24 -03:00
CamilleLaVey
c5cbd67dbc [vk] The next step of the human kind before it's doom 2025-12-21 21:16:24 -03:00
CamilleLaVey
7bd0d42bab lambda enemy of da world 2025-12-21 21:16:24 -03:00
CamilleLaVey
08dbacdf53 [vk, gl, spv] Opcode Promotion path emulation 2025-12-21 21:16:24 -03:00
CamilleLaVey
da8809c240 [vk] Fixing wrong enabling logic 2025-12-21 21:16:24 -03:00
CamilleLaVey
eb7159a859 [vk] NullDescriptor guard 2025-12-21 21:16:23 -03:00
CamilleLaVey
65bef7ec08 [vk] Adjusted Transform Feedback 2025-12-21 21:16:23 -03:00
CamilleLaVey
78a44a4ef6 [vk] Adjusted Query Cache 2025-12-21 21:16:23 -03:00
CamilleLaVey
b07356fcbf [vk, qcom] Shader Float Control changed handling 2025-12-21 21:16:23 -03:00
CamilleLaVey
500802cb72 [vk] Removed Sample Locations 2025-12-21 21:16:23 -03:00
CamilleLaVey
e31411170a [vk] removed ImageViewType function 2025-12-21 21:16:23 -03:00
CamilleLaVey
8c350dfd37 [spv, vk] reworked texture view handling and added layer count overrides 2025-12-21 21:16:22 -03:00
CamilleLaVey
de3ac7ad4e [vk] Unused mark for subgroups variables 2025-12-21 21:16:22 -03:00
CamilleLaVey
66903b4b98 [revert] UWU 2025-12-21 21:16:22 -03:00
CamilleLaVey
df088c6442 [Re-introduced] Color output handling in SPIR-V emission 2025-12-21 21:16:22 -03:00
CamilleLaVey
3da798ba05 [Re-introduced] Added linear filtering in texture blitting operations 2025-12-21 21:16:22 -03:00
CamilleLaVey
58c76ece13 [spv, qcom] Ensuring SPV 1.3 2025-12-21 21:16:22 -03:00
CamilleLaVey
e1108010a6 [android] Update Gradle to 8.13.1 2025-12-21 21:16:22 -03:00
CamilleLaVey
3553c372dd [vk, qcom] UWU 2025-12-21 21:16:21 -03:00
CamilleLaVey
261dcc30c1 [revert] Added linear filtering in texture blitting operations 2025-12-21 21:16:21 -03:00
CamilleLaVey
095f8e92b1 [revert] Color output handling in SPIR-V emission. 2025-12-21 21:16:21 -03:00
CamilleLaVey
0e6a271e40 [vk] Remove forced stencil format handling in TextureCacheRuntime 2025-12-21 21:16:21 -03:00
CamilleLaVey
f22407de1e [revert] TiledCacheBarrier starter 2025-12-21 21:16:21 -03:00
CamilleLaVey
8fa7b068b8 [vk, qcom] Returned subgroups functions to QCOM 2025-12-21 21:16:21 -03:00
CamilleLaVey
29c629737b [vk] Added support for Stencil component type in texture handling 2025-12-21 21:16:20 -03:00
CamilleLaVey
f03b4d77c2 [vk, qcom] Graphics Subgroup bugged 2025-12-21 21:16:20 -03:00
CamilleLaVey
41148132f0 [vk] Added support for sample locations in depth and depth-stencil surfaces 2025-12-21 21:16:20 -03:00
CamilleLaVey
bf1efc6a4c [spv] SamplerComponentType 2025-12-21 21:16:20 -03:00
CamilleLaVey
7d535a06c3 [revert] Tightened SSBO tracking heuristics 2025-12-21 21:16:20 -03:00
CamilleLaVey
3a7091d193 [Revert] Adjusted Track function for bias handling and alignment checks for storage buffers 2025-12-21 21:16:20 -03:00
CamilleLaVey
bdc7acce27 [gl, vk] Implement SampledView method for ImageView 2025-12-21 21:16:19 -03:00
CamilleLaVey
68593f9ddc [ir, spv] Added support for sampler component types in texture handling 2025-12-21 21:16:19 -03:00
CamilleLaVey
979d203a77 [spv] Color output handling in SPIR-V emission. 2025-12-21 21:16:19 -03:00
CamilleLaVey
e086078e41 [vk] Added linear filtering in texture blitting operations 2025-12-21 21:16:19 -03:00
CamilleLaVey
535b33bc6b [spv, qcom] Implement warp intrinsics support 2025-12-21 21:16:19 -03:00
CamilleLaVey
e6b3f98b13 [vk] Conditioning Conditional Rendering #2 2025-12-21 21:16:19 -03:00
CamilleLaVey
b03eb6bd78 [vk, qcom] Removed SPIR-V 1.4 for qcom 2025-12-21 21:16:19 -03:00
CamilleLaVey
e4e71a36de [vk] Adjustments to Sample Locations 2025-12-21 21:16:18 -03:00
CamilleLaVey
a83157f0a9 [host] Adjusted Track function for bias handling and alignment checks for storage buffers 2025-12-21 21:16:18 -03:00
CamilleLaVey
8c1e47bbda [host] Added logging for OOM cases with fastmem relation 2025-12-21 21:16:18 -03:00
CamilleLaVey
c18662d039 [ir, nvn] Tightened SSBO tracking heuristics 2025-12-21 21:16:18 -03:00
Caio Oliveira
4fb7aea7b4 Revert "Controlled SPV features on QCOM"
This reverts commit 907b041ec6fb4f16750155f4c41e17389f2e385d.
2025-12-21 21:16:18 -03:00
CamilleLaVey
a8af150df4 Controlled SPV features on QCOM 2025-12-21 21:16:18 -03:00
CamilleLaVey
5b41abc1b1 [vk, qcom] Disabling VK_KHR_push_descriptor for qcom 2025-12-21 21:16:17 -03:00
CamilleLaVey
91f3a4e4bb [vk, vendor, mobile] Improved mobile staging buffer data 2025-12-21 21:16:17 -03:00
CamilleLaVey
aff095523d [vk, rasterizer] Update sample location handling for MSAA configurations 2025-12-21 21:16:17 -03:00
CamilleLaVey
49ba71a594 [vk, rasterizer] offsets float x Uint 2025-12-21 21:16:17 -03:00
CamilleLaVey
97cfd9786f [vk] Sample Locations Structure 2025-12-21 21:16:17 -03:00
CamilleLaVey
bc1873c944 [vk, rasterizer] TiledCacheBarrier starter 2025-12-21 21:16:17 -03:00
CamilleLaVey
e3b10eba10 [maxwell, vk] VK_EXT_Sample_Locations 2025-12-21 21:16:16 -03:00
CamilleLaVey
ce26d82d09 [vk, qcom] Removed 500.800.51 compilling parallel restriction 2025-12-21 21:16:16 -03:00
CamilleLaVey
a353f07b8f [vk, qcom] Adjusting Sampler Budget reserved value 2025-12-21 21:16:16 -03:00
CamilleLaVey
1b160c6091 [vk, qcom] UniformBufferAlignment set by hardware capabilities 2025-12-21 21:16:16 -03:00
CamilleLaVey
95cb6b6b3d [vk, qcom] Samplers Budget Management 2025-12-21 21:16:16 -03:00
CamilleLaVey
7b4b0bb6ca [vk, qcom] Extending limits of broken parallel compiling to 512.800.51 2025-12-21 21:16:16 -03:00
CamilleLaVey
6e93db52a5 [vk, qcom] Binding buffer limits 2025-12-21 21:16:15 -03:00
CamilleLaVey
e33c814879 [vk, vendor] Clamping memory usage in mobile gpu's 2025-12-21 21:16:15 -03:00
CamilleLaVey
5f3ad571cb [vk, qcom] Remove VK_EXT_CUSTOM_BORDER_COLOR 2025-12-21 21:16:15 -03:00
Caio Oliveira
20b171ded3 Partially reverts "[vk] Changing conditions for Swapchain maintenance1 "
This reverts commit 3fc3b5fdad.
2025-12-21 20:15:09 -03:00
CamilleLaVey
68edde944a [revert] Resolving conflicting changes 2025-12-21 19:06:13 -03:00
CamilleLaVey
510c795bd6 Revert "[vk] Ensure image view flags are resolved" 2025-12-21 19:06:13 -03:00
CamilleLaVey
eb7b62fefc Revert "[vk, texture_cache] Preveting ARGB8 get misinterpretated with depth formats" 2025-12-21 19:06:13 -03:00
CamilleLaVey
b974a8f7a8 Revert "[vk, texture_cache] BGRA8 Depth/Stencil format convertions" 2025-12-21 19:06:12 -03:00
CamilleLaVey
40fd7d5558 Revert "[maxwell] Logging for HDR wrong convertions into depth formats"
This reverts commit 66c26e39fe.
2025-12-21 19:06:12 -03:00
CamilleLaVey
a99c35d0ff Revert "[surface, vk, pipeline, texture_cache] Refactor image view handling and add normalized compatible format utility"
This reverts commit 6a230bec1a.
2025-12-21 19:06:12 -03:00
CamilleLaVey
83323d7993 Revert "[spir-v] Add is_integer flag to texture descriptors and update image type handling" 2025-12-21 19:06:12 -03:00
CamilleLaVey
6b8d5c1963 Revert "[vk, pipeline, texture_cache] Renamed MakeView parametter" 2025-12-21 19:06:12 -03:00
CamilleLaVey
7b94b7ebc1 [vk] Gating advanced ExtendedDynamicState1 2025-12-21 19:06:12 -03:00
CamilleLaVey
5d645ba905 [licences] Updating licenses on missing files 2025-12-21 19:06:11 -03:00
CamilleLaVey
5f70c3fc57 [vk] Line rasterization and Alpha features adjusments (again) 2025-12-21 19:06:11 -03:00
CamilleLaVey
b87d0215a0 [vk, scheduler] Applying finising call for TF when it's not getting used 2025-12-21 19:06:11 -03:00
CamilleLaVey
e6e6785583 [vk, qcom] Returning forced SScaled and UScaled formats emulations to Adreno. 2025-12-21 19:06:11 -03:00
CamilleLaVey
60bffd9992 [vk, vendor] Forcing BGR5 emulation path due to driver misbehavior. 2025-12-21 19:06:11 -03:00
CamilleLaVey
130632b8ef [vk] ExtendedDynamicState repair #2 2025-12-21 19:06:11 -03:00
CamilleLaVey
e2264170f6 [vk] Depth State Refresh Update. 2025-12-21 19:06:11 -03:00
CamilleLaVey
737a409088 [vk, compute_pass] Conditioning Conditional Rendering 2025-12-21 19:06:10 -03:00
CamilleLaVey
e7130a28d6 [spir-v, emit] Flat Decoration Adjustment 2025-12-21 19:06:10 -03:00
CamilleLaVey
d9db45913c [spir-v, emit] SPV Image Missmatch 2025-12-21 19:06:10 -03:00
CamilleLaVey
6d2c8e6dbb [vk, rasterizer] Clamping Render-Area out of limits 2025-12-21 19:06:10 -03:00
CamilleLaVey
19600b7e0e [vk, rasterizer, state_tracker] LineMode disabled from scheduler 2025-12-21 19:06:10 -03:00
CamilleLaVey
dd74bb459b [surface, vk, pipeline, texture_cache] Texture Sampling Fix 2025-12-21 19:06:10 -03:00
CamilleLaVey
648bf1e813 [vk, swapchain] Swapchaing Image VkQueue 2025-12-21 19:06:09 -03:00
CamilleLaVey
cbef1f3451 [vk, graphics, pipeline, rasterizer] Alpha Coverage Adjustment 2025-12-21 19:06:09 -03:00
CamilleLaVey
b4472764eb [vk, pipeline, texture_cache] Renamed MakeView parametter 2025-12-21 19:06:09 -03:00
CamilleLaVey
5fe7a51522 [spir-v] Add is_integer flag to texture descriptors and update image type handling 2025-12-21 19:06:09 -03:00
CamilleLaVey
2e0374c458 [surface, vk, pipeline, texture_cache] Refactor image view handling and add normalized compatible format utility 2025-12-21 19:06:09 -03:00
CamilleLaVey
61a0f38a2a [vk] Removing false remove feature logging for robustness2 and image robustness. 2025-12-21 19:06:09 -03:00
CamilleLaVey
72d61c38be [vk] ExtendedDynamicState repair #1 2025-12-21 19:06:09 -03:00
CamilleLaVey
d45094c246 [spir-v] Flat decorations for input interfaces 2025-12-21 19:06:08 -03:00
CamilleLaVey
1607f62149 [vk] VK_EXT_multi_draw 2025-12-21 19:06:08 -03:00
CamilleLaVey
17ddbb2240 [vk] Declaring features from Maintenance5 2025-12-21 19:06:08 -03:00
CamilleLaVey
2eb425d7f8 [vk] Fixing logging statements 2025-12-21 19:06:08 -03:00
CamilleLaVey
114c946971 [vk] Removing Image Robustness from EXT list. 2025-12-21 19:06:08 -03:00
CamilleLaVey
0e4382d09b [vk] ExtendedDynamicState impl close to Vulkan specs 2025-12-21 19:06:08 -03:00
CamilleLaVey
e3a9e85099 [vk, rasterizer] Reduce FlushWork constant drawcalls 2025-12-21 19:06:07 -03:00
CamilleLaVey
eb120ac1ac [vk] Moving Maintenance features to wrapper 2025-12-21 19:06:07 -03:00
CamilleLaVey
956c28b41a [vk] Re-ordering tiling format features 2025-12-21 19:06:07 -03:00
CamilleLaVey
79f20ece71 [vk] Re-ordering format feature 2025-12-21 19:06:07 -03:00
CamilleLaVey
1373ee90b0 [vk] Robustness2 and Image Robustness 2025-12-21 19:06:07 -03:00
CamilleLaVey
74a4816530 [maxwell] Logging for HDR wrong convertions into depth formats 2025-12-21 19:06:07 -03:00
CamilleLaVey
30706cde41 [vk, texture_cache] BGRA8 Depth/Stencil format convertions 2025-12-21 19:06:07 -03:00
CamilleLaVey
e436030383 [vk, texture_cache] Preveting ARGB8 get misinterpretated with depth formats 2025-12-21 19:06:06 -03:00
CamilleLaVey
1bcc998040 [vk] Adjusting Custom Border Color 2025-12-21 19:06:06 -03:00
CamilleLaVey
fbd357d82c [vk] Adjusting VIDS 2025-12-21 19:06:06 -03:00
CamilleLaVey
3fc3b5fdad [vk] Changing conditions for Swapchain maintenance1 2025-12-21 19:06:06 -03:00
CamilleLaVey
b5f8ff7035 [vk] Ensure image view flags are resolved 2025-12-21 19:06:06 -03:00
CamilleLaVey
002078af36 [vk] Aliging ExtendedDynamicState2 2025-12-21 19:06:06 -03:00
CamilleLaVey
e4b4a75cab [vk, spir-v] Conditioning creation of VK_EXT_Shader_Stencil_Export in SPIR-V 2025-12-21 19:06:05 -03:00
CamilleLaVey
7103abccdb fixing building error. 2025-12-21 19:06:05 -03:00
CamilleLaVey
319dfeaad8 [vk, texture_cache, vendor] Adding path for hardware resolve on shader stencil export/ MSAA image blits 2025-12-21 19:06:05 -03:00
CamilleLaVey
49e4f3b29a [vk] Return VK 1.3 as main target, treat VK 1.4 core features as extensions if driver supports it 2025-12-21 19:06:05 -03:00
CamilleLaVey
bafc6f34fb [vk] Ordering double cases specified and allocating them in the correct please on GetSuitability phase 2025-12-21 19:06:05 -03:00
CamilleLaVey
f9d612329a Dammed macros. 2025-12-21 19:06:05 -03:00
CamilleLaVey
6ca3d3a1b1 Fix building issues 2025-12-21 19:06:04 -03:00
CamilleLaVey
7b40a8df80 [vk, qcom] VertexInputDynamicState ban removal 2025-12-21 19:06:04 -03:00
CamilleLaVey
53f10ab19e [vk] Bumping features to 1.4 2025-12-21 19:06:04 -03:00
CamilleLaVey
33c72ef7e1 [vk] Updated maintenance features 2025-12-21 19:06:04 -03:00
CamilleLaVey
acae4b089d [vk, amd, qcom] Removed older driver workarounds 2025-12-21 19:06:04 -03:00
CamilleLaVey
8055a64b5f [vk, spir-v] Adding decoration for NonWritable buffers if vertexPipelineStoresAndAtomics isn't available 2025-12-21 19:06:04 -03:00
CamilleLaVey
e4ae8a72dd [vk, buffer_cache] Aligning VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT logic 2025-12-21 19:06:04 -03:00
CamilleLaVey
d68d0bd65e [vk., vendor] Adding driver id flag in blacklist 2025-12-21 19:06:03 -03:00
CamilleLaVey
a35c9d5576 [Refactor, vk] DynamicState, ExtendedDynamicState and VertexInputDynamicState 2025-12-21 19:06:03 -03:00
Caio Oliveira
eade2c19c8 Revert "[vk] attempt to continue even if unsuitable driver (#3087)"
This reverts commit 7a98ee4ead.
2025-12-21 19:05:54 -03:00
Caio Oliveira
1ff788bed5 Revert "[video_core] Fix inconsistency between EDS and VIDS settings (#3148)"
This reverts commit 7157d5167e.
2025-12-21 19:00:35 -03:00
79 changed files with 3698 additions and 1098 deletions

View File

@@ -18,7 +18,6 @@ plugins {
id("androidx.navigation.safeargs.kotlin")
id("org.jlleitschuh.gradle.ktlint") version "11.4.0"
id("com.github.triplet.play") version "3.8.6"
id("idea")
}
/**
@@ -28,8 +27,6 @@ plugins {
*/
val autoVersion = (((System.currentTimeMillis() / 1000) - 1451606400) / 10).toInt()
val edenDir = project(":Eden").projectDir
@Suppress("UnstableApiUsage")
android {
namespace = "org.yuzu.yuzu_emu"
@@ -244,17 +241,11 @@ android {
externalNativeBuild {
cmake {
version = "3.22.1"
path = file("${edenDir}/CMakeLists.txt")
path = file("../../../CMakeLists.txt")
}
}
}
idea {
module {
// Inclusion to exclude build/ dir from non-Android
excludeDirs.add(file("${edenDir}/build"))
}
}
tasks.register<Delete>("ktlintReset", fun Delete.() {
delete(File(layout.buildDirectory.toString() + File.separator + "intermediates/ktLint"))
@@ -355,7 +346,7 @@ fun getGitVersion(): String {
}
afterEvaluate {
val artifactsDir = layout.projectDirectory.dir("${edenDir}/artifacts")
val artifactsDir = layout.projectDirectory.dir("../../../artifacts")
val outputsDir = layout.buildDirectory.dir("outputs").get()
android.applicationVariants.forEach { variant ->

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -22,6 +19,3 @@ dependencyResolutionManagement {
}
include(":app")
include("Eden")
project(":Eden").projectDir = file("../..")

View File

@@ -578,7 +578,10 @@ public:
#endif
int flags = (fd > 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED;
void* ret = mmap(virtual_base + virtual_offset, length, prot_flags, flags, fd, host_offset);
ASSERT_MSG(ret != MAP_FAILED, "mmap: {}", strerror(errno));
ASSERT_MSG(ret != MAP_FAILED,
"mmap(virt_off=0x{:X}, host_off=0x{:X}, len=0x{:X}, virt_size=0x{:X}, backing_size=0x{:X}, perms=0x{:X}) failed: {}",
virtual_offset, host_offset, length, virtual_size, backing_size,
static_cast<u32>(perms), strerror(errno));
}
void Unmap(size_t virtual_offset, size_t length) {

View File

@@ -466,7 +466,7 @@ struct Values {
true};
SwitchableSetting<bool> async_presentation{linkage,
#ifdef ANDROID
true,
false,
#else
false,
#endif

View File

@@ -130,6 +130,17 @@ public:
ResetStorageBit(id.index);
}
[[nodiscard]] bool Contains(SlotId id) const noexcept {
if (!id) {
return false;
}
const size_t word = id.index / 64;
if (word >= stored_bitset.size()) {
return false;
}
return ((stored_bitset[word] >> (id.index % 64)) & 1) != 0;
}
[[nodiscard]] Iterator begin() noexcept {
const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
if (it == stored_bitset.end()) {

View File

@@ -391,15 +391,28 @@ const std::size_t CACHE_PAGE_SIZE = 4096;
void ArmNce::ClearInstructionCache() {
#ifdef __aarch64__
// Ensure all previous memory operations complete
// Use IC IALLU to actually invalidate L1 instruction cache
asm volatile("dsb ish\n"
"ic iallu\n"
"dsb ish\n"
"isb" ::: "memory");
#endif
}
void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
this->ClearInstructionCache();
#ifdef ARCHITECTURE_arm64
// Invalidate instruction cache for specific range instead of full flush
constexpr u64 cache_line_size = 64;
const u64 aligned_addr = addr & ~(cache_line_size - 1);
const u64 end_addr = (addr + size + cache_line_size - 1) & ~(cache_line_size - 1);
asm volatile("dsb ish" ::: "memory");
for (u64 i = aligned_addr; i < end_addr; i += cache_line_size) {
asm volatile("ic ivau, %0" :: "r"(i) : "memory");
}
asm volatile("dsb ish\n"
"isb" ::: "memory");
#endif
}
} // namespace Core

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstring>
#include <sstream>
#include <boost/range/algorithm_ext/erase.hpp>
@@ -187,7 +191,7 @@ void HLERequestContext::ParseCommandBuffer(u32_le* src_cmdbuf, bool incoming) {
buffer_w_descriptors.push_back(rp.PopRaw<IPC::BufferDescriptorABW>());
}
const auto buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
if (!command_header->IsTipc()) {
// Padding to align to 16 bytes
@@ -294,7 +298,15 @@ Result HLERequestContext::WriteToOutgoingCommandBuffer() {
}
// Write the domain objects to the command buffer, these go after the raw untranslated data.
// TODO(Subv): This completely ignores C buffers.
if (buffer_c_offset != 0 && !buffer_c_descriptors.empty()) {
constexpr u32 WORDS_PER_DESCRIPTOR = sizeof(IPC::BufferDescriptorC) / sizeof(u32);
u32 descriptor_offset = buffer_c_offset;
for (const auto& descriptor : buffer_c_descriptors) {
std::memcpy(&cmd_buf[descriptor_offset], &descriptor, sizeof(descriptor));
descriptor_offset += WORDS_PER_DESCRIPTOR;
}
}
if (GetManager()->IsDomain()) {
current_offset = domain_offset - static_cast<u32>(outgoing_domain_objects.size());
@@ -393,10 +405,14 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBuffer target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
if (is_buffer_b) {
@@ -418,15 +434,25 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size,
std::size_t buffer_index) const {
if (buffer_index >= BufferDescriptorB().size() || size == 0) {
if (buffer_index >= BufferDescriptorB().size()) {
LOG_WARNING(Core, "WriteBufferB invalid buffer index {}", buffer_index);
return 0;
}
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write (B)");
return 0;
}
const auto buffer_size{BufferDescriptorB()[buffer_index].Size()};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBufferB target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
@@ -435,15 +461,25 @@ std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size
std::size_t HLERequestContext::WriteBufferC(const void* buffer, std::size_t size,
std::size_t buffer_index) const {
if (buffer_index >= BufferDescriptorC().size() || size == 0) {
if (buffer_index >= BufferDescriptorC().size()) {
LOG_WARNING(Core, "WriteBufferC invalid buffer index {}", buffer_index);
return 0;
}
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write (C)");
return 0;
}
const auto buffer_size{BufferDescriptorC()[buffer_index].Size()};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBufferC target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
@@ -473,12 +509,20 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
ASSERT_OR_EXECUTE_MSG(
BufferDescriptorB().size() > buffer_index, { return 0; },
"BufferDescriptorB invalid buffer_index {}", buffer_index);
return BufferDescriptorB()[buffer_index].Size();
const auto size = BufferDescriptorB()[buffer_index].Size();
if (size == 0) {
LOG_WARNING(Core, "BufferDescriptorB index {} has zero size", buffer_index);
}
return size;
} else {
ASSERT_OR_EXECUTE_MSG(
BufferDescriptorC().size() > buffer_index, { return 0; },
"BufferDescriptorC invalid buffer_index {}", buffer_index);
return BufferDescriptorC()[buffer_index].Size();
const auto size = BufferDescriptorC()[buffer_index].Size();
if (size == 0) {
LOG_WARNING(Core, "BufferDescriptorC index {} has zero size", buffer_index);
}
return size;
}
return 0;
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -422,6 +425,7 @@ private:
u32 data_payload_offset{};
u32 handles_offset{};
u32 domain_offset{};
u32 buffer_c_offset{};
std::weak_ptr<SessionRequestManager> manager{};
bool is_deferred{false};

View File

@@ -134,4 +134,4 @@ target_include_directories(dynarmic_tests PRIVATE . ../src)
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
add_test(NAME dynarmic_tests COMMAND dynarmic_tests --durations yes)
add_test(dynarmic_tests dynarmic_tests --durations yes)

View File

@@ -11,6 +11,7 @@
#include <vector>
#include <spirv-tools/optimizer.hpp>
#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -439,15 +440,23 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles) &&
profile.support_vote) {
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(ctx.stage);
const bool needs_warp_intrinsics = info.uses_subgroup_vote ||
info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles;
if (needs_warp_intrinsics && profile.support_vote && stage_supports_warp) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
ctx.AddCapability(spv::Capability::GroupNonUniformShuffle);
if (!profile.warp_size_potentially_larger_than_guest) {
// vote ops are only used when not taking the long path
ctx.AddCapability(spv::Capability::GroupNonUniformVote);
}
} else if (needs_warp_intrinsics && !stage_supports_warp) {
LOG_WARNING(Shader,
"Warp intrinsics requested in stage {} but the device does not report subgroup "
"support; falling back to scalar approximations",
static_cast<u32>(ctx.stage));
}
if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
ctx.AddCapability(spv::Capability::Int64Atomics);

View File

@@ -491,9 +491,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
}
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
const AttributeType output_type{ctx.runtime_info.color_output_types[index]};
Id pointer_type{ctx.output_f32};
Id store_value{value};
switch (output_type) {
case AttributeType::SignedInt:
pointer_type = ctx.output_s32;
store_value = ctx.OpBitcast(ctx.S32[1], value);
break;
case AttributeType::UnsignedInt:
pointer_type = ctx.output_u32;
store_value = ctx.OpBitcast(ctx.U32[1], value);
break;
default:
break;
}
const Id component_id{ctx.Const(component)};
const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
ctx.OpStore(pointer, value);
const Id pointer{ctx.OpAccessChain(pointer_type, ctx.frag_color.at(index), component_id)};
ctx.OpStore(pointer, store_value);
}
void EmitSetSampleMask(EmitContext& ctx, Id value) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -195,6 +198,41 @@ Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR
}
}
Id TextureColorResultType(EmitContext& ctx, const TextureDefinition& def) {
switch (def.component_type) {
case SamplerComponentType::Float:
case SamplerComponentType::Depth:
return ctx.F32[4];
case SamplerComponentType::Sint:
return ctx.S32[4];
case SamplerComponentType::Stencil:
return ctx.U32[4];
case SamplerComponentType::Uint:
return ctx.U32[4];
}
throw InvalidArgument("Invalid sampler component type {}", def.component_type);
}
// Converts a raw sampled color back to the f32 vector the IR expects.
// Integer results are converted; stencil is additionally normalized, since
// the guest reads stencil as an 8-bit value mapped into [0, 1].
Id TextureSampleResultToFloat(EmitContext& ctx, const TextureDefinition& def, Id color) {
    switch (def.component_type) {
    case SamplerComponentType::Float:
    case SamplerComponentType::Depth:
        // Already floating point; pass through unchanged.
        return color;
    case SamplerComponentType::Sint:
        return ctx.OpConvertSToF(ctx.F32[4], color);
    case SamplerComponentType::Uint:
        return ctx.OpConvertUToF(ctx.F32[4], color);
    case SamplerComponentType::Stencil: {
        const Id as_float{ctx.OpConvertUToF(ctx.F32[4], color)};
        const Id norm{ctx.Const(1.0f / 255.0f)};
        const Id norm_vec{ctx.ConstantComposite(ctx.F32[4], norm, norm, norm, norm)};
        return ctx.OpFMul(ctx.F32[4], as_float, norm_vec);
    }
    }
    throw InvalidArgument("Invalid sampler component type {}", def.component_type);
}
Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
if (!index.IsImmediate() || index.U32() != 0) {
throw NotImplementedException("Indirect image indexing");
@@ -449,31 +487,39 @@ Id EmitBoundImageWrite(EmitContext&) {
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const Id texture{Texture(ctx, info, index)};
Id color{};
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
&EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
color = Emit(&EmitContext::OpImageSparseSampleImplicitLod,
&EmitContext::OpImageSampleImplicitLod, ctx, inst, color_type, texture,
coords, operands.MaskOptional(), operands.Span());
} else {
// We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as
// if the lod was explicitly zero. This may change on Turing with implicit compute
// derivatives
const Id lod{ctx.Const(0.0f)};
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
color = Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, texture,
coords, operands.Mask(), operands.Span());
}
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const ImageOperands operands(ctx, false, true, false, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
@@ -509,13 +555,18 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const ImageOperands operands(ctx, offset, offset2);
const Id texture{Texture(ctx, info, index)};
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
}
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
operands.MaskOptional(), operands.Span());
const Id color{
Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, color_type,
texture, coords, ctx.Const(info.gather_component), operands.MaskOptional(),
operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
@@ -533,6 +584,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition* def =
info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index);
const Id result_type{def ? TextureColorResultType(ctx, *def) : ctx.F32[4]};
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
@@ -542,8 +596,13 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
lod = Id{};
}
const ImageOperands operands(lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
Id color{Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst,
result_type, TextureImage(ctx, info, index), coords, operands.MaskOptional(),
operands.Span())};
if (def) {
color = TextureSampleResultToFloat(ctx, *def, color);
}
return color;
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
@@ -588,14 +647,17 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivatives, const IR::Value& offset, Id lod_clamp) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const auto operands = info.num_derivatives == 3
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
ctx.Def(offset), {}, lod_clamp)
: ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
info.num_derivatives, offset, lod_clamp);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -78,9 +81,25 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
}
// Convenience wrapper: does the active profile report subgroup (warp)
// intrinsic support for the current shader stage?
bool SupportsWarpIntrinsics(const EmitContext& ctx) {
return ctx.profile.SupportsWarpIntrinsics(ctx.stage);
}
// Marks a shuffle instruction's in-bounds flag as unconditionally true.
// Used by the scalar fallback path, where the "shuffle" is an identity and
// therefore can never read out of range.
void SetAlwaysInBounds(EmitContext& ctx, IR::Inst* inst) {
SetInBoundsFlag(inst, ctx.true_value);
}
// Scalar stand-in for OpGroupNonUniformBallot when the device lacks
// subgroup support: treat the invocation as a warp of one, yielding an
// all-ones mask when the predicate holds and zero otherwise.
Id FallbackBallotMask(EmitContext& ctx, Id pred) {
    return ctx.OpSelect(ctx.U32[1], pred, ctx.Const(0xFFFFFFFFu), ctx.u32_zero_value);
}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
@@ -89,6 +108,9 @@ Id EmitLaneId(EmitContext& ctx) {
}
Id EmitVoteAll(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAll(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -102,6 +124,9 @@ Id EmitVoteAll(EmitContext& ctx, Id pred) {
}
Id EmitVoteAny(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAny(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -115,6 +140,9 @@ Id EmitVoteAny(EmitContext& ctx, Id pred) {
}
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAllEqual(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -129,6 +157,9 @@ Id EmitVoteEqual(EmitContext& ctx, Id pred) {
}
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return FallbackBallotMask(ctx, pred);
}
const Id ballot{ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
@@ -137,27 +168,46 @@ Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
}
Id EmitSubgroupEqMask(EmitContext& ctx) {
    // Without subgroup support there is no meaningful lane mask; return 0.
    return SupportsWarpIntrinsics(ctx) ? LoadMask(ctx, ctx.subgroup_mask_eq)
                                       : ctx.u32_zero_value;
}
Id EmitSubgroupLtMask(EmitContext& ctx) {
    // Without subgroup support there is no meaningful lane mask; return 0.
    return SupportsWarpIntrinsics(ctx) ? LoadMask(ctx, ctx.subgroup_mask_lt)
                                       : ctx.u32_zero_value;
}
Id EmitSubgroupLeMask(EmitContext& ctx) {
    // Without subgroup support there is no meaningful lane mask; return 0.
    return SupportsWarpIntrinsics(ctx) ? LoadMask(ctx, ctx.subgroup_mask_le)
                                       : ctx.u32_zero_value;
}
Id EmitSubgroupGtMask(EmitContext& ctx) {
    // Without subgroup support there is no meaningful lane mask; return 0.
    return SupportsWarpIntrinsics(ctx) ? LoadMask(ctx, ctx.subgroup_mask_gt)
                                       : ctx.u32_zero_value;
}
Id EmitSubgroupGeMask(EmitContext& ctx) {
    // Without subgroup support there is no meaningful lane mask; return 0.
    return SupportsWarpIntrinsics(ctx) ? LoadMask(ctx, ctx.subgroup_mask_ge)
                                       : ctx.u32_zero_value;
}
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{EmitLaneId(ctx)};
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
@@ -177,6 +227,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
@@ -192,6 +246,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
@@ -207,6 +265,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};

View File

@@ -28,27 +28,41 @@ enum class Operation {
FPMax,
};
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
// Scalar SPIR-V type used as the sampled type of an image declaration,
// derived from the sampler's component interpretation.
Id ComponentScalarType(EmitContext& ctx, SamplerComponentType component_type) {
    switch (component_type) {
    case SamplerComponentType::Float:
    case SamplerComponentType::Depth:
        return ctx.F32[1];
    case SamplerComponentType::Sint:
        return ctx.S32[1];
    case SamplerComponentType::Stencil:
    case SamplerComponentType::Uint:
        // Stencil and unsigned formats share the unsigned scalar type.
        return ctx.U32[1];
    }
    throw InvalidArgument("Invalid sampler component type {}", component_type);
}
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc, Id sampled_type) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
const bool ms{desc.is_multisample};
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, false, false, 1, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, true, false, 1, format);
case TextureType::Color2D:
case TextureType::Color2DRect:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, false, ms, 1, format);
case TextureType::ColorArray2D:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, true, ms, 1, format);
case TextureType::Color3D:
return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, depth, false, false, 1, format);
case TextureType::ColorCube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, false, false, 1, format);
case TextureType::ColorArrayCube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, true, false, 1, format);
case TextureType::Buffer:
break;
}
@@ -315,6 +329,9 @@ void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
ctx.Decorate(id, spv::Decoration::Binding, binding);
ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
ctx.Name(id, fmt::format("ssbo{}", index));
if (!desc.is_written) {
ctx.Decorate(id, spv::Decoration::NonWritable);
}
if (ctx.profile.supported_spirv >= 0x00010400) {
ctx.interfaces.push_back(id);
}
@@ -546,6 +563,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
if (info.uses_int8 && profile.support_int8) {
AddCapability(spv::Capability::Int8);
@@ -1359,7 +1377,8 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
textures.reserve(info.texture_descriptors.size());
for (const TextureDescriptor& desc : info.texture_descriptors) {
const Id image_type{ImageType(*this, desc)};
const Id result_type{ComponentScalarType(*this, desc.component_type)};
const Id image_type{ImageType(*this, desc, result_type)};
const Id sampled_type{TypeSampledImage(image_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
@@ -1372,8 +1391,10 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
.sampled_type = sampled_type,
.pointer_type = pointer_type,
.image_type = image_type,
.result_type = result_type,
.count = desc.count,
.is_multisample = desc.is_multisample,
.component_type = desc.component_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1416,6 +1437,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
void EmitContext::DefineInputs(const IR::Program& program) {
const Info& info{program.info};
const VaryingState loads{info.loads.mask | info.passthrough.mask};
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(stage);
if (info.uses_workgroup_id) {
workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
@@ -1432,24 +1454,37 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
if (info.uses_sample_id) {
sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
if (stage == Stage::Fragment) {
Decorate(sample_id, spv::Decoration::Flat);
}
}
if (info.uses_is_helper_invocation) {
is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
}
if (info.uses_subgroup_mask) {
if (info.uses_subgroup_mask && stage_supports_warp) {
subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
if (stage == Stage::Fragment) {
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
Decorate(subgroup_mask_lt, spv::Decoration::Flat);
Decorate(subgroup_mask_le, spv::Decoration::Flat);
Decorate(subgroup_mask_gt, spv::Decoration::Flat);
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&
(info.uses_subgroup_vote || info.uses_subgroup_mask))) {
if (stage_supports_warp &&
(info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&
(info.uses_subgroup_vote || info.uses_subgroup_mask)))) {
AddCapability(spv::Capability::GroupNonUniform);
subgroup_local_invocation_id =
DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
if (stage == Stage::Fragment) {
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd) {
const Id f32_one{Const(1.0f)};
@@ -1461,6 +1496,9 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
if (loads[IR::Attribute::PrimitiveId]) {
primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
if (stage == Stage::Fragment) {
Decorate(primitive_id, spv::Decoration::Flat);
}
}
if (loads[IR::Attribute::Layer]) {
AddCapability(spv::Capability::Geometry);
@@ -1552,17 +1590,21 @@ void EmitContext::DefineInputs(const IR::Program& program) {
if (stage != Stage::Fragment) {
continue;
}
switch (info.interpolation[index]) {
case Interpolation::Smooth:
// Default
// Decorate(id, spv::Decoration::Smooth);
break;
case Interpolation::NoPerspective:
Decorate(id, spv::Decoration::NoPerspective);
break;
case Interpolation::Flat:
const bool is_integer = input_type == AttributeType::SignedInt ||
input_type == AttributeType::UnsignedInt;
if (is_integer) {
Decorate(id, spv::Decoration::Flat);
break;
} else {
switch (info.interpolation[index]) {
case Interpolation::Smooth:
break;
case Interpolation::NoPerspective:
Decorate(id, spv::Decoration::NoPerspective);
break;
case Interpolation::Flat:
Decorate(id, spv::Decoration::Flat);
break;
}
}
}
if (stage == Stage::TessellationEval) {
@@ -1658,7 +1700,18 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
continue;
}
frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
const AttributeType output_type{runtime_info.color_output_types[index]};
const Id vec_type = [&, output_type]() -> Id {
switch (output_type) {
case AttributeType::SignedInt:
return S32[4];
case AttributeType::UnsignedInt:
return U32[4];
default:
return F32[4];
}
}();
frag_color[index] = DefineOutput(*this, vec_type, std::nullopt);
Decorate(frag_color[index], spv::Decoration::Location, index);
Name(frag_color[index], fmt::format("frag_color{}", index));
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -36,8 +39,10 @@ struct TextureDefinition {
Id sampled_type;
Id pointer_type;
Id image_type;
Id result_type;
u32 count;
bool is_multisample;
SamplerComponentType component_type;
};
struct TextureBufferDefinition {
@@ -244,6 +249,7 @@ public:
Id output_f32{};
Id output_u32{};
Id output_s32{};
Id image_buffer_type{};
Id image_u32{};

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -22,6 +25,8 @@ public:
[[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
[[nodiscard]] virtual SamplerComponentType ReadTextureComponentType(u32 raw_handle) = 0;
[[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0;
[[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0;

View File

@@ -396,6 +396,10 @@ bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf)
return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
}
SamplerComponentType ReadTextureComponentType(Environment& env, const ConstBufferAddr& cbuf) {
return env.ReadTextureComponentType(GetTextureHandle(env, cbuf));
}
class Descriptors {
public:
explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
@@ -433,7 +437,9 @@ public:
u32 Add(const TextureDescriptor& desc) {
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
return desc.type == existing.type &&
desc.component_type == existing.component_type &&
desc.is_depth == existing.is_depth &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
@@ -666,10 +672,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
.secondary_shift_left = cbuf.secondary_shift_left,
.count = cbuf.count,
.size_shift = DESCRIPTOR_SIZE_SHIFT,
.component_type = ReadTextureComponentType(env, cbuf),
});
} else {
index = descriptors.Add(TextureDescriptor{
.type = flags.type,
.component_type = ReadTextureComponentType(env, cbuf),
.is_depth = flags.is_depth != 0,
.is_multisample = is_multisample,
.has_secondary = cbuf.has_secondary,

View File

@@ -1,9 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <limits>
#include "common/common_types.h"
#include "shader_recompiler/stage.h"
namespace Shader {
@@ -46,6 +52,8 @@ struct Profile {
bool support_multi_viewport{};
bool support_geometry_streams{};
u32 warp_stage_support_mask{std::numeric_limits<u32>::max()};
bool warp_size_potentially_larger_than_guest{};
bool lower_left_origin_mode{};
@@ -90,6 +98,11 @@ struct Profile {
u64 min_ssbo_alignment{};
u32 max_user_clip_distances{};
bool SupportsWarpIntrinsics(Stage stage) const {
const u32 bit = 1u << static_cast<u32>(stage);
return (warp_stage_support_mask & bit) != 0;
}
};
} // namespace Shader

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -80,6 +83,7 @@ struct TransformFeedbackVarying {
struct RuntimeInfo {
std::array<AttributeType, 32> generic_input_types{};
std::array<AttributeType, 8> color_output_types{};
VaryingState previous_stage_stores;
std::map<IR::Attribute, IR::Attribute> previous_stage_legacy_stores_mapping;

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -151,6 +154,14 @@ enum class ImageFormat : u32 {
R32G32B32A32_UINT,
};
// How a sampled texture's components are interpreted by the shader.
// Selects the SPIR-V sampled type of the image declaration and any
// conversion applied to the raw result after sampling.
enum class SamplerComponentType : u8 {
    Float,   // Floating-point/normalized formats; sampled directly as f32
    Sint,    // Signed integer formats; converted signed->float on read
    Uint,    // Unsigned integer formats; converted unsigned->float on read
    Depth,   // Depth formats; sampled as f32
    Stencil, // Stencil formats; sampled as uint, scaled by 1/255 on read
};
enum class Interpolation {
Smooth,
Flat,
@@ -183,6 +194,7 @@ struct TextureBufferDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
SamplerComponentType component_type;
auto operator<=>(const TextureBufferDescriptor&) const = default;
};
@@ -204,6 +216,7 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip
struct TextureDescriptor {
TextureType type;
SamplerComponentType component_type;
bool is_depth;
bool is_multisample;
bool has_secondary;

View File

@@ -7,7 +7,6 @@
#pragma once
#include <algorithm>
#include <cstring>
#include <memory>
#include <numeric>
@@ -16,8 +15,6 @@
#include "video_core/guest_memory.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/texture_cache/util.h"
#include "video_core/polygon_mode_utils.h"
#include "video_core/renderer_vulkan/line_loop_utils.h"
namespace VideoCommon {
@@ -356,37 +353,14 @@ void BufferCache<P>::UpdateComputeBuffers() {
template <class P>
void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (is_indexed) {
BindHostIndexBuffer();
} else {
if constexpr (!P::IS_OPENGL) {
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
if (draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
polygon_mode == Maxwell::PolygonMode::Line && draw_state.vertex_buffer.count > 1) {
const u32 vertex_count = draw_state.vertex_buffer.count;
const u32 generated_count = vertex_count + 1;
const bool use_u16 = vertex_count <= 0x10000;
const u32 element_size = use_u16 ? sizeof(u16) : sizeof(u32);
auto staging = runtime.UploadStagingBuffer(
static_cast<size_t>(generated_count) * element_size);
std::span<u8> dst_span{staging.mapped_span.data(),
generated_count * static_cast<size_t>(element_size)};
Vulkan::LineLoop::GenerateSequentialWithClosureRaw(dst_span, element_size);
const auto synthetic_format = use_u16 ? Maxwell::IndexFormat::UnsignedShort
: Maxwell::IndexFormat::UnsignedInt;
runtime.BindIndexBuffer(draw_state.topology, synthetic_format,
draw_state.vertex_buffer.first, generated_count,
staging.buffer, static_cast<u32>(staging.offset),
generated_count * element_size);
}
}
if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
draw_state.vertex_buffer.count);
}
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
draw_state.vertex_buffer.count);
}
}
BindHostVertexBuffers();
@@ -763,44 +737,6 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
const u32 size = channel_state->index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if constexpr (!P::IS_OPENGL) {
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
const bool polygon_line =
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
polygon_mode == Maxwell::PolygonMode::Line;
if (polygon_line && draw_state.index_buffer.count > 1) {
const u32 element_size = draw_state.index_buffer.FormatSizeInBytes();
const size_t src_bytes = static_cast<size_t>(draw_state.index_buffer.count) * element_size;
const size_t total_bytes = src_bytes + element_size;
auto staging = runtime.UploadStagingBuffer(total_bytes);
std::span<u8> dst_span{staging.mapped_span.data(), total_bytes};
std::span<const u8> src_span;
if (!draw_state.inline_index_draw_indexes.empty()) {
const u8* const src =
draw_state.inline_index_draw_indexes.data() +
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
src_span = {src, src_bytes};
} else if (const u8* const cpu_base =
device_memory.GetPointer<u8>(channel_state->index_buffer.device_addr)) {
const u8* const src = cpu_base +
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
src_span = {src, src_bytes};
} else {
const DAddr src_addr =
channel_state->index_buffer.device_addr +
static_cast<DAddr>(draw_state.index_buffer.first) * element_size;
device_memory.ReadBlockUnsafe(src_addr, dst_span.data(), src_bytes);
src_span = {dst_span.data(), src_bytes};
}
Vulkan::LineLoop::CopyWithClosureRaw(dst_span, src_span, element_size);
buffer.MarkUsage(offset, size);
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
draw_state.index_buffer.first, draw_state.index_buffer.count + 1,
staging.buffer, static_cast<u32>(staging.offset),
static_cast<u32>(total_bytes));
return;
}
}
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(size);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -235,6 +238,9 @@ constexpr Table MakeViewTable() {
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
Enable(view, PixelFormat::D24_UNORM_S8_UINT, PixelFormat::S8_UINT);
Enable(view, PixelFormat::S8_UINT_D24_UNORM, PixelFormat::S8_UINT);
Enable(view, PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::S8_UINT);
return view;
}

View File

@@ -43,90 +43,66 @@ void DmaPusher::DispatchCalls() {
bool DmaPusher::Step() {
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
CommandList& command_list{dma_pushbuffer.front()};
CommandList& command_list = dma_pushbuffer.front();
ASSERT_OR_EXECUTE(
command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
return true;
});
const size_t prefetch_size = command_list.prefetch_command_list.size();
const size_t command_list_size = command_list.command_lists.size();
if (command_list.prefetch_command_list.size()) {
// Prefetched command list from nvdrv, used for things like synchronization
ProcessCommands(VideoCommon::FixSmallVectorADL(command_list.prefetch_command_list));
if (prefetch_size == 0 && command_list_size == 0) {
dma_pushbuffer.pop();
} else {
const CommandListHeader command_list_header{
command_list.command_lists[dma_pushbuffer_subindex++]};
dma_pushbuffer_subindex = 0;
return true;
}
if (signal_sync) {
std::unique_lock lk(sync_mutex);
sync_cv.wait(lk, [this]() { return synced; });
signal_sync = false;
synced = false;
}
if (prefetch_size > 0) {
ProcessCommands(command_list.prefetch_command_list);
dma_pushbuffer.pop();
return true;
}
dma_state.dma_get = command_list_header.addr;
auto& current_command = command_list.command_lists[dma_pushbuffer_subindex];
const CommandListHeader& header = current_command;
dma_state.dma_get = header.addr;
if (command_list_header.size == 0) {
return true;
}
if (signal_sync && !synced) {
std::unique_lock lk(sync_mutex);
sync_cv.wait(lk, [this]() { return synced; });
signal_sync = false;
synced = false;
}
// Push buffer non-empty, read a word
if (dma_state.method >= MacroRegistersStart) {
if (subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(
dma_state.dma_get, command_list_header.size * sizeof(u32));
}
}
if (header.size > 0 && dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(dma_state.dma_get, header.size * sizeof(u32));
}
const auto safe_process = [&] {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
Tegra::Memory::GuestMemoryFlags::SafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size,
&command_headers);
if (header.size > 0) {
if (Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe()) {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
};
const auto unsafe_process = [&] {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
Tegra::Memory::GuestMemoryFlags::UnsafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size,
&command_headers);
ProcessCommands(headers);
};
const bool use_safe = Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe();
if (use_safe) {
safe_process();
} else {
unsafe_process();
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::UnsafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
}
}
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
} else if (command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue()) {
signal_sync = true;
}
if (++dma_pushbuffer_subindex >= command_list_size) {
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
} else {
signal_sync = command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue();
}
if (signal_sync) {
rasterizer->SignalFence([this]() {
if (signal_sync) {
rasterizer->SignalFence([this]() {
std::scoped_lock lk(sync_mutex);
synced = true;
sync_cv.notify_all();
});
}
});
}
return true;
}

View File

@@ -91,6 +91,10 @@ public:
uncommitted_operations.clear();
}
QueueFence(new_fence);
//if (!new_fence->IsStubbed()) {
// std::scoped_lock lock{texture_cache.mutex};
// texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick());
//}
fences.push(std::move(new_fence));
if (should_flush) {
rasterizer.FlushCommands();
@@ -179,7 +183,7 @@ private:
return;
}
}
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
@@ -214,7 +218,7 @@ private:
if (!current_fence->IsStubbed()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
for (auto& operation : current_operations) {
operation();
}
@@ -237,10 +241,11 @@ private:
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
void PopAsyncFlushes(u64 completed_tick) {
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
texture_cache.CompleteGpuAccesses(completed_tick);
buffer_cache.PopAsyncFlushes();
}
query_cache.PopAsyncFlushes();

View File

@@ -1,46 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "video_core/engines/maxwell_3d.h"
namespace VideoCore {
// Determines the polygon mode that will actually be rasterized, taking face
// culling into account so a culled face's mode cannot influence the draw.
inline Tegra::Engines::Maxwell3D::Regs::PolygonMode EffectivePolygonMode(
    const Tegra::Engines::Maxwell3D::Regs& regs) {
    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
    const bool culling = regs.gl_cull_test_enabled != 0;
    const auto face = regs.gl_cull_face;
    const bool front_visible =
        !(culling && (face == Maxwell::CullFace::Front || face == Maxwell::CullFace::FrontAndBack));
    const bool back_visible =
        !(culling && (face == Maxwell::CullFace::Back || face == Maxwell::CullFace::FrontAndBack));
    const auto front_mode = regs.polygon_mode_front;
    const auto back_mode = regs.polygon_mode_back;
    if (front_visible && back_visible && front_mode != back_mode) {
        // Both faces are drawn with mismatched modes; prefer Line, then Point,
        // since a single mode has to be chosen for the whole draw.
        if (front_mode == Maxwell::PolygonMode::Line || back_mode == Maxwell::PolygonMode::Line) {
            return Maxwell::PolygonMode::Line;
        }
        if (front_mode == Maxwell::PolygonMode::Point || back_mode == Maxwell::PolygonMode::Point) {
            return Maxwell::PolygonMode::Point;
        }
    }
    // Use the visible face's mode; the front mode wins ties and also covers
    // the fully-culled case.
    return (back_visible && !front_visible) ? back_mode : front_mode;
}
} // namespace VideoCore

View File

@@ -211,6 +211,12 @@ void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
streamer->CloseCounter();
}
// Reports whether a streamer object has been registered for the given counter
// type; an empty slot means queries of that type have no backing streamer.
template <typename Traits>
bool QueryCacheBase<Traits>::HasStreamer(QueryType counter_type) const {
    return impl->streamers[static_cast<size_t>(counter_type)] != nullptr;
}
template <typename Traits>
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
size_t index = static_cast<size_t>(counter_type);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -92,6 +95,8 @@ public:
void CounterReset(QueryType counter_type);
[[nodiscard]] bool HasStreamer(QueryType counter_type) const;
void CounterClose(QueryType counter_type);
void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,

View File

@@ -7,13 +7,50 @@
#include <cstring>
#include <bit>
#include <numeric>
#include <optional>
#include "common/cityhash.h"
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/surface.h"
namespace OpenGL {
namespace {
// Maps a shader sampler component type to the matching pixel-format numeric
// class. Returns std::nullopt when the component type has no corresponding
// numeric class, signalling the caller to leave the format untouched.
std::optional<VideoCore::Surface::PixelFormatNumeric>
NumericFromComponentType(Shader::SamplerComponentType component_type) {
using VideoCore::Surface::PixelFormatNumeric;
switch (component_type) {
case Shader::SamplerComponentType::Float:
return PixelFormatNumeric::Float;
case Shader::SamplerComponentType::Sint:
return PixelFormatNumeric::Sint;
case Shader::SamplerComponentType::Uint:
return PixelFormatNumeric::Uint;
default:
// Any other component type carries no numeric requirement.
return std::nullopt;
}
}
// Picks the pixel format a texel buffer should be bound with so that its
// numeric interpretation (float/sint/uint) matches what the shader samples.
// Falls back to the original format when no change is needed or no compatible
// variant exists.
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
const auto desired_numeric = NumericFromComponentType(component_type);
if (!desired_numeric) {
// Component type imposes no numeric requirement; keep the format as-is.
return format;
}
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
if (*desired_numeric == current_numeric) {
// Format already matches the shader's expectation.
return format;
}
if (const auto variant =
VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
return *variant;
}
// No variant with the desired numeric class; binding the original format is
// better than failing outright.
return format;
}
} // Anonymous namespace
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
@@ -174,8 +211,12 @@ void ComputePipeline::Configure() {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
auto buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++texbuf_index;
}
@@ -205,7 +246,8 @@ void ComputePipeline::Configure() {
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
textures[texture_binding] =
image_view.SampledView(desc.type, desc.component_type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << texture_binding;
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -25,6 +28,10 @@ public:
void Wait();
[[nodiscard]] u64 WaitTick() const noexcept {
return 0;
}
private:
OGLSync sync_object;
};

View File

@@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <optional>
#include <string>
#include <vector>
#include <bit>
@@ -18,6 +19,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/texture_cache.h"
#if defined(_MSC_VER) && defined(NDEBUG)
@@ -39,6 +41,38 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
// Maps a shader sampler component type to the matching pixel-format numeric
// class. Returns std::nullopt when the component type has no corresponding
// numeric class, signalling the caller to leave the format untouched.
std::optional<VideoCore::Surface::PixelFormatNumeric>
NumericFromComponentType(Shader::SamplerComponentType component_type) {
using VideoCore::Surface::PixelFormatNumeric;
switch (component_type) {
case Shader::SamplerComponentType::Float:
return PixelFormatNumeric::Float;
case Shader::SamplerComponentType::Sint:
return PixelFormatNumeric::Sint;
case Shader::SamplerComponentType::Uint:
return PixelFormatNumeric::Uint;
default:
// Any other component type carries no numeric requirement.
return std::nullopt;
}
}
// Picks the pixel format a texel buffer should be bound with so that its
// numeric interpretation (float/sint/uint) matches what the shader samples.
// Falls back to the original format when no change is needed or no compatible
// variant exists.
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
const auto desired_numeric = NumericFromComponentType(component_type);
if (!desired_numeric) {
// Component type imposes no numeric requirement; keep the format as-is.
return format;
}
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
if (*desired_numeric == current_numeric) {
// Format already matches the shader's expectation.
return format;
}
if (const auto variant =
VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
return *variant;
}
// No variant with the desired numeric class; binding the original format is
// better than failing outright.
return format;
}
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
@@ -397,8 +431,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
auto buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
++texture_buffer_it;
@@ -483,7 +521,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
textures[texture_binding] =
image_view.SampledView(desc.type, desc.component_type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << stage_texture_binding;
}

View File

@@ -220,6 +220,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_gl_sparse_textures = device.HasSparseTexture2(),
.support_gl_derivative_control = device.HasDerivativeControl(),
.support_geometry_streams = true,
.warp_stage_support_mask = 0xFFFFFFFFu,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),

View File

@@ -692,6 +692,15 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
// Linear filtering is only offered for non-integer color formats: integer
// formats have no meaningful interpolation, and non-color (e.g. depth/stencil)
// surface types are excluded as well.
bool TextureCacheRuntime::SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept {
    namespace Surface = VideoCore::Surface;
    if (Surface::IsPixelFormatInteger(format)) {
        return false;
    }
    return Surface::GetFormatType(format) == Surface::SurfaceType::ColorTexture;
}
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -1229,6 +1238,13 @@ GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFor
return view;
}
// Returns the GL texture view handle used for sampled access. component_type
// is accepted for signature parity with the Vulkan backend's SampledView and
// is intentionally unused here.
GLuint ImageView::SampledView(Shader::TextureType view_type,
Shader::SamplerComponentType /*component_type*/) {
// OpenGL swizzles already configure depth/stencil selection per TIC entry,
// so fall back to the default view handle.
return Handle(view_type);
}
void ImageView::SetupView(Shader::TextureType view_type) {
views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -13,6 +16,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -129,6 +133,8 @@ public:
return false;
}
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept;
bool HasBrokenTextureViewFormats() const noexcept {
return has_broken_texture_view_formats;
}
@@ -137,6 +143,8 @@ public:
void TickFrame() {}
void WaitForGpuTick(u64) {}
StateTracker& GetStateTracker() {
return state_tracker;
}
@@ -264,6 +272,9 @@ public:
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] GLuint SampledView(Shader::TextureType view_type,
Shader::SamplerComponentType component_type);
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}

View File

@@ -525,18 +525,24 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
blit_depth_stencil_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)
: vk::ShaderModule{}),
clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)
: vk::ShaderModule{}),
convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)),
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)
: vk::ShaderModule{}),
convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)),
convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)),
convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)),
@@ -667,6 +673,11 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
if (!device.IsExtShaderStencilExportSupported()) {
// Shader requires VK_EXT_shader_stencil_export which is not available
LOG_WARNING(Render_Vulkan, "ConvertABGR8ToD24S8 requires shader_stencil_export, skipping");
return;
}
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
convert_abgr8_to_d24s8_frag);
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
@@ -702,6 +713,11 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
if (!device.IsExtShaderStencilExportSupported()) {
// Shader requires VK_EXT_shader_stencil_export which is not available
LOG_WARNING(Render_Vulkan, "ConvertABGR8SRGBToD24S8 requires shader_stencil_export, skipping");
return;
}
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
dst_framebuffer->RenderPass(),
convert_abgr8_srgb_to_d24s8_frag);

View File

@@ -15,7 +15,6 @@
#include "video_core/engines/draw_manager.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/polygon_mode_utils.h"
namespace Vulkan {
namespace {
@@ -60,13 +59,13 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
raw1 = 0;
extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
extended_dynamic_state_2_logic_op.Assign(features.has_extended_dynamic_state_2_logic_op ? 1 : 0);
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(VideoCore::EffectivePolygonMode(regs)));
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
@@ -158,7 +157,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
return static_cast<u16>(array.stride.Value());
});
}
if (!extended_dynamic_state_2_extra) {
if (!extended_dynamic_state_2_logic_op) {
dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
}
if (!extended_dynamic_state_3_blend) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -20,9 +23,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct DynamicFeatures {
bool has_extended_dynamic_state;
bool has_extended_dynamic_state_2;
bool has_extended_dynamic_state_2_extra;
bool has_extended_dynamic_state_2_logic_op;
bool has_extended_dynamic_state_2_patch_control_points;
bool has_extended_dynamic_state_3_blend;
bool has_extended_dynamic_state_3_enables;
bool has_dual_source_blend;
bool has_dynamic_vertex_input;
};
@@ -186,7 +191,7 @@ struct FixedPipelineState {
u32 raw1;
BitField<0, 1, u32> extended_dynamic_state;
BitField<1, 1, u32> extended_dynamic_state_2;
BitField<2, 1, u32> extended_dynamic_state_2_extra;
BitField<2, 1, u32> extended_dynamic_state_2_logic_op;
BitField<3, 1, u32> extended_dynamic_state_3_blend;
BitField<4, 1, u32> extended_dynamic_state_3_enables;
BitField<5, 1, u32> dynamic_vertex_input;

View File

@@ -1,68 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <algorithm>
#include <cstring>
#include <span>
#include "common/assert.h"
#include "common/common_types.h"
namespace Vulkan::LineLoop {
// Copies a raw index stream into dst and appends a copy of its first element,
// closing the line loop. dst must be exactly element_size bytes larger than
// src; an empty source zero-fills the destination instead.
inline void CopyWithClosureRaw(std::span<u8> dst, std::span<const u8> src, size_t element_size) {
    ASSERT_MSG(dst.size() == src.size() + element_size, "Invalid line loop copy sizes");
    if (!src.empty()) {
        std::memcpy(dst.data(), src.data(), src.size());
        // Duplicate the first index at the end to close the loop.
        std::memcpy(dst.data() + src.size(), src.data(), element_size);
        return;
    }
    if (!dst.empty()) {
        std::fill(dst.begin(), dst.end(), u8{0});
    }
}
// Fills dst with sequential index values of element_size bytes each
// (start_value, start_value + 1, ...) and writes start_value again into the
// final element so the resulting stream forms a closed line loop.
//
// dst.size() is expected to be a whole multiple of element_size; any trailing
// partial element is left untouched rather than written out of bounds.
//
// NOTE: copying the low bytes of a u64 with memcpy assumes a little-endian
// host, matching the previous implementation.
inline void GenerateSequentialWithClosureRaw(std::span<u8> dst, size_t element_size,
                                             u64 start_value = 0) {
    // Reject degenerate inputs up front: the previous code underflowed
    // `dst.size() - element_size` for buffers shorter than one element
    // (heap overflow), looped forever for element_size == 0, and over-read
    // `&value` for element sizes larger than a u64.
    if (element_size == 0 || element_size > sizeof(u64) || dst.size() < element_size) {
        return;
    }
    const size_t element_count = dst.size() / element_size;
    u64 value = start_value;
    for (size_t i = 0; i + 1 < element_count; ++i) {
        std::memcpy(dst.data() + i * element_size, &value, element_size);
        ++value;
    }
    // Close the loop: the last whole element repeats the starting index.
    std::memcpy(dst.data() + (element_count - 1) * element_size, &start_value, element_size);
}
// Typed variant of the raw copy: writes src followed by a repeat of its first
// element into dst, producing a closed line loop. dst must hold exactly one
// more element than src; an empty source value-initializes the single slot.
template <typename T>
inline void CopyWithClosure(std::span<T> dst, std::span<const T> src) {
    ASSERT_MSG(dst.size() == src.size() + 1, "Invalid destination size for line loop copy");
    if (!src.empty()) {
        std::copy(src.begin(), src.end(), dst.begin());
        dst.back() = src.front();
        return;
    }
    if (!dst.empty()) {
        dst.front() = {};
    }
}
// Typed variant of the raw generator: fills dst with start_value,
// start_value + 1, ... and repeats start_value in the final slot so the
// index stream closes back on itself.
template <typename T>
inline void GenerateSequentialWithClosure(std::span<T> dst, T start_value = {}) {
    if (dst.empty()) {
        return;
    }
    T current = start_value;
    const auto body_end = dst.end() - 1;
    for (auto it = dst.begin(); it != body_end; ++it) {
        *it = current;
        current = static_cast<T>(current + static_cast<T>(1));
    }
    // The closing element repeats the starting index.
    dst.back() = start_value;
}
} // namespace Vulkan::LineLoop

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -168,7 +165,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
{VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable | Storage}, // A8B8G8R8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
{VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
@@ -180,7 +177,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
{VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM
{VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
{VK_FORMAT_B8G8R8A8_SRGB, Attachable | Storage}, // B8G8R8A8_SRGB
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
@@ -326,9 +323,44 @@ VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
}
VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
Maxwell::PrimitiveTopology topology,
Maxwell::PolygonMode polygon_mode) {
return detail::PrimitiveTopologyNoDevice(topology, polygon_mode);
Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case Maxwell::PrimitiveTopology::LineLoop:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::TriangleStrip:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::Quads:
case Maxwell::PrimitiveTopology::QuadStrip:
// TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
// whenever it releases
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
case Maxwell::PrimitiveTopology::Polygon:
LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a "
"single body and not a bunch of triangles.");
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
}
UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
return {};
}
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -18,52 +15,6 @@ namespace Vulkan::MaxwellToVK {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
namespace detail {
// Translates a Maxwell primitive topology to a Vulkan topology without
// consulting device capabilities. polygon_mode is only consulted for the
// Polygon topology, which core Vulkan does not support natively.
constexpr VkPrimitiveTopology PrimitiveTopologyNoDevice(Maxwell::PrimitiveTopology topology,
Maxwell::PolygonMode polygon_mode) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case Maxwell::PrimitiveTopology::LineLoop:
// Vulkan has no line-loop topology; a strip is the closest mapping and the
// loop is presumably closed by the index-buffer path duplicating the first
// index -- confirm against the draw/bind code.
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::TriangleStrip:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::Quads:
case Maxwell::PrimitiveTopology::QuadStrip:
// Core Vulkan has no quad topologies; both are mapped to triangles.
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
case Maxwell::PrimitiveTopology::Polygon:
// No GL_POLYGON equivalent: choose a topology that matches how the polygon
// will actually be rasterized under the active polygon mode.
switch (polygon_mode) {
case Maxwell::PolygonMode::Fill:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PolygonMode::Line:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PolygonMode::Point:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
}
break;
}
// Unreachable for valid enum values; triangles are a safe fallback.
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
}
} // namespace detail
namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter);
@@ -95,8 +46,7 @@ struct FormatInfo {
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology,
Maxwell::PolygonMode polygon_mode);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size);

View File

@@ -189,12 +189,16 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VideoCommon::SamplerId sampler_id{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
const VkImageView vk_image_view{
image_view.SampledView(desc.type, desc.component_type)};
const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
!image_view.SupportsAnisotropy()};
const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
: sampler.Handle()};
const bool supports_linear_filter{
texture_cache.SupportsLinearFilter(image_view.format)};
const bool supports_depth_compare_sampling{
image_view.SupportsDepthCompareSampling()};
const VkSampler vk_sampler{
sampler.SelectHandle(supports_linear_filter, image_view.SupportsAnisotropy(),
supports_depth_compare_sampling)};
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}

View File

@@ -280,7 +280,6 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
buffer.Flush(); // Ensure host writes are visible before the GPU copy.
}
const VkBufferImageCopy copy{

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <limits>
#include <span>
#include <vector>
@@ -594,7 +595,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
if (index >= device.GetMaxVertexInputBindings()) {
return;
}
if (device.IsExtExtendedDynamicStateSupported()) {
if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
ReserveNullBuffer();
buffer = *null_buffer;
offset = 0;
size = std::numeric_limits<u32>::max();
}
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
// When VIDS is active, the pipeline doesn't declare VERTEX_INPUT_BINDING_STRIDE as dynamic
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
@@ -634,7 +643,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
if (binding_count == 0) {
return;
}
if (device.IsExtExtendedDynamicStateSupported()) {
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
scheduler.Record([bindings_ = std::move(bindings),
buffer_handles_ = std::move(buffer_handles),
binding_count](vk::CommandBuffer cmdbuf) {
@@ -691,27 +701,50 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
}
void BufferCacheRuntime::ReserveNullBuffer() {
const VkBufferUsageFlags expected_usage = NullBufferUsageFlags();
if (null_buffer && null_buffer_usage_flags != expected_usage) {
RefreshNullBuffer();
}
if (!null_buffer) {
null_buffer = CreateNullBuffer();
}
}
// Returns every buffer usage the shared null buffer must advertise so it can
// stand in for any missing binding (transfer, texel, uniform, storage, vertex,
// index, indirect). Transform-feedback usage is included only when the device
// exposes the extension.
VkBufferUsageFlags BufferCacheRuntime::NullBufferUsageFlags() const {
    VkBufferUsageFlags flags =
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (device.IsExtTransformFeedbackSupported()) {
        flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
    }
    return flags;
}
// Destroys and rebuilds the shared null buffer so freshly computed usage
// flags take effect. The scheduler is drained first because in-flight command
// buffers may still reference the old allocation.
void BufferCacheRuntime::RefreshNullBuffer() {
    if (null_buffer) {
        scheduler.Finish(); // Wait for pending GPU work that may use the buffer.
        null_buffer.reset();
        null_buffer = CreateNullBuffer();
    }
}
vk::Buffer BufferCacheRuntime::CreateNullBuffer() {
VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 4,
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
.usage = NullBufferUsageFlags(),
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
null_buffer_usage_flags = create_info.usage;
if (device.HasDebuggingToolAttached()) {
ret.SetObjectNameEXT("Null buffer");
}

View File

@@ -131,6 +131,9 @@ public:
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
/// Forces destruction and recreation of the shared null buffer so new usage flags take effect.
void RefreshNullBuffer();
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
[[maybe_unused]] u32 /*binding_index*/,
u32 size) {
@@ -174,6 +177,7 @@ private:
void ReserveNullBuffer();
vk::Buffer CreateNullBuffer();
VkBufferUsageFlags NullBufferUsageFlags() const;
struct UniformRing {
static constexpr size_t NUM_FRAMES = 3;
@@ -203,6 +207,7 @@ private:
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
VkBufferUsageFlags null_buffer_usage_flags = 0;
std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;

View File

@@ -418,6 +418,9 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
u32 src_offset, bool compare_to_zero) {
if (!device.IsExtConditionalRendering()) {
return;
}
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
@@ -448,7 +451,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(1, 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
});
}
@@ -459,10 +462,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT,
VK_SHADER_STAGE_COMPUTE_BIT)
? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV)
: std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV)),
scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
@@ -470,6 +477,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
VkBuffer src_buffer, size_t number_of_sums,
size_t min_accumulation_limit, size_t max_accumulation_limit) {
constexpr VkAccessFlags BASE_DST_ACCESS = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT;
const VkAccessFlags conditional_access =
device.IsExtConditionalRendering() ? VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT : 0;
size_t current_runs = number_of_sums;
size_t offset = 0;
while (current_runs != 0) {
@@ -486,22 +501,18 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit,
runs_to_do, used_offset](vk::CommandBuffer cmdbuf) {
runs_to_do, used_offset, conditional_access](vk::CommandBuffer cmdbuf) {
static constexpr VkMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
};
static constexpr VkMemoryBarrier write_barrier{
const VkMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
.dstAccessMask = BASE_DST_ACCESS | conditional_access,
};
const QueriesPrefixScanPushConstants uniforms{
.min_accumulation_base = static_cast<u32>(min_accumulation_limit),
@@ -519,8 +530,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(1, 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0,
write_barrier);
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
});
}
}

View File

@@ -18,14 +18,49 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_notify.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include <optional>
namespace Vulkan {
using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatNumeric;
// Maps a shader sampler component type onto the matching pixel-format numeric
// class; returns std::nullopt when the component type expresses no numeric
// preference.
static std::optional<PixelFormatNumeric> NumericFromComponentType(
    Shader::SamplerComponentType component_type) {
    using Shader::SamplerComponentType;
    if (component_type == SamplerComponentType::Float) {
        return PixelFormatNumeric::Float;
    }
    if (component_type == SamplerComponentType::Sint) {
        return PixelFormatNumeric::Sint;
    }
    if (component_type == SamplerComponentType::Uint) {
        return PixelFormatNumeric::Uint;
    }
    return std::nullopt;
}
// Picks a texel-buffer pixel format whose numeric class matches what the
// shader samples. The original format is kept when it already agrees, when
// the component type carries no numeric preference, or when no sibling
// variant with the requested numeric class exists.
static PixelFormat ResolveTexelBufferFormat(PixelFormat format,
                                            Shader::SamplerComponentType component_type) {
    const std::optional<PixelFormatNumeric> wanted = NumericFromComponentType(component_type);
    if (!wanted.has_value()) {
        return format;
    }
    if (VideoCore::Surface::GetPixelFormatNumericType(format) == *wanted) {
        return format;
    }
    if (const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *wanted)) {
        return *variant;
    }
    return format;
}
ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
DescriptorPool& descriptor_pool,
@@ -182,8 +217,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(views[index].id);
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
}

View File

@@ -34,6 +34,10 @@ public:
void Wait();
[[nodiscard]] u64 WaitTick() const noexcept {
return wait_tick;
}
private:
Scheduler& scheduler;
u64 wait_tick = 0;

View File

@@ -5,9 +5,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <optional>
#include <iostream>
#include <span>
#include <string_view>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
@@ -23,9 +24,10 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/polygon_mode_utils.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_device.h"
#if defined(_MSC_VER) && defined(NDEBUG)
@@ -46,10 +48,83 @@ using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::PixelFormatNumeric;
constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
constexpr size_t MAX_IMAGE_ELEMENTS = 64;
// Translates a sampler component type into the pixel-format numeric class the
// shader expects, if it expresses one.
std::optional<PixelFormatNumeric> NumericFromComponentType(
    Shader::SamplerComponentType component_type) {
    switch (component_type) {
    case Shader::SamplerComponentType::Uint:
        return PixelFormatNumeric::Uint;
    case Shader::SamplerComponentType::Sint:
        return PixelFormatNumeric::Sint;
    case Shader::SamplerComponentType::Float:
        return PixelFormatNumeric::Float;
    default:
        break;
    }
    return std::nullopt;
}
// Resolves the pixel format to use for a texel buffer so its numeric class
// matches the shader's sampler component type. Falls back to the incoming
// format when no adjustment is needed or no suitable variant exists.
PixelFormat ResolveTexelBufferFormat(PixelFormat format,
                                     Shader::SamplerComponentType component_type) {
    const auto wanted_numeric = NumericFromComponentType(component_type);
    if (!wanted_numeric) {
        return format;
    }
    if (VideoCore::Surface::GetPixelFormatNumericType(format) == *wanted_numeric) {
        return format;
    }
    const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *wanted_numeric);
    if (!variant) {
        return format;
    }
    return *variant;
}
// True when the blend factor references the fragment shader's second color
// output, i.e. requires dual-source blending support.
bool UsesDualSourceFactor(Maxwell::Blend::Factor factor) {
    using Factor = Maxwell::Blend::Factor;
    switch (factor) {
    case Factor::Source1Color_D3D:
    case Factor::Source1Color_GL:
    case Factor::OneMinusSource1Color_D3D:
    case Factor::OneMinusSource1Color_GL:
    case Factor::Source1Alpha_D3D:
    case Factor::Source1Alpha_GL:
    case Factor::OneMinusSource1Alpha_D3D:
    case Factor::OneMinusSource1Alpha_GL:
        return true;
    default:
        return false;
    }
}
// Rewrites a dual-source blend factor to its closest single-source
// equivalent; factors that are not dual-source pass through unchanged.
Maxwell::Blend::Factor FallbackDualSourceFactor(Maxwell::Blend::Factor factor) {
    using Factor = Maxwell::Blend::Factor;
    switch (factor) {
    case Factor::Source1Color_D3D:
    case Factor::Source1Color_GL:
        return Factor::SourceColor_D3D;
    case Factor::OneMinusSource1Color_D3D:
    case Factor::OneMinusSource1Color_GL:
        return Factor::OneMinusSourceColor_D3D;
    case Factor::Source1Alpha_D3D:
    case Factor::Source1Alpha_GL:
        return Factor::SourceAlpha_D3D;
    case Factor::OneMinusSource1Alpha_D3D:
    case Factor::OneMinusSource1Alpha_GL:
        return Factor::OneMinusSourceAlpha_D3D;
    default:
        return factor;
    }
}
// True when any of the attachment's four blend factors (RGB/alpha source and
// destination) requires dual-source blending.
bool AttachmentUsesDualSource(const FixedPipelineState::BlendingAttachment& blend) {
    const std::array factors{blend.SourceRGBFactor(), blend.DestRGBFactor(),
                             blend.SourceAlphaFactor(), blend.DestAlphaFactor()};
    return std::any_of(factors.begin(), factors.end(), UsesDualSourceFactor);
}
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
DescriptorLayoutBuilder builder{device};
for (size_t index = 0; index < infos.size(); ++index) {
@@ -265,6 +340,7 @@ GraphicsPipeline::GraphicsPipeline(
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors);
}
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
uses_push_descriptor = builder.CanUsePushDescriptor();
@@ -418,8 +494,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
++texture_buffer_it;
@@ -616,10 +696,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
vertex_input_ci.pNext = &input_divisor_ci;
}
const bool has_tess_stages = spv_modules[1] || spv_modules[2];
const auto polygon_mode =
FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode.Value());
auto input_assembly_topology =
MaxwellToVK::PrimitiveTopology(device, key.state.topology, polygon_mode);
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
if (!has_tess_stages) {
LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
@@ -634,33 +711,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
}
}
if (key.state.topology == Maxwell::PrimitiveTopology::Polygon) {
const auto polygon_mode_name = [polygon_mode]() -> std::string_view {
switch (polygon_mode) {
case Maxwell::PolygonMode::Fill:
return "Fill";
case Maxwell::PolygonMode::Line:
return "Line";
case Maxwell::PolygonMode::Point:
return "Point";
}
return "Unknown";
}();
const auto vk_topology_name = [input_assembly_topology]() -> std::string_view {
switch (input_assembly_topology) {
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
return "TriangleFan";
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
return "LineStrip";
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
return "PointList";
default:
return "Unexpected";
}
}();
LOG_DEBUG(Render_Vulkan, "Polygon primitive in {} mode mapped to {}", polygon_mode_name,
vk_topology_name);
}
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -734,13 +784,18 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.lineWidth = 1.0f,
// TODO(alekpop): Transfer from regs
};
const bool smooth_lines_supported =
device.IsExtLineRasterizationSupported() && device.SupportsSmoothLines();
const bool stippled_lines_supported =
device.IsExtLineRasterizationSupported() && device.SupportsStippledRectangularLines();
VkPipelineRasterizationLineStateCreateInfoEXT line_state{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
.lineRasterizationMode = key.state.smooth_lines != 0
.lineRasterizationMode = key.state.smooth_lines != 0 && smooth_lines_supported
? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
: VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
.stippledLineEnable = dynamic.line_stipple_enable ? VK_TRUE : VK_FALSE,
.stippledLineEnable =
(dynamic.line_stipple_enable && stippled_lines_supported) ? VK_TRUE : VK_FALSE,
.lineStippleFactor = key.state.line_stipple_factor,
.lineStipplePattern = static_cast<uint16_t>(key.state.line_stipple_pattern),
};
@@ -771,17 +826,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
}
const VkPipelineMultisampleStateCreateInfo multisample_ci{
const bool supports_alpha_output = fragment_has_color0_output;
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
const auto msaa_mode = key.state.msaa_mode.Value();
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.rasterizationSamples = vk_samples,
.sampleShadingEnable = Settings::values.sample_shading.GetValue() ? VK_TRUE : VK_FALSE,
.minSampleShading = static_cast<float>(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToCoverageEnable =
supports_alpha_output && key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = supports_alpha_output && alpha_to_one_supported &&
key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
};
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -803,6 +866,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
}
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const size_t num_attachments{NumAttachments(key.state)};
const bool supports_dual_source_blend = device.SupportsDualSourceBlend();
const u32 max_dual_source_attachments = supports_dual_source_blend
? device.MaxFragmentDualSrcAttachments()
: 0;
u32 granted_dual_source_attachments = 0;
bool logged_dual_source_warning = false;
for (size_t index = 0; index < num_attachments; ++index) {
static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
@@ -816,13 +885,30 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
for (size_t i = 0; i < mask_table.size(); ++i) {
write_mask |= mask[i] ? mask_table[i] : 0;
}
const bool attachment_uses_dual_source = AttachmentUsesDualSource(blend);
const bool allow_dual_source = attachment_uses_dual_source && supports_dual_source_blend &&
granted_dual_source_attachments < max_dual_source_attachments;
if (allow_dual_source) {
++granted_dual_source_attachments;
} else if (attachment_uses_dual_source && !logged_dual_source_warning) {
LOG_WARNING(Render_Vulkan,
"Dual-source blend factors exceed device limit (maxFragmentDualSrcAttachments={}), falling back to single-source factors",
max_dual_source_attachments);
logged_dual_source_warning = true;
}
const auto sanitize_factor = [&](Maxwell::Blend::Factor factor) {
if (allow_dual_source || !UsesDualSourceFactor(factor)) {
return factor;
}
return FallbackDualSourceFactor(factor);
};
cb_attachments.push_back({
.blendEnable = blend.enable != 0,
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
.srcColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceRGBFactor())),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestRGBFactor())),
.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceAlphaFactor())),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestAlphaFactor())),
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
.colorWriteMask = write_mask,
});
@@ -838,14 +924,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.blendConstants = {}
};
static_vector<VkDynamicState, 34> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_LINE_WIDTH,
};
if (device.UsesAdvancedCoreDynamicState()) {
static constexpr std::array core_dynamic_states{
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
};
dynamic_states.insert(dynamic_states.end(), core_dynamic_states.begin(),
core_dynamic_states.end());
}
if (key.state.extended_dynamic_state) {
std::vector<VkDynamicState> extended{
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
@@ -855,51 +952,68 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
if (!device.IsExtVertexInputDynamicStateSupported()) {
extended.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
}
if (key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
// VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT is part of EDS1
// Only use it if VIDS is not active (VIDS replaces it with full vertex input control)
if (!key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
}
if (key.state.extended_dynamic_state_2_extra) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
// VK_DYNAMIC_STATE_VERTEX_INPUT_EXT (VIDS) - Independent from EDS
// Provides full dynamic vertex input control, replaces VERTEX_INPUT_BINDING_STRIDE
if (key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
// EDS2 - Core (3 states)
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
}
// EDS2 - LogicOp (granular)
if (key.state.extended_dynamic_state_2_logic_op) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
// EDS3 - Blending (composite: 3 states)
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
}
// EDS3 - Enables (composite: per-feature)
if (key.state.extended_dynamic_state_3_enables) {
if (device.SupportsDynamicState3DepthClampEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT);
}
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
// VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
if (device.SupportsDynamicState3LogicOpEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT);
}
if (key.state.extended_dynamic_state_3_enables) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
// additional state3 extensions
VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT,
VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT,
VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT,
VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT,
VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT,
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
if (device.SupportsDynamicState3LineRasterizationMode()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT);
}
if (device.SupportsDynamicState3ConservativeRasterizationMode()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT);
}
if (device.SupportsDynamicState3LineStippleEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT);
}
if (device.SupportsDynamicState3AlphaToCoverageEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT);
}
if (device.SupportsDynamicState3AlphaToOneEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT);
}
}

View File

@@ -82,6 +82,17 @@ public:
const std::array<const Shader::Info*, NUM_STAGES>& infos);
bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; }
bool SupportsAlphaToCoverage() const noexcept {
return fragment_has_color0_output;
}
bool SupportsAlphaToOne() const noexcept {
return fragment_has_color0_output;
}
bool UsesExtendedDynamicState() const noexcept {
return key.state.extended_dynamic_state != 0;
}
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -149,6 +160,7 @@ private:
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_textures{};
bool fragment_has_color0_output{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;

View File

@@ -36,6 +36,7 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/surface.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
@@ -105,6 +106,41 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
return {};
}
// Derives the shader attribute type for a render target format: integer
// formats map to SignedInt/UnsignedInt, everything else (including NONE)
// maps to Float.
Shader::AttributeType RenderTargetAttributeType(Tegra::RenderTargetFormat format) {
    if (format == Tegra::RenderTargetFormat::NONE) {
        return Shader::AttributeType::Float;
    }
    const auto pixel_format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(format);
    if (VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
        return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)
                   ? Shader::AttributeType::SignedInt
                   : Shader::AttributeType::UnsignedInt;
    }
    return Shader::AttributeType::Float;
}
// Maps an IR shader stage to the corresponding Vulkan shader stage bit.
// VertexA/VertexB both describe the hardware vertex stage; unrecognized
// values conservatively fall back to the vertex stage as well.
VkShaderStageFlagBits StageToVkStage(Shader::Stage stage) {
    switch (stage) {
    case Shader::Stage::TessellationControl:
        return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
    case Shader::Stage::TessellationEval:
        return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
    case Shader::Stage::Geometry:
        return VK_SHADER_STAGE_GEOMETRY_BIT;
    case Shader::Stage::Fragment:
        return VK_SHADER_STAGE_FRAGMENT_BIT;
    case Shader::Stage::Compute:
        return VK_SHADER_STAGE_COMPUTE_BIT;
    case Shader::Stage::VertexA:
    case Shader::Stage::VertexB:
    default:
        return VK_SHADER_STAGE_VERTEX_BIT;
    }
}
Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
if (attr.enabled == 0) {
return Shader::AttributeType::Disabled;
@@ -229,6 +265,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.alpha_test_func = MaxwellToCompareFunction(
key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
info.alpha_test_reference = std::bit_cast<float>(key.state.alpha_test_ref);
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
const auto format = static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[index]);
info.color_output_types[index] = RenderTargetAttributeType(format);
}
break;
default:
break;
@@ -269,9 +309,8 @@ size_t GetTotalPipelineWorkers() {
const size_t max_core_threads =
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
// Leave at least a few cores free on Android; reserve two instead of three so
// pipeline compilation can consume one more worker thread for testing.
constexpr size_t free_cores = 2ULL;
// Leave at least one core free on Android to reduce thermal pressure.
constexpr size_t free_cores = 1ULL;
if (max_core_threads <= free_cores) {
return 1ULL;
}
@@ -318,6 +357,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
"VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const bool float_controls_supported{device.IsKhrShaderFloatControlsSupported()};
const VkDriverId driver_id{device.GetDriverID()};
profile = Shader::Profile{
.supported_spirv = device.SupportedSpirvVersion(),
@@ -327,20 +367,24 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
.support_float_controls = device.IsKhrShaderFloatControlsSupported(),
.support_separate_denorm_behavior =
.support_float_controls = float_controls_supported,
.support_separate_denorm_behavior = float_controls_supported &&
float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_separate_rounding_mode =
.support_separate_rounding_mode = float_controls_supported &&
float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve =
.support_fp16_denorm_preserve = float_controls_supported &&
float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_controls_supported &&
float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_controls_supported &&
float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_controls_supported &&
float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
.support_fp32_signed_zero_nan_preserve =
.support_fp32_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
.support_fp64_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
@@ -396,6 +440,27 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_conditional_barrier = device.SupportsConditionalBarriers(),
};
profile.warp_stage_support_mask = 0;
static constexpr std::array kAllStages{
Shader::Stage::VertexA, Shader::Stage::VertexB,
Shader::Stage::TessellationControl, Shader::Stage::TessellationEval,
Shader::Stage::Geometry, Shader::Stage::Fragment,
Shader::Stage::Compute,
};
for (const auto stage : kAllStages) {
const auto vk_stage = StageToVkStage(stage);
if (device.SupportsWarpIntrinsics(vk_stage)) {
profile.warp_stage_support_mask |= 1u << static_cast<u32>(stage);
}
}
profile.support_vote = profile.warp_stage_support_mask != 0;
if (!profile.SupportsWarpIntrinsics(Shader::Stage::Fragment)) {
LOG_WARNING(Render_Vulkan,
"Fragment shaders lack subgroup support on this driver; warp intrinsics will be "
"approximated and visual artifacts may remain");
}
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
device.GetMaxVertexInputAttributes(), Maxwell::NumVertexAttributes);
@@ -405,14 +470,40 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
}
dynamic_features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(),
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(),
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(),
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(),
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(),
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
};
LOG_INFO(Render_Vulkan, "DynamicState setting value: {}", Settings::values.dyna_state.GetValue());
dynamic_features = {};
// User granularity enforced in vulkan_device.cpp switch statement:
// Level 0: Core Dynamic States only
// Level 1: Core + EDS1
// Level 2: Core + EDS1 + EDS2 (accumulative)
// Level 3: Core + EDS1 + EDS2 + EDS3 (accumulative)
// Here we only verify if extensions were successfully loaded by the device
dynamic_features.has_extended_dynamic_state =
device.IsExtExtendedDynamicStateSupported();
dynamic_features.has_extended_dynamic_state_2 =
device.IsExtExtendedDynamicState2Supported();
dynamic_features.has_extended_dynamic_state_2_logic_op =
device.IsExtExtendedDynamicState2ExtrasSupported();
dynamic_features.has_extended_dynamic_state_2_patch_control_points = false;
dynamic_features.has_extended_dynamic_state_3_blend =
device.IsExtExtendedDynamicState3BlendingSupported();
dynamic_features.has_dual_source_blend = device.SupportsDualSourceBlend();
if (!dynamic_features.has_dual_source_blend) {
LOG_WARNING(Render_Vulkan, "Dual-source blending unsupported, disabling dynamic blend");
dynamic_features.has_extended_dynamic_state_3_blend = false;
}
dynamic_features.has_extended_dynamic_state_3_enables =
device.IsExtExtendedDynamicState3EnablesSupported();
// VIDS: Independent toggle (not affected by dyna_state levels)
dynamic_features.has_dynamic_vertex_input =
device.IsExtVertexInputDynamicStateSupported() &&
Settings::values.vertex_input_dynamic_state.GetValue();
}
PipelineCache::~PipelineCache() {
@@ -422,6 +513,13 @@ PipelineCache::~PipelineCache() {
}
}
// Blocks until all queued pipeline-build worker requests have completed.
// This is only needed on drivers flagged with broken parallel shader
// compilation; on healthy drivers it is a no-op.
void PipelineCache::DrainPendingBuilds() {
    if (device.HasBrokenParallelShaderCompiling()) {
        workers.WaitForRequests();
    }
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
if (!RefreshStages(graphics_key.unique_hashes)) {
@@ -452,12 +550,17 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
.shared_memory_size = qmd.shared_alloc,
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
};
const auto [pair, is_new]{compute_cache.try_emplace(key)};
const auto [pair, inserted]{compute_cache.try_emplace(key)};
auto& pipeline{pair->second};
if (!is_new) {
return pipeline.get();
if (!pipeline) {
auto [slot, should_build] = AcquireComputeBuildSlot(key);
if (!should_build) {
WaitForBuildCompletion(slot);
} else {
pipeline = CreateComputePipeline(key, shader);
ReleaseComputeBuildSlot(key, slot);
}
}
pipeline = CreateComputePipeline(key, shader);
return pipeline.get();
}
@@ -517,8 +620,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
dynamic_features.has_extended_dynamic_state ||
(key.state.extended_dynamic_state_2 != 0) !=
dynamic_features.has_extended_dynamic_state_2 ||
(key.state.extended_dynamic_state_2_extra != 0) !=
dynamic_features.has_extended_dynamic_state_2_extra ||
(key.state.extended_dynamic_state_2_logic_op != 0) !=
dynamic_features.has_extended_dynamic_state_2_logic_op ||
(key.state.extended_dynamic_state_3_blend != 0) !=
dynamic_features.has_extended_dynamic_state_3_blend ||
(key.state.extended_dynamic_state_3_enables != 0) !=
@@ -573,13 +676,20 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
const auto [pair, inserted]{graphics_cache.try_emplace(graphics_key)};
auto& pipeline{pair->second};
if (is_new) {
pipeline = CreateGraphicsPipeline();
}
if (!pipeline) {
return nullptr;
const auto key = pair->first;
auto [slot, should_build] = AcquireGraphicsBuildSlot(key);
if (!should_build) {
WaitForBuildCompletion(slot);
} else {
pipeline = CreateGraphicsPipeline();
ReleaseGraphicsBuildSlot(key, slot);
}
if (!pipeline) {
return nullptr;
}
}
if (current_pipeline) {
current_pipeline->AddTransition(pipeline.get());
@@ -602,6 +712,7 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
return pipeline;
}
scheduler.KeepAliveTick();
return nullptr;
}
@@ -705,6 +816,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to create graphics pipeline 0x{:016x}: {}", key.Hash(),
exception.what());
return nullptr;
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
@@ -796,6 +911,10 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to create compute pipeline 0x{:016x}: {}", key.Hash(),
exception.what());
return nullptr;
}
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
@@ -893,4 +1012,68 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::
}
}
// Claims the in-flight build slot for a graphics pipeline key.
// Returns {slot, true} when the caller won the race and must build the
// pipeline (and later call ReleaseGraphicsBuildSlot), or {slot, false}
// when another thread is already building and the caller should wait.
auto PipelineCache::AcquireGraphicsBuildSlot(const GraphicsPipelineCacheKey& key)
    -> std::pair<InFlightPipelinePtr, bool> {
    std::scoped_lock lock(graphics_inflight_mutex);
    auto& slot = graphics_inflight_builds[key];
    if (slot) {
        // Someone else holds the slot; caller becomes a waiter.
        return {slot, false};
    }
    slot = std::make_shared<InFlightPipelineBuild>();
    return {slot, true};
}
// Claims the in-flight build slot for a compute pipeline key.
// Returns {slot, true} when the caller must perform the build (and later
// call ReleaseComputeBuildSlot), or {slot, false} when a build for the
// same key is already in progress and the caller should wait on the slot.
auto PipelineCache::AcquireComputeBuildSlot(const ComputePipelineCacheKey& key)
    -> std::pair<InFlightPipelinePtr, bool> {
    std::scoped_lock lock(compute_inflight_mutex);
    auto& slot = compute_inflight_builds[key];
    if (slot) {
        // Another thread already owns this build.
        return {slot, false};
    }
    slot = std::make_shared<InFlightPipelineBuild>();
    return {slot, true};
}
void PipelineCache::ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key,
const InFlightPipelinePtr& slot) {
if (!slot) {
return;
}
{
std::scoped_lock slot_lock(slot->mutex);
slot->building = false;
}
slot->cv.notify_all();
std::scoped_lock map_lock(graphics_inflight_mutex);
auto it = graphics_inflight_builds.find(key);
if (it != graphics_inflight_builds.end() && it->second == slot) {
graphics_inflight_builds.erase(it);
}
}
void PipelineCache::ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key,
const InFlightPipelinePtr& slot) {
if (!slot) {
return;
}
{
std::scoped_lock slot_lock(slot->mutex);
slot->building = false;
}
slot->cv.notify_all();
std::scoped_lock map_lock(compute_inflight_mutex);
auto it = compute_inflight_builds.find(key);
if (it != compute_inflight_builds.end() && it->second == slot) {
compute_inflight_builds.erase(it);
}
}
void PipelineCache::WaitForBuildCompletion(const InFlightPipelinePtr& slot) const {
if (!slot) {
return;
}
std::unique_lock lock(slot->mutex);
slot->cv.wait(lock, [&] { return !slot->building; });
}
} // namespace Vulkan

View File

@@ -5,8 +5,10 @@
#include <array>
#include <cstddef>
#include <condition_variable>
#include <filesystem>
#include <memory>
#include <mutex>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -113,7 +115,17 @@ public:
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
void DrainPendingBuilds();
private:
// Synchronization record for a pipeline compilation running on one thread
// while other threads wait for it. The builder flips `building` to false
// under `mutex` and signals `cv`; waiters block on `cv` until then.
struct InFlightPipelineBuild {
// Guards `building`; also taken by waiters around the condition wait.
std::mutex mutex;
// Signalled (notify_all) once the build finishes.
std::condition_variable cv;
// True while the owning thread is still compiling the pipeline.
bool building{true};
};
using InFlightPipelinePtr = std::shared_ptr<InFlightPipelineBuild>;
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
@@ -140,6 +152,14 @@ private:
vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
u32 expected_cache_version);
std::pair<InFlightPipelinePtr, bool> AcquireGraphicsBuildSlot(
const GraphicsPipelineCacheKey& key);
std::pair<InFlightPipelinePtr, bool> AcquireComputeBuildSlot(
const ComputePipelineCacheKey& key);
void ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key, const InFlightPipelinePtr& slot);
void ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key, const InFlightPipelinePtr& slot);
void WaitForBuildCompletion(const InFlightPipelinePtr& slot) const;
const Device& device;
Scheduler& scheduler;
DescriptorPool& descriptor_pool;
@@ -158,6 +178,11 @@ private:
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::mutex graphics_inflight_mutex;
std::unordered_map<GraphicsPipelineCacheKey, InFlightPipelinePtr> graphics_inflight_builds;
std::mutex compute_inflight_mutex;
std::unordered_map<ComputePipelineCacheKey, InFlightPipelinePtr> compute_inflight_builds;
ShaderPools main_pools;
Shader::Profile profile;

View File

@@ -42,7 +42,8 @@ public:
static constexpr size_t BANK_SIZE = 256;
static constexpr size_t QUERY_SIZE = 8;
explicit SamplesQueryBank(const Device& device_, size_t index_)
: BankBase(BANK_SIZE), device{device_}, index{index_} {
: BankBase(BANK_SIZE), device{device_}, index{index_},
supports_host_query_reset{device_.SupportsHostQueryReset()} {
const auto& dev = device.GetLogical();
query_pool = dev.CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
@@ -60,8 +61,10 @@ public:
void Reset() override {
ASSERT(references == 0);
VideoCommon::BankBase::Reset();
const auto& dev = device.GetLogical();
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
if (supports_host_query_reset) {
const auto& dev = device.GetLogical();
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
}
host_results.fill(0ULL);
next_bank = 0;
}
@@ -99,6 +102,7 @@ public:
private:
const Device& device;
const size_t index;
const bool supports_host_query_reset;
vk::QueryPool query_pool;
std::array<u64, BANK_SIZE> host_results;
};
@@ -1200,21 +1204,24 @@ struct QueryCacheRuntimeImpl {
hcr_setup.pNext = nullptr;
hcr_setup.flags = 0;
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
if (device.IsExtConditionalRendering()) {
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = sizeof(u32),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = sizeof(u32),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
hcr_resolve_buffer =
memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
}
VideoCore::RasterizerInterface* rasterizer;

View File

@@ -37,7 +37,6 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/polygon_mode_utils.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -149,8 +148,7 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3
return scissor;
}
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed,
Maxwell::PolygonMode polygon_mode) {
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {
DrawParams params{
.base_instance = draw_state.base_instance,
.num_instances = num_instances,
@@ -170,21 +168,6 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
params.base_vertex = 0;
params.is_indexed = true;
}
const bool polygon_line =
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
polygon_mode == Maxwell::PolygonMode::Line;
if (polygon_line) {
if (params.is_indexed) {
if (draw_state.index_buffer.count > 1) {
params.num_vertices = draw_state.index_buffer.count + 1;
}
} else if (draw_state.vertex_buffer.count > 1) {
params.num_vertices = draw_state.vertex_buffer.count + 1;
params.is_indexed = true;
params.first_index = 0;
params.base_vertex = draw_state.vertex_buffer.first;
}
}
return params;
}
} // Anonymous namespace
@@ -214,6 +197,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
// Log multi-draw support
if (device.IsExtMultiDrawSupported()) {
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw is enabled for optimized draw calls");
}
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -227,6 +215,10 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
if (device.HasBrokenParallelShaderCompiling()) {
pipeline_cache.DrainPendingBuilds();
}
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -250,18 +242,45 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed, polygon_mode)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
// Use VK_EXT_multi_draw if available (single draw becomes multi-draw with count=1)
if (device.IsExtMultiDrawSupported()) {
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
// Use multi-draw indexed with single draw
const VkMultiDrawIndexedInfoEXT multi_draw_info{
.firstIndex = draw_params.first_index,
.indexCount = draw_params.num_vertices,
};
const int32_t vertex_offset = static_cast<int32_t>(draw_params.base_vertex);
cmdbuf.DrawMultiIndexedEXT(1, &multi_draw_info, draw_params.num_instances,
draw_params.base_instance,
sizeof(VkMultiDrawIndexedInfoEXT), &vertex_offset);
} else {
// Use multi-draw with single draw
const VkMultiDrawInfoEXT multi_draw_info{
.firstVertex = draw_params.base_vertex,
.vertexCount = draw_params.num_vertices,
};
cmdbuf.DrawMultiEXT(1, &multi_draw_info, draw_params.num_instances,
draw_params.base_instance,
sizeof(VkMultiDrawInfoEXT));
}
});
} else {
// Fallback to standard draw calls
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
}
});
}
@@ -404,13 +423,48 @@ void RasterizerVulkan::Clear(u32 layer_count) {
.baseArrayLayer = regs.clear_surface.layer,
.layerCount = layer_count,
};
if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
const auto clamp_rect_to_render_area = [render_area](VkRect2D& rect) -> bool {
const auto clamp_axis = [](s32& offset, u32& extent, u32 limit) {
auto clamp_offset = [&offset, limit]() {
if (limit == 0) {
offset = 0;
return;
}
offset = std::clamp(offset, 0, static_cast<s32>(limit));
};
if (extent == 0) {
clamp_offset();
return;
}
if (offset < 0) {
const u32 shrink = (std::min)(extent, static_cast<u32>(-offset));
extent -= shrink;
offset = 0;
}
if (limit == 0) {
extent = 0;
offset = 0;
return;
}
if (offset >= static_cast<s32>(limit)) {
offset = static_cast<s32>(limit);
extent = 0;
return;
}
const u64 end_coord = static_cast<u64>(offset) + extent;
if (end_coord > limit) {
extent = limit - static_cast<u32>(offset);
}
};
clamp_axis(rect.offset.x, rect.extent.width, render_area.width);
clamp_axis(rect.offset.y, rect.extent.height, render_area.height);
return rect.extent.width != 0 && rect.extent.height != 0;
};
if (!clamp_rect_to_render_area(clear_rect.rect)) {
return;
}
clear_rect.rect.extent = VkExtent2D{
.width = (std::min)(clear_rect.rect.extent.width, render_area.width),
.height = (std::min)(clear_rect.rect.extent.height, render_area.height),
};
const u32 color_attachment = regs.clear_surface.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
@@ -857,23 +911,21 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerVulkan::FlushWork() {
#ifdef ANDROID
static constexpr u32 DRAWS_TO_DISPATCH = 1024;
static constexpr u32 DRAWS_TO_DISPATCH = 512;
static constexpr u32 CHECK_MASK = 3;
#else
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
static constexpr u32 CHECK_MASK = 7;
#endif // ANDROID
// Only check multiples of 8 draws
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
if ((++draw_counter & 7) != 7) {
static_assert(DRAWS_TO_DISPATCH % (CHECK_MASK + 1) == 0);
if ((++draw_counter & CHECK_MASK) != CHECK_MASK) {
return;
}
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
return;
}
// Otherwise (every certain number of draws) flush execution.
// This submits commands to the Vulkan driver.
scheduler.Flush();
draw_counter = 0;
}
@@ -939,6 +991,8 @@ bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = maxwell3d->regs;
// Core Dynamic States (Vulkan 1.0) - Always active regardless of dyna_state setting
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -946,6 +1000,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
// EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthCompareOp(regs);
@@ -956,40 +1011,52 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthTestEnable(regs);
UpdateDepthWriteEnable(regs);
UpdateStencilTestEnable(regs);
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
using namespace Tegra::Engines;
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
const auto has_float = std::any_of(
regs.vertex_attrib_format.begin(),
regs.vertex_attrib_format.end(),
[](const auto& attrib) {
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
}
);
if (regs.logic_op.enable) {
regs.logic_op.enable = static_cast<u32>(!has_float);
}
}
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
}
}
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
UpdateBlending(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
UpdateLineStippleEnable(regs);
UpdateConservativeRasterizationMode(regs);
}
}
// EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
// EDS2 Extras: LogicOp operation selection
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
// EDS3 Enables: LogicOpEnable, DepthClamp, LineStipple, ConservativeRaster
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
using namespace Tegra::Engines;
// AMD Workaround: LogicOp incompatible with float render targets
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
const auto has_float = std::any_of(
regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(),
[](const auto& attrib) {
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
}
);
if (regs.logic_op.enable) {
regs.logic_op.enable = static_cast<u32>(!has_float);
}
}
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
UpdateLineRasterizationMode(regs);
UpdateLineStippleEnable(regs);
UpdateConservativeRasterizationMode(regs);
UpdateAlphaToCoverageEnable(regs);
UpdateAlphaToOneEnable(regs);
}
// EDS3 Blending: ColorBlendEnable, ColorBlendEquation, ColorWriteMask
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
UpdateBlending(regs);
}
// Vertex Input Dynamic State: Independent from EDS levels
if (device.IsExtVertexInputDynamicStateSupported()) {
if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput()) {
UpdateVertexInput(regs);
@@ -1162,109 +1229,141 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg
if (!state_tracker.TouchBlendConstants()) {
return;
}
const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
regs.blend_color.a};
scheduler.Record(
[blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
if (!device.UsesAdvancedCoreDynamicState()) {
return;
}
const std::array<float, 4> blend_constants{
regs.blend_color.r,
regs.blend_color.g,
regs.blend_color.b,
regs.blend_color.a,
};
scheduler.Record([blend_constants](vk::CommandBuffer cmdbuf) {
cmdbuf.SetBlendConstants(blend_constants.data());
});
}
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBounds()) {
return;
}
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
if (!device.IsDepthBoundsSupported()) {
return;
}
if (!device.UsesAdvancedCoreDynamicState()) {
return;
}
const bool unrestricted = device.IsExtDepthRangeUnrestrictedSupported();
const float min_depth = unrestricted ? regs.depth_bounds[0]
: std::clamp(regs.depth_bounds[0], 0.0f, 1.0f);
const float max_depth = unrestricted ? regs.depth_bounds[1]
: std::clamp(regs.depth_bounds[1], 0.0f, 1.0f);
scheduler.Record([min_depth, max_depth](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBounds(min_depth, max_depth);
});
}
void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilProperties()) {
const bool two_sided = regs.stencil_two_side_enable != 0;
const bool update_properties = state_tracker.TouchStencilProperties();
const bool update_side = state_tracker.TouchStencilSide(two_sided);
const bool refs_dirty = state_tracker.TouchStencilReference();
const bool write_dirty = state_tracker.TouchStencilWriteMask();
const bool compare_dirty = state_tracker.TouchStencilCompare();
const bool update_references = update_properties || update_side || refs_dirty;
const bool update_write_masks = update_properties || update_side || write_dirty;
const bool update_compare_masks = update_properties || update_side || compare_dirty;
if (!update_references && !update_write_masks && !update_compare_masks) {
state_tracker.ClearStencilReset();
return;
}
bool update_references = state_tracker.TouchStencilReference();
bool update_write_mask = state_tracker.TouchStencilWriteMask();
bool update_compare_masks = state_tracker.TouchStencilCompare();
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
update_references = true;
update_write_mask = true;
update_compare_masks = true;
if (!device.UsesAdvancedCoreDynamicState()) {
state_tracker.ClearStencilReset();
return;
}
if (update_references) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
!state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
return;
}
} else {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
return;
}
if (two_sided) {
const bool front_dirty =
state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref);
const bool back_dirty =
state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref);
if (front_dirty || back_dirty) {
scheduler.Record([front_ref = regs.stencil_front_ref,
back_ref = regs.stencil_back_ref,
is_two_sided = two_sided](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_ref != back_ref;
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
});
}
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_ref != back_ref;
// Front face
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
} else if (state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
const u32 reference = regs.stencil_front_ref;
scheduler.Record([reference](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, reference);
});
}();
}
}
if (update_write_mask) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
!state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
return;
}
if (update_write_masks) {
if (two_sided) {
if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) ||
state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
scheduler.Record([
front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
is_two_sided = regs.stencil_two_side_enable
](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_write_mask != back_write_mask;
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
});
}
scheduler.Record([front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_write_mask != back_write_mask;
// Front face
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
} else if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
const u32 front_write_mask = regs.stencil_front_mask;
scheduler.Record([front_write_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_write_mask);
});
}();
}
}
if (update_compare_masks) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
!state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
return;
}
if (two_sided) {
if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) ||
state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
scheduler.Record([
front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
is_two_sided = regs.stencil_two_side_enable
](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_test_mask != back_test_mask;
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
});
}
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_test_mask != back_test_mask;
// Front face
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
} else if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
const u32 front_test_mask = regs.stencil_front_func_mask;
scheduler.Record([front_test_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_test_mask);
});
}();
}
}
state_tracker.ClearStencilReset();
}
@@ -1287,61 +1386,15 @@ void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
});
}
// Re-records the depth-bounds-test enable state when the tracked register
// changed. If the guest requests it on a device without depth-bounds support,
// warn and force the state off instead of submitting an invalid command.
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBoundsTestEnable()) {
        return;
    }
    const bool requested = regs.depth_bounds_enable;
    const bool supported = device.IsDepthBoundsSupported();
    if (requested && !supported) {
        LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
    }
    scheduler.Record([enable = requested && supported](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthBoundsTestEnableEXT(enable);
    });
}
// Forwards the guest depth-test enable flag to the command buffer when dirty.
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthTestEnable()) {
        return;
    }
    scheduler.Record([depth_test = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthTestEnableEXT(depth_test);
    });
}
// Forwards the guest depth-write enable flag to the command buffer when dirty.
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthWriteEnable()) {
        return;
    }
    scheduler.Record([depth_write = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthWriteEnableEXT(depth_write);
    });
}
// Forwards the primitive-restart enable flag to the command buffer when dirty.
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchPrimitiveRestartEnable()) {
        return;
    }
    scheduler.Record([restart = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetPrimitiveRestartEnableEXT(restart);
    });
}
// Records the rasterizer-discard state. Maxwell exposes a positive
// "rasterize enable" register, which is the inverse of Vulkan's discard flag.
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchRasterizerDiscardEnable()) {
        return;
    }
    // Compute the inversion up front: the previous capture named the raw
    // enable value "disable", which obscured the enable->discard mapping.
    const bool discard = regs.rasterize_enable == 0;
    scheduler.Record([discard](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetRasterizerDiscardEnableEXT(discard);
    });
}
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
void RasterizerVulkan::UpdateConservativeRasterizationMode(
Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchConservativeRasterizationMode()) {
return;
}
if (!device.SupportsDynamicState3ConservativeRasterizationMode() ||
!device.IsExtConservativeRasterizationSupported()) {
return;
}
scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetConservativeRasterizationModeEXT(
enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
@@ -1354,23 +1407,69 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
return;
}
if (!device.SupportsDynamicState3LineStippleEnable()) {
return;
}
scheduler.Record([enable = regs.line_stipple_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineStippleEnableEXT(enable);
});
}
// Selects the dynamic line rasterization mode when VK_EXT_line_rasterization
// is available: smooth (anti-aliased) lines when the guest asks for them and
// the device supports the smoothLines feature, rectangular lines otherwise.
// Unsupported configurations are logged once and then silently skipped.
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!device.IsExtLineRasterizationSupported()) {
        return;
    }
    if (!state_tracker.TouchLineRasterizationMode()) {
        return;
    }
    if (!device.SupportsDynamicState3LineRasterizationMode()) {
        // Warn once instead of spamming the log on every state change.
        static std::once_flag warn_missing_rect;
        std::call_once(warn_missing_rect, [] {
            LOG_WARNING(Render_Vulkan,
                        "Driver lacks rectangular line rasterization support; skipping dynamic "
                        "line state updates");
        });
        return;
    }
    const bool wants_smooth = regs.line_anti_alias_enable != 0;
    VkLineRasterizationModeEXT mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
    if (wants_smooth) {
        if (device.SupportsSmoothLines()) {
            mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
        } else {
            // Fall back to aliased rectangular lines and warn once.
            static std::once_flag warn_missing_smooth;
            std::call_once(warn_missing_smooth, [] {
                LOG_WARNING(Render_Vulkan,
                            "Line anti-aliasing requested but smoothLines feature unavailable; "
                            "using rectangular rasterization");
            });
        }
    }
    scheduler.Record([mode](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetLineRasterizationModeEXT(mode);
    });
}
// Forwards the primitive-restart enable flag to the command buffer when dirty.
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchPrimitiveRestartEnable()) {
        return;
    }
    const bool restart_enabled = regs.primitive_restart.enabled != 0;
    scheduler.Record([restart_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetPrimitiveRestartEnableEXT(restart_enabled);
    });
}
// Records the rasterizer-discard state when dirty. Maxwell stores a positive
// "rasterize enable" bit; Vulkan wants the inverse (discard).
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchRasterizerDiscardEnable()) {
        return;
    }
    scheduler.Record([discard = regs.rasterize_enable == 0](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetRasterizerDiscardEnableEXT(discard);
    });
}
void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1412,6 +1511,9 @@ void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchLogicOpEnable()) {
return;
}
if (!device.SupportsDynamicState3LogicOpEnable()) {
return;
}
scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLogicOpEnableEXT(enable != 0);
});
@@ -1421,6 +1523,9 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
if (!state_tracker.TouchDepthClampEnable()) {
return;
}
if (!device.SupportsDynamicState3DepthClampEnable()) {
return;
}
bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
@@ -1431,6 +1536,41 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
[is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
}
// Applies alpha-to-coverage dynamically. The register flag only takes effect
// when the currently bound pipeline opted in to alpha-to-coverage.
void RasterizerVulkan::UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchAlphaToCoverageEnable()) {
        return;
    }
    if (!device.SupportsDynamicState3AlphaToCoverageEnable()) {
        return;
    }
    GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
    bool enable = false;
    if (pipeline != nullptr && pipeline->SupportsAlphaToCoverage()) {
        enable = regs.anti_alias_alpha_control.alpha_to_coverage != 0;
    }
    scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetAlphaToCoverageEnableEXT(enable ? VK_TRUE : VK_FALSE);
    });
}
// Applies alpha-to-one dynamically. Devices without the dynamic state leave it
// disabled and log a single warning; the flag also requires pipeline support.
void RasterizerVulkan::UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchAlphaToOneEnable()) {
        return;
    }
    if (!device.SupportsDynamicState3AlphaToOneEnable()) {
        // Warn only the first time to keep the log readable.
        static std::once_flag warn_alpha_to_one;
        std::call_once(warn_alpha_to_one, [] {
            LOG_WARNING(Render_Vulkan,
                        "Alpha-to-one is not supported on this device; forcing it disabled");
        });
        return;
    }
    GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
    bool enable = false;
    if (pipeline != nullptr && pipeline->SupportsAlphaToOne()) {
        enable = regs.anti_alias_alpha_control.alpha_to_one != 0;
    }
    scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetAlphaToOneEnableEXT(enable ? VK_TRUE : VK_FALSE);
    });
}
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthCompareOp()) {
return;
@@ -1440,6 +1580,36 @@ void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& reg
});
}
// Records the depth-bounds-test enable state when dirty; devices without
// depth-bounds support skip the update entirely.
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBoundsTestEnable()) {
        return;
    }
    if (!device.IsDepthBoundsSupported()) {
        return;
    }
    const bool bounds_enabled = regs.depth_bounds_enable != 0;
    scheduler.Record([bounds_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthBoundsTestEnableEXT(bounds_enabled);
    });
}
// Forwards the guest depth-test enable flag to the command buffer when dirty.
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthTestEnable()) {
        return;
    }
    const bool test_enabled = regs.depth_test_enable != 0;
    scheduler.Record([test_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthTestEnableEXT(test_enabled);
    });
}
// Forwards the guest depth-write enable flag to the command buffer when dirty.
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthWriteEnable()) {
        return;
    }
    const bool write_enabled = regs.depth_write_enabled != 0;
    scheduler.Record([write_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthWriteEnableEXT(write_enabled);
    });
}
void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchFrontFace()) {
return;

View File

@@ -25,6 +25,7 @@
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -168,7 +169,6 @@ private:
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -183,6 +183,8 @@ private:
void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);

View File

@@ -4,6 +4,7 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <chrono>
#include <memory>
#include <mutex>
#include <thread>
@@ -13,6 +14,7 @@
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -77,6 +79,14 @@ void Scheduler::WaitWorker() {
std::scoped_lock el{execution_mutex};
}
// Flushes pending work when no submission has happened within the keep-alive
// window, preventing the GPU from going idle on sparse workloads.
void Scheduler::KeepAliveTick() {
    const auto elapsed = Clock::now() - last_submission_time;
    if (elapsed >= KEEPALIVE_INTERVAL) {
        Flush();
    }
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
@@ -130,9 +140,27 @@ void Scheduler::RequestOutsideRenderPassOperationContext() {
// Binds a new graphics pipeline to the scheduler state.
// Returns true when the pipeline actually changed (callers must re-bind),
// false when the same pipeline is already current.
// Also manages the deferred StateEnable invalidation: binding a pipeline
// without extended dynamic state arms a refresh, and the next pipeline that
// does use extended dynamic state consumes it by invalidating the tracker's
// state-enable flags.
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
    if (state.graphics_pipeline == pipeline) {
        // Same pipeline re-bound: still honor a pending refresh so dynamic
        // state enables are re-recorded after a non-EDS pipeline was used.
        if (pipeline && pipeline->UsesExtendedDynamicState() &&
            state.needs_state_enable_refresh) {
            state_tracker.InvalidateStateEnableFlag();
            state.needs_state_enable_refresh = false;
        }
        return false;
    }
    state.graphics_pipeline = pipeline;
    if (!pipeline) {
        // Unbinding: report a change but leave the refresh flag untouched.
        return true;
    }
    if (!pipeline->UsesExtendedDynamicState()) {
        // Static-state pipeline clobbers dynamic enables; refresh them later.
        state.needs_state_enable_refresh = true;
    } else if (state.needs_state_enable_refresh) {
        state_tracker.InvalidateStateEnableFlag();
        state.needs_state_enable_refresh = false;
    }
    return true;
}
@@ -252,6 +280,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
});
chunk->MarkSubmit();
DispatchWork();
last_submission_time = Clock::now();
return signal_value;
}
@@ -276,8 +305,13 @@ void Scheduler::EndRenderPass()
return;
}
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
query_cache->NotifySegment(false);
if (query_cache) {
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
if (query_cache->HasStreamer(VideoCommon::QueryType::StreamingByteCount)) {
query_cache->CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
query_cache->NotifySegment(false);
}
Record([num_images = num_renderpass_images,
images = renderpass_images,

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <functional>
@@ -49,6 +53,9 @@ public:
/// safe to touch worker resources.
void WaitWorker();
/// Submits a tiny chunk of work if recent GPU submissions are stale.
void KeepAliveTick();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
@@ -125,6 +132,8 @@ public:
std::mutex submit_mutex;
private:
using Clock = std::chrono::steady_clock;
class Command {
public:
virtual ~Command() = default;
@@ -214,6 +223,7 @@ private:
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;
bool needs_state_enable_refresh = false;
};
void WorkerThread(std::stop_token stop_token);
@@ -246,6 +256,9 @@ private:
State state;
Clock::time_point last_submission_time{Clock::time_point::min()};
static constexpr std::chrono::milliseconds KEEPALIVE_INTERVAL{4};
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};

View File

@@ -5,6 +5,7 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <optional>
#include <utility>
#include <vector>
@@ -49,6 +50,7 @@ size_t GetStreamBufferSize(const Device& device) {
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
@@ -74,13 +76,16 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
}
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
// Hands out a staging buffer reference. Immediate uploads small enough to fit
// a stream-buffer region are served from the ring-style stream buffer;
// everything else (downloads, deferred requests, oversized uploads) falls back
// to the pooled staging buffers.
// NOTE(review): the diff view had merged old and new hunk lines here, leaving
// unbalanced braces; this is the reconstructed (new) body.
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
    if (!deferred && usage == MemoryUsage::Upload) {
        if (size <= region_size) {
            return GetStreamBuffer(size);
        }
    }
    return GetStagingBuffer(size, usage, deferred);
}
@@ -142,6 +147,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
}
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
scheduler.GetMasterSemaphore().Refresh();
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });

View File

@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <climits>
#include <optional>
#include <vector>
#include "common/common_types.h"

View File

@@ -48,6 +48,7 @@ Flags MakeInvalidationFlags() {
FrontFace,
StencilOp,
StencilTestEnable,
RasterizerDiscardEnable,
VertexBuffers,
VertexInput,
StateEnable,
@@ -55,6 +56,9 @@ Flags MakeInvalidationFlags() {
DepthBiasEnable,
LogicOpEnable,
DepthClampEnable,
AlphaToCoverageEnable,
AlphaToOneEnable,
LineRasterizationMode,
LogicOp,
Blending,
ColorMask,
@@ -148,6 +152,8 @@ void SetupDirtyStateEnable(Tables& tables) {
setup(OFF(logic_op.enable), LogicOpEnable);
setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable);
setup(OFF(line_stipple_enable), LineStippleEnable);
setup(OFF(anti_alias_alpha_control.alpha_to_coverage), AlphaToCoverageEnable);
setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable);
}
void SetupDirtyDepthCompareOp(Tables& tables) {
@@ -226,6 +232,7 @@ void SetupRasterModes(Tables &tables) {
table[OFF(line_stipple_params)] = LineStippleParams;
table[OFF(conservative_raster_enable)] = ConservativeRasterizationMode;
table[OFF(line_anti_alias_enable)] = LineRasterizationMode;
}
} // Anonymous namespace

View File

@@ -54,6 +54,7 @@ enum : u8 {
PrimitiveRestartEnable,
RasterizerDiscardEnable,
ConservativeRasterizationMode,
LineRasterizationMode,
LineStippleEnable,
LineStippleParams,
DepthBiasEnable,
@@ -61,6 +62,8 @@ enum : u8 {
LogicOp,
LogicOpEnable,
DepthClampEnable,
AlphaToCoverageEnable,
AlphaToOneEnable,
Blending,
BlendEnable,
@@ -94,6 +97,10 @@ public:
(*flags)[Dirty::Scissors] = true;
}
void InvalidateStateEnableFlag() {
(*flags)[Dirty::StateEnable] = true;
}
bool TouchViewports() {
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
@@ -225,6 +232,14 @@ public:
return Exchange(Dirty::DepthClampEnable, false);
}
bool TouchAlphaToCoverageEnable() {
return Exchange(Dirty::AlphaToCoverageEnable, false);
}
bool TouchAlphaToOneEnable() {
return Exchange(Dirty::AlphaToOneEnable, false);
}
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
@@ -261,6 +276,10 @@ public:
return Exchange(Dirty::LogicOp, false);
}
bool TouchLineRasterizationMode() {
return Exchange(Dirty::LineRasterizationMode, false);
}
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;

View File

@@ -306,7 +306,17 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) {
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
// According to Vulkan spec, when using VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR,
// the base format (imageFormat) MUST be included in pViewFormats
const std::array view_formats{
swapchain_ci.imageFormat, // Base format MUST be first
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
#ifdef ANDROID
VK_FORMAT_R8G8B8A8_UNORM, // Android may use RGBA
VK_FORMAT_R8G8B8A8_SRGB,
#endif
};
VkImageFormatListCreateInfo format_list{
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
.pNext = nullptr,

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -101,7 +104,7 @@ public:
}
VkSemaphore CurrentRenderSemaphore() const {
return *render_semaphores[frame_index];
return *render_semaphores[image_index];
}
u32 GetWidth() const {

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <optional>
#include <span>
#include "video_core/texture_cache/texture_cache_base.h"
@@ -56,6 +60,8 @@ public:
void TickFrame();
void WaitForGpuTick(u64 tick);
u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const;
@@ -110,10 +116,15 @@ public:
return view_formats[static_cast<std::size_t>(format)];
}
bool RequiresBlockCompatibleViewFormats(PixelFormat format) const noexcept {
return requires_block_view_formats[static_cast<std::size_t>(format)];
}
void BarrierFeedbackLoop();
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const;
VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;
@@ -127,6 +138,7 @@ public:
std::unique_ptr<MSAACopyPass> msaa_copy_pass;
const Settings::ResolutionScalingInfo& resolution;
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
std::array<bool, VideoCore::Surface::MaxPixelFormat> requires_block_view_formats{};
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
@@ -173,6 +185,30 @@ public:
return (this->*current_image).UsageFlags();
}
void TrackGpuReadTick(u64 tick) noexcept {
TrackPendingReadTick(tick);
}
void TrackGpuWriteTick(u64 tick) noexcept {
TrackPendingWriteTick(tick);
}
void CompleteGpuReadTick(u64 completed_tick) noexcept {
ClearPendingReadTick(completed_tick);
}
void CompleteGpuWriteTick(u64 completed_tick) noexcept {
ClearPendingWriteTick(completed_tick);
}
[[nodiscard]] std::optional<u64> PendingGpuReadTick() const noexcept {
return PendingReadTick();
}
[[nodiscard]] std::optional<u64> PendingGpuWriteTick() const noexcept {
return PendingWriteTick();
}
/// Returns true when the image is already initialized and mark it as initialized
[[nodiscard]] bool ExchangeInitialization() noexcept {
return std::exchange(initialized, true);
@@ -235,15 +271,23 @@ public:
[[nodiscard]] VkImageView ColorView();
[[nodiscard]] VkImageView SampledView(Shader::TextureType texture_type,
Shader::SamplerComponentType component_type);
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] bool IsRescaled() const noexcept;
[[nodiscard]] bool SupportsDepthCompareSampling() const noexcept;
[[nodiscard]] bool Is3DImage() const noexcept {
return is_3d_image;
}
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
return *image_views[static_cast<size_t>(texture_type)];
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
return image_handle;
}
@@ -268,23 +312,36 @@ private:
struct StorageViews {
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> typeless;
};
static constexpr size_t NUMERIC_VIEW_TYPES = 3;
[[nodiscard]] Shader::TextureType BaseTextureType() const noexcept;
[[nodiscard]] std::optional<u32> LayerCountOverride(Shader::TextureType texture_type) const noexcept;
[[nodiscard]] VkImageView DepthView(Shader::TextureType texture_type);
[[nodiscard]] VkImageView StencilView(Shader::TextureType texture_type);
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask,
Shader::TextureType texture_type);
const Device* device = nullptr;
const SlotVector<Image>* slot_images = nullptr;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> depth_views;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> stencil_views;
std::array<std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES>, NUMERIC_VIEW_TYPES>
sampled_component_views;
vk::ImageView color_view;
vk::Image null_image;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 buffer_size = 0;
bool is_3d_image = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
@@ -305,9 +362,19 @@ public:
return static_cast<bool>(sampler_default_anisotropy);
}
[[nodiscard]] VkSampler SelectHandle(bool supports_linear_filter,
bool supports_anisotropy,
bool allow_depth_compare) const noexcept;
private:
vk::Sampler sampler;
vk::Sampler sampler_default_anisotropy;
vk::Sampler sampler_force_point;
vk::Sampler sampler_compare_disabled;
vk::Sampler sampler_default_anisotropy_compare_disabled;
vk::Sampler sampler_force_point_compare_disabled;
bool uses_linear_filter = false;
bool depth_compare_enabled = false;
};
class Framebuffer {

View File

@@ -70,6 +70,102 @@ static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture
entry.a_type, entry.srgb_conversion));
}
namespace {
// Returns true when any of the descriptor's four swizzle channels selects
// the given source component.
[[nodiscard]] bool UsesSwizzleSource(const Tegra::Texture::TICEntry& entry,
                                     Tegra::Texture::SwizzleSource source) {
    for (const auto current : {entry.x_source.Value(), entry.y_source.Value(),
                               entry.z_source.Value(), entry.w_source.Value()}) {
        if (current == source) {
            return true;
        }
    }
    return false;
}
// Infers whether a combined depth/stencil texture is sampled as depth or
// stencil by inspecting which source channel (R or G) the swizzle reads.
// Returns std::nullopt when the swizzle is ambiguous (uses both or neither),
// letting the caller fall back to another heuristic.
[[nodiscard]] std::optional<Shader::SamplerComponentType> DepthStencilComponentFromSwizzle(
    const Tegra::Texture::TICEntry& entry, VideoCore::Surface::PixelFormat pixel_format) {
    using Tegra::Texture::SwizzleSource;
    const bool uses_r = UsesSwizzleSource(entry, SwizzleSource::R);
    const bool uses_g = UsesSwizzleSource(entry, SwizzleSource::G);
    switch (pixel_format) {
    // Depth-first layouts: R carries depth, G carries stencil.
    case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
    case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
        if (uses_r != uses_g) {
            return uses_r ? Shader::SamplerComponentType::Depth
                          : Shader::SamplerComponentType::Stencil;
        }
        break;
    // Stencil-first layout: the component mapping is reversed.
    case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
        if (uses_r != uses_g) {
            return uses_r ? Shader::SamplerComponentType::Stencil
                          : Shader::SamplerComponentType::Depth;
        }
        break;
    default:
        break;
    }
    return std::nullopt;
}
} // Anonymous namespace
// Derives the sampler component type (Depth/Stencil/Sint/Uint/Float) a shader
// should use for the given texture descriptor. Depth and stencil surfaces are
// resolved first (with swizzle-based disambiguation for combined formats);
// color surfaces are classified by their per-channel integer component types.
static Shader::SamplerComponentType ConvertSamplerComponentType(
    const Tegra::Texture::TICEntry& entry) {
    const auto pixel_format = PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type,
                                                         entry.b_type, entry.a_type,
                                                         entry.srgb_conversion);
    const auto surface_type = VideoCore::Surface::GetFormatType(pixel_format);
    if (entry.depth_texture != 0 || surface_type == VideoCore::Surface::SurfaceType::Depth) {
        return Shader::SamplerComponentType::Depth;
    }
    if (surface_type == VideoCore::Surface::SurfaceType::Stencil) {
        return Shader::SamplerComponentType::Stencil;
    }
    if (surface_type == VideoCore::Surface::SurfaceType::DepthStencil) {
        // Prefer the component implied by the swizzle; fall back to the
        // depth_texture bit when the swizzle is ambiguous.
        if (const auto inferred = DepthStencilComponentFromSwizzle(entry, pixel_format)) {
            return *inferred;
        }
        return entry.depth_texture != 0 ? Shader::SamplerComponentType::Depth
                                        : Shader::SamplerComponentType::Stencil;
    }
    // Scan all four channels for signed/unsigned integer components.
    const auto accumulate = [](const Tegra::Texture::ComponentType component,
                               bool& has_signed, bool& has_unsigned) {
        switch (component) {
        case Tegra::Texture::ComponentType::SINT:
            has_signed = true;
            break;
        case Tegra::Texture::ComponentType::UINT:
            has_unsigned = true;
            break;
        default:
            break;
        }
    };
    bool has_signed{};
    bool has_unsigned{};
    accumulate(entry.r_type, has_signed, has_unsigned);
    accumulate(entry.g_type, has_signed, has_unsigned);
    accumulate(entry.b_type, has_signed, has_unsigned);
    accumulate(entry.a_type, has_signed, has_unsigned);
    // Signed wins mixed signed/unsigned formats (same outcome as the previous
    // four-branch version, which was redundant: the "&& !has_unsigned" /
    // "&& !has_signed" branches were subsumed by these two checks).
    if (has_signed) {
        return Shader::SamplerComponentType::Sint;
    }
    if (has_unsigned) {
        return Shader::SamplerComponentType::Uint;
    }
    return Shader::SamplerComponentType::Float;
}
static std::string_view StageToPrefix(Shader::Stage stage) {
switch (stage) {
case Shader::Stage::VertexB:
@@ -200,6 +296,7 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSizeBytes())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
const u64 num_texture_component_types{static_cast<u64>(texture_component_types.size())};
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
@@ -207,6 +304,8 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.write(reinterpret_cast<const char*>(&num_texture_component_types),
sizeof(num_texture_component_types))
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
sizeof(num_cbuf_replacement_values))
@@ -223,6 +322,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
for (const auto& [key, component] : texture_component_types) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&component), sizeof(component));
}
for (const auto& [key, format] : texture_pixel_formats) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&format), sizeof(format));
@@ -277,7 +380,17 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
ASSERT(handle.first <= tic_limit);
if (handle.first > tic_limit) {
static std::atomic<size_t> oob_count{0};
const size_t n = ++oob_count;
if (n <= 4 || (n & 63) == 0) {
LOG_WARNING(Shader,
"TIC handle {} exceeds limit {} (via_header_index={}) — returning empty",
handle.first, tic_limit, via_header_index);
}
return {};
}
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
@@ -374,6 +487,21 @@ Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
const Shader::TextureType result{ConvertTextureType(entry)};
texture_types.emplace(handle, result);
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
return result;
}
// Returns the cached sampler component type for a texture handle, reading and
// converting the TIC descriptor on a cache miss.
Shader::SamplerComponentType GraphicsEnvironment::ReadTextureComponentType(u32 handle) {
    if (const auto it = texture_component_types.find(handle);
        it != texture_component_types.end()) {
        return it->second;
    }
    const auto& regs{maxwell3d->regs};
    const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
    const auto entry{ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit,
                                     via_header_index, handle)};
    const auto result{ConvertSamplerComponentType(entry)};
    texture_component_types.emplace(handle, result);
    return result;
}
@@ -430,6 +558,20 @@ Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
const Shader::TextureType result{ConvertTextureType(entry)};
texture_types.emplace(handle, result);
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
return result;
}
// Resolves the sampler component type for a compute texture handle, caching
// the decoded value so repeated lookups avoid re-reading the TIC descriptor.
Shader::SamplerComponentType ComputeEnvironment::ReadTextureComponentType(u32 handle) {
    if (const auto cached = texture_component_types.find(handle);
        cached != texture_component_types.end()) {
        return cached->second;
    }
    const auto& regs{kepler_compute->regs};
    const auto& qmd{kepler_compute->launch_description};
    const auto tic_entry =
        ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
    const auto component_type = ConvertSamplerComponentType(tic_entry);
    texture_component_types.emplace(handle, component_type);
    return component_type;
}
@@ -455,12 +597,15 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
u64 code_size{};
u64 num_texture_types{};
u64 num_texture_pixel_formats{};
u64 num_texture_component_types{};
u64 num_cbuf_values{};
u64 num_cbuf_replacement_values{};
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.read(reinterpret_cast<char*>(&num_texture_component_types),
sizeof(num_texture_component_types))
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
sizeof(num_cbuf_replacement_values))
@@ -480,6 +625,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
.read(reinterpret_cast<char*>(&type), sizeof(type));
texture_types.emplace(key, type);
}
for (size_t i = 0; i < num_texture_component_types; ++i) {
u32 key;
Shader::SamplerComponentType component;
file.read(reinterpret_cast<char*>(&key), sizeof(key))
.read(reinterpret_cast<char*>(&component), sizeof(component));
texture_component_types.emplace(key, component);
}
for (size_t i = 0; i < num_texture_pixel_formats; ++i) {
u32 key;
Shader::TexturePixelFormat format;
@@ -534,6 +686,15 @@ u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
return it->second;
}
// Looks up a component type that was deserialized from the pipeline cache.
// Missing handles are logged once per call and fall back to Float, the most
// common sampler interpretation.
Shader::SamplerComponentType FileEnvironment::ReadTextureComponentType(u32 handle) {
    if (const auto it = texture_component_types.find(handle);
        it != texture_component_types.end()) {
        return it->second;
    }
    LOG_WARNING(Render_Vulkan, "Texture component descriptor {:08x} not found", handle);
    return Shader::SamplerComponentType::Float;
}
Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
const auto it{texture_types.find(handle)};
if (it == texture_types.end()) {

View File

@@ -80,6 +80,7 @@ protected:
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
@@ -116,6 +117,8 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -142,6 +145,8 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -176,6 +181,8 @@ public:
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
[[nodiscard]] Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
[[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -202,6 +209,7 @@ public:
private:
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;

View File

@@ -4,6 +4,8 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <optional>
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/settings.h"
@@ -408,6 +410,126 @@ bool IsPixelFormatSignedInteger(PixelFormat format) {
}
}
namespace {
/// Bundle of numeric reinterpretation targets for one pixel-format "family"
/// (same layout, different numeric type). PixelFormat::Invalid marks a
/// variant the family does not have.
struct NumericVariantSet {
    PixelFormat float_format = PixelFormat::Invalid;
    PixelFormat uint_format = PixelFormat::Invalid;
    PixelFormat sint_format = PixelFormat::Invalid;

    /// Returns the family member matching the requested numeric type, or
    /// std::nullopt when that variant does not exist.
    [[nodiscard]] constexpr std::optional<PixelFormat> Select(PixelFormatNumeric numeric) const {
        PixelFormat candidate = PixelFormat::Invalid;
        switch (numeric) {
        case PixelFormatNumeric::Float:
            candidate = float_format;
            break;
        case PixelFormatNumeric::Uint:
            candidate = uint_format;
            break;
        case PixelFormatNumeric::Sint:
            candidate = sint_format;
            break;
        }
        if (candidate == PixelFormat::Invalid) {
            return std::nullopt;
        }
        return candidate;
    }
};

/// Convenience aggregate builder for a NumericVariantSet.
constexpr NumericVariantSet MakeVariant(PixelFormat float_format, PixelFormat uint_format,
                                        PixelFormat sint_format) {
    return NumericVariantSet{
        .float_format = float_format,
        .uint_format = uint_format,
        .sint_format = sint_format,
    };
}

/// Maps a pixel format to its family's variant set; std::nullopt for formats
/// with no known numeric siblings (compressed, depth/stencil, packed, ...).
/// Marked constexpr for consistency with MakeVariant so the table can be
/// resolved at compile time.
constexpr std::optional<NumericVariantSet> LookupNumericVariantSet(PixelFormat format) {
    switch (format) {
    case PixelFormat::R8_UNORM:
    case PixelFormat::R8_SNORM:
    case PixelFormat::R8_UINT:
    case PixelFormat::R8_SINT:
        return MakeVariant(PixelFormat::R8_UNORM, PixelFormat::R8_UINT, PixelFormat::R8_SINT);
    case PixelFormat::R16_FLOAT:
    case PixelFormat::R16_UNORM:
    case PixelFormat::R16_SNORM:
    case PixelFormat::R16_UINT:
    case PixelFormat::R16_SINT:
        return MakeVariant(PixelFormat::R16_FLOAT, PixelFormat::R16_UINT, PixelFormat::R16_SINT);
    case PixelFormat::R32_FLOAT:
    case PixelFormat::R32_UINT:
    case PixelFormat::R32_SINT:
        return MakeVariant(PixelFormat::R32_FLOAT, PixelFormat::R32_UINT, PixelFormat::R32_SINT);
    case PixelFormat::R8G8_UNORM:
    case PixelFormat::R8G8_SNORM:
    case PixelFormat::R8G8_UINT:
    case PixelFormat::R8G8_SINT:
        return MakeVariant(PixelFormat::R8G8_UNORM, PixelFormat::R8G8_UINT, PixelFormat::R8G8_SINT);
    case PixelFormat::R16G16_FLOAT:
    case PixelFormat::R16G16_UNORM:
    case PixelFormat::R16G16_SNORM:
    case PixelFormat::R16G16_UINT:
    case PixelFormat::R16G16_SINT:
        return MakeVariant(PixelFormat::R16G16_FLOAT, PixelFormat::R16G16_UINT,
                           PixelFormat::R16G16_SINT);
    case PixelFormat::R32G32_FLOAT:
    case PixelFormat::R32G32_UINT:
    case PixelFormat::R32G32_SINT:
        return MakeVariant(PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
                           PixelFormat::R32G32_SINT);
    case PixelFormat::R16G16B16A16_FLOAT:
    case PixelFormat::R16G16B16A16_UNORM:
    case PixelFormat::R16G16B16A16_SNORM:
    case PixelFormat::R16G16B16A16_UINT:
    case PixelFormat::R16G16B16A16_SINT:
        return MakeVariant(PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
                           PixelFormat::R16G16B16A16_SINT);
    case PixelFormat::R32G32B32A32_FLOAT:
    case PixelFormat::R32G32B32A32_UINT:
    case PixelFormat::R32G32B32A32_SINT:
        return MakeVariant(PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT,
                           PixelFormat::R32G32B32A32_SINT);
    case PixelFormat::A8B8G8R8_UNORM:
    case PixelFormat::A8B8G8R8_SNORM:
    case PixelFormat::A8B8G8R8_SRGB:
    case PixelFormat::A8B8G8R8_UINT:
    case PixelFormat::A8B8G8R8_SINT:
        return MakeVariant(PixelFormat::A8B8G8R8_UNORM, PixelFormat::A8B8G8R8_UINT,
                           PixelFormat::A8B8G8R8_SINT);
    case PixelFormat::A2B10G10R10_UNORM:
    case PixelFormat::A2B10G10R10_UINT:
        // No signed-integer sibling exists for the 2-10-10-10 layout.
        return MakeVariant(PixelFormat::A2B10G10R10_UNORM, PixelFormat::A2B10G10R10_UINT,
                           PixelFormat::Invalid);
    default:
        return std::nullopt;
    }
}
} // Anonymous namespace
// Classifies a pixel format by numeric interpretation: integer formats split
// into Sint/Uint, everything else is treated as float-like.
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format) {
    if (!IsPixelFormatInteger(format)) {
        return PixelFormatNumeric::Float;
    }
    return IsPixelFormatSignedInteger(format) ? PixelFormatNumeric::Sint
                                              : PixelFormatNumeric::Uint;
}
// Maps `format` onto the member of its format family that has the requested
// numeric type. Returns std::nullopt when the family is unknown or lacks the
// requested variant.
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
                                                  PixelFormatNumeric target_numeric) {
    if (const auto variants = LookupNumericVariantSet(format)) {
        return variants->Select(target_numeric);
    }
    return std::nullopt;
}
size_t PixelComponentSizeBitsInteger(PixelFormat format) {
switch (format) {
case PixelFormat::A8B8G8R8_SINT:

View File

@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <climits>
#include <optional>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
@@ -517,6 +521,16 @@ bool IsPixelFormatSignedInteger(PixelFormat format);
size_t PixelComponentSizeBitsInteger(PixelFormat format);
enum class PixelFormatNumeric {
Float,
Uint,
Sint,
};
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format);
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
PixelFormatNumeric target_numeric);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
u64 TranscodedAstcSize(u64 base_size, PixelFormat format);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -21,6 +24,27 @@ constexpr auto FLOAT = ComponentType::FLOAT;
constexpr bool LINEAR = false;
constexpr bool SRGB = true;
// Treats a zero (unset) texture format as the common A8B8G8R8 default.
constexpr TextureFormat SanitizeFormat(TextureFormat format) {
    const bool is_unset = format == static_cast<TextureFormat>(0);
    return is_unset ? TextureFormat::A8B8G8R8 : format;
}
// Normalizes component types: zero (unset) becomes UNORM, and the FORCE_FP16
// variants collapse onto their plain SNORM/UNORM counterparts.
constexpr ComponentType SanitizeComponent(ComponentType component) {
    if (component == static_cast<ComponentType>(0)) {
        return ComponentType::UNORM;
    }
    if (component == ComponentType::SNORM_FORCE_FP16) {
        return ComponentType::SNORM;
    }
    if (component == ComponentType::UNORM_FORCE_FP16) {
        return ComponentType::UNORM;
    }
    return component;
}
constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
u32 hash = is_srgb ? 1 : 0;
@@ -41,6 +65,11 @@ constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb =
PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
ComponentType blue, ComponentType alpha,
bool is_srgb) noexcept {
format = SanitizeFormat(format);
red = SanitizeComponent(red);
green = SanitizeComponent(green);
blue = SanitizeComponent(blue);
alpha = SanitizeComponent(alpha);
switch (Hash(format, red, green, blue, alpha, is_srgb)) {
case Hash(TextureFormat::A8B8G8R8, UNORM):
return PixelFormat::A8B8G8R8_UNORM;

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
#include <optional>
#include <vector>
@@ -58,6 +62,50 @@ struct ImageBase {
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
explicit ImageBase(const NullImageParams&);
// Records that GPU work completing at `tick` reads this image; keeps the
// largest (latest) outstanding read tick.
void TrackPendingReadTick(u64 tick) noexcept {
    pending_read_tick = std::max(pending_read_tick.value_or(0), tick);
}
// Records that GPU work completing at `tick` writes this image; keeps the
// largest (latest) outstanding write tick.
void TrackPendingWriteTick(u64 tick) noexcept {
    pending_write_tick = std::max(pending_write_tick.value_or(0), tick);
}
// Drops the pending-read marker once the GPU has reached the tracked tick.
void ClearPendingReadTick(u64 completed_tick) noexcept {
    if (!pending_read_tick || completed_tick < *pending_read_tick) {
        return;
    }
    pending_read_tick.reset();
}
// Drops the pending-write marker once the GPU has reached the tracked tick.
void ClearPendingWriteTick(u64 completed_tick) noexcept {
    if (!pending_write_tick || completed_tick < *pending_write_tick) {
        return;
    }
    pending_write_tick.reset();
}
// True while un-completed GPU reads of this image are still being tracked.
[[nodiscard]] bool HasPendingReadTick() const noexcept {
    return pending_read_tick.has_value();
}
// True while un-completed GPU writes to this image are still being tracked.
[[nodiscard]] bool HasPendingWriteTick() const noexcept {
    return pending_write_tick.has_value();
}
// Latest tracked GPU-read tick, or std::nullopt when no read is pending.
[[nodiscard]] std::optional<u64> PendingReadTick() const noexcept {
    return pending_read_tick;
}
// Latest tracked GPU-write tick, or std::nullopt when no write is pending.
[[nodiscard]] std::optional<u64> PendingWriteTick() const noexcept {
    return pending_write_tick;
}
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
[[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
@@ -115,6 +163,9 @@ struct ImageBase {
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
ImageMapId map_view_id{};
std::optional<u64> pending_read_tick;
std::optional<u64> pending_write_tick;
};
struct ImageMapView {

View File

@@ -33,6 +33,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
dma_downloaded = forced_flushed;
format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
config.a_type, config.srgb_conversion);
num_samples = NumSamples(config.msaa_mode);
resources.levels = config.max_mip_level + 1;
if (config.IsPitchLinear()) {

View File

@@ -1,15 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
constexpr inline std::size_t MaxSampleLocationSlots = 16;
[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
switch (num_samples) {
case 1:
@@ -95,4 +102,24 @@ namespace VideoCommon {
return 1;
}
// NVN guarantees up to sixteen programmable sample slots shared across a repeating pixel grid
// (per the 0.7.0 addon release notes), so the grid dimensions shrink as MSAA increases:
// grid_width * grid_height * samples == 16 for every supported mode.
[[nodiscard]] inline std::pair<u32, u32> SampleLocationGridSize(Tegra::Texture::MsaaMode msaa_mode) {
    const int samples = NumSamples(msaa_mode);
    switch (samples) {
    case 1:
        return {4u, 4u};
    case 2:
        return {4u, 2u};
    case 4:
        return {2u, 2u};
    case 8:
        return {2u, 1u};
    case 16:
        return {1u, 1u};
    }
    // Fixed message: the value formatted is the sample count, not a grid size.
    ASSERT_MSG(false, "Unsupported sample count={}", samples);
    return {1u, 1u};
}
} // namespace VideoCommon

View File

@@ -540,6 +540,7 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
if (True(image.flags & ImageFlagBits::CpuModified)) {
return;
}
EnsureImageReady(image, ImageAccessType::Write);
image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
@@ -550,11 +551,12 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
}
image.flags &= ~ImageFlagBits::GpuModified;
EnsureImageReady(this->slot_images[image_id], ImageAccessType::Read);
images.push_back(image_id);
});
if (images.empty()) {
@@ -606,6 +608,7 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
EnsureImageReady(image, ImageAccessType::Write);
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
}
@@ -621,6 +624,7 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
EnsureImageReady(image, ImageAccessType::Write);
if (False(image.flags & ImageFlagBits::CpuModified)) {
image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Tracked)) {
@@ -2423,6 +2427,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
template <class P>
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
Image& image = slot_images[image_id];
EnsureImageReady(image, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
TrackGpuImageAccess(image_id, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
runtime.TransitionImageLayout(image);
if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) {
@@ -2438,6 +2445,85 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
lru_cache.Touch(image.lru_index, frame_tick);
}
// Stages a GPU access (read or write) for `image_id`. Staged entries are not
// yet tied to a fence; CommitPendingGpuAccesses associates the whole staged
// batch with a fence value at submission time.
template <class P>
void TextureCache<P>::TrackGpuImageAccess(ImageId image_id, ImageAccessType access) {
    staged_gpu_accesses.push_back({image_id, access});
}
// Associates all staged GPU accesses with `fence_value` and stamps the
// corresponding pending read/write ticks onto each image.
// committed_gpu_accesses and committed_gpu_ticks are kept in lockstep: the
// batch pushed here is paired with the tick pushed right after it, and
// CompleteGpuAccesses pops both together.
template <class P>
void TextureCache<P>::CommitPendingGpuAccesses(u64 fence_value) {
    if (staged_gpu_accesses.empty()) {
        return;
    }
    // NOTE(review): a zero fence leaves staged accesses queued for a later
    // commit rather than dropping them — confirm callers rely on this.
    if (fence_value == 0) {
        return;
    }
    auto& batch = committed_gpu_accesses.emplace_back(std::move(staged_gpu_accesses));
    // Moved-from vector state is unspecified; clear() guarantees it is empty.
    staged_gpu_accesses.clear();
    committed_gpu_ticks.push_back(fence_value);
    for (const PendingImageAccess& access : batch) {
        ImageBase& image = slot_images[access.image_id];
        if (access.access == ImageAccessType::Read) {
            image.TrackPendingReadTick(fence_value);
        } else {
            image.TrackPendingWriteTick(fence_value);
        }
    }
}
// Retires every committed access batch whose fence value is <= the completed
// fence, clearing the matching pending ticks on images that still exist.
// Batches are processed in FIFO order, mirroring CommitPendingGpuAccesses.
template <class P>
void TextureCache<P>::CompleteGpuAccesses(u64 completed_fence) {
    if (completed_fence == 0) {
        return;
    }
    while (!committed_gpu_ticks.empty() && committed_gpu_ticks.front() <= completed_fence) {
        auto accesses = std::move(committed_gpu_accesses.front());
        committed_gpu_accesses.pop_front();
        committed_gpu_ticks.pop_front();
        for (const PendingImageAccess& access : accesses) {
            // Images may have been destroyed since the batch was committed.
            if (!slot_images.Contains(access.image_id)) {
                continue;
            }
            ImageBase& image = slot_images[access.image_id];
            if (access.access == ImageAccessType::Read) {
                image.ClearPendingReadTick(completed_fence);
            } else {
                image.ClearPendingWriteTick(completed_fence);
            }
        }
    }
}
// Blocks the CPU until all tracked GPU work on `image` that conflicts with
// `access` has completed, then drops the satisfied tick markers.
// Writes must wait for both pending reads and pending writes (write-after-read
// and write-after-write hazards); reads only wait for pending writes.
// The original wait_tick helper returned the same optional it received,
// forcing a redundant second unwrap at every call site; this version performs
// the wait and the matching clear in one step, preserving the original wait
// order (reads first on the write path, then writes).
template <class P>
void TextureCache<P>::EnsureImageReady(ImageBase& image, ImageAccessType access) {
    const auto wait_and_clear = [this, &image](std::optional<u64> tick, bool is_write_tick) {
        if (!tick) {
            return;
        }
        runtime.WaitForGpuTick(*tick);
        if (is_write_tick) {
            image.ClearPendingWriteTick(*tick);
        } else {
            image.ClearPendingReadTick(*tick);
        }
    };
    if (access == ImageAccessType::Write) {
        wait_and_clear(image.PendingReadTick(), false);
    }
    wait_and_clear(image.PendingWriteTick(), true);
}
template <class P>
void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
bool invalidate) {
@@ -2455,6 +2541,8 @@ template <class P>
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
Image& dst = slot_images[dst_id];
Image& src = slot_images[src_id];
EnsureImageReady(dst, ImageAccessType::Write);
EnsureImageReady(src, ImageAccessType::Read);
const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
@@ -2471,20 +2559,30 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
}
}
}
const auto TrackCopyAccesses = [this, dst_id, src_id]() {
TrackGpuImageAccess(dst_id, ImageAccessType::Write);
TrackGpuImageAccess(src_id, ImageAccessType::Read);
};
const auto dst_format_type = GetFormatType(dst.info.format);
const auto src_format_type = GetFormatType(src.info.format);
if (src_format_type == dst_format_type) {
if constexpr (HAS_EMULATED_COPIES) {
if (!runtime.CanImageBeCopied(dst, src)) {
return runtime.EmulateCopyImage(dst, src, copies);
runtime.EmulateCopyImage(dst, src, copies);
TrackCopyAccesses();
return;
}
}
return runtime.CopyImage(dst, src, copies);
runtime.CopyImage(dst, src, copies);
TrackCopyAccesses();
return;
}
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
if (runtime.ShouldReinterpret(dst, src)) {
return runtime.ReinterpretImage(dst, src, copies);
runtime.ReinterpretImage(dst, src, copies);
TrackCopyAccesses();
return;
}
for (const ImageCopy& copy : copies) {
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
@@ -2537,6 +2635,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
}
TrackCopyAccesses();
}
template <class P>

View File

@@ -182,6 +182,11 @@ public:
/// Return a reference to the given sampler id
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
/// Returns true when the runtime format supports linear filtering
[[nodiscard]] bool SupportsLinearFilter(PixelFormat format) const noexcept {
return runtime.SupportsLinearFilter(format);
}
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
@@ -258,6 +263,15 @@ public:
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Track that an image participates in upcoming GPU work with the given access type
void TrackGpuImageAccess(ImageId image_id, ImageAccessType access);
/// Notify the cache that tracked GPU work has been submitted with the specified fence value
void CommitPendingGpuAccesses(u64 fence_value);
/// Notify the cache that a fence value has completed so tracked accesses can be released
void CompleteGpuAccesses(u64 completed_fence);
std::recursive_mutex mutex;
private:
@@ -408,6 +422,8 @@ private:
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
void EnsureImageReady(ImageBase& image, ImageAccessType access);
/// Bind an image view as render target, downloading resources preemptively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
@@ -465,6 +481,11 @@ private:
Common::SlotId object_id;
};
struct PendingImageAccess {
ImageId image_id;
ImageAccessType access;
};
Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageMapView> slot_map_views;
Common::SlotVector<ImageView> slot_image_views;
@@ -480,6 +501,9 @@ private:
std::vector<AsyncBuffer> uncommitted_async_buffers;
std::deque<std::vector<AsyncBuffer>> async_buffers;
std::deque<AsyncBuffer> async_buffers_death_ring;
std::vector<PendingImageAccess> staged_gpu_accesses;
std::deque<std::vector<PendingImageAccess>> committed_gpu_accesses;
std::deque<u64> committed_gpu_ticks;
struct LRUItemParams {
using ObjectType = ImageId;

View File

@@ -155,4 +155,9 @@ struct SwizzleParameters {
s32 level;
};
enum class ImageAccessType : u8 {
Read,
Write,
};
} // namespace VideoCommon

View File

@@ -22,6 +22,14 @@
#include <vulkan/vulkan.h>
// Define maintenance 7-9 extension names (not yet in official Vulkan headers)
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
#endif
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
#endif
// Sanitize macros
#undef CreateEvent
#undef CreateSemaphore

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include <bitset>
#include <chrono>
#include <mutex>
#include <optional>
#include <thread>
#include <unordered_set>
@@ -90,8 +91,57 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
VK_FORMAT_UNDEFINED,
};
// Fallback format chains — presumably tried in order until the device reports
// support; VK_FORMAT_UNDEFINED terminates each list (TODO confirm against
// GetFormatAlternatives' consumers).
constexpr std::array B10G11R11_UFLOAT_PACK32{
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_UNDEFINED,
};
// NOTE(review): falling back from 16-bit float RGBA to 8-bit UNORM loses both
// range and precision — confirm this is an intentional last-resort mapping.
constexpr std::array R16G16B16A16_SFLOAT{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_UNDEFINED,
};
// NOTE(review): 16-bit UNORM to 8-bit UNORM halves component precision —
// verify consumers tolerate the quantization.
constexpr std::array R16G16B16A16_UNORM{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_UNDEFINED,
};
} // namespace Alternatives
// Identity of one "unsupported format" complaint: the format, the requested
// feature usage, and the tiling type. Used as a set key so each unique
// combination is logged only once.
struct UnsupportedFormatKey {
VkFormat format;
VkFormatFeatureFlags usage;
FormatType type;

bool operator==(const UnsupportedFormatKey&) const noexcept = default;
};
// Hash functor for UnsupportedFormatKey, folding the three fields together
// with the boost-style hash_combine recipe (64-bit golden-ratio constant).
struct UnsupportedFormatKeyHash {
    size_t operator()(const UnsupportedFormatKey& key) const noexcept {
        constexpr size_t golden_ratio = 0x9e3779b97f4a7c15ULL;
        size_t h = std::hash<int>{}(static_cast<int>(key.format));
        h ^= static_cast<size_t>(key.usage) + golden_ratio + (h << 6) + (h >> 2);
        h ^= static_cast<size_t>(key.type) + golden_ratio + (h << 6) + (h >> 2);
        return h;
    }
};
// Rate-limits unsupported-format warnings: returns true only the first time a
// given (format, usage, type) triple is seen. Thread-safe via an internal
// mutex guarding the seen-key set.
bool ShouldLogUnsupportedFormat(VkFormat format, VkFormatFeatureFlags usage, FormatType type) {
    static std::mutex mutex;
    static std::unordered_set<UnsupportedFormatKey, UnsupportedFormatKeyHash> logged_keys;
    std::scoped_lock lock{mutex};
    return logged_keys.insert(UnsupportedFormatKey{format, usage, type}).second;
}
[[maybe_unused]] constexpr VkShaderStageFlags GraphicsStageMask =
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT |
VK_SHADER_STAGE_FRAGMENT_BIT;
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
@@ -122,6 +172,12 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
case VK_FORMAT_R16G16B16A16_SFLOAT:
return Alternatives::R16G16B16A16_SFLOAT.data();
case VK_FORMAT_R16G16B16A16_UNORM:
return Alternatives::R16G16B16A16_UNORM.data();
default:
return nullptr;
}
@@ -289,32 +345,6 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
return format_properties;
}
#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
void OverrideBcnFormats(std::unordered_map<VkFormat, VkFormatProperties>& format_properties) {
// These properties are extracted from Adreno driver 512.687.0
constexpr VkFormatFeatureFlags tiling_features{
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT};
static constexpr std::array bcn_formats{
VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK,
};
for (const auto format : bcn_formats) {
format_properties[format].linearTilingFeatures = tiling_features;
format_properties[format].optimalTilingFeatures = tiling_features;
format_properties[format].bufferFeatures = buffer_features;
}
}
#endif
NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
const std::set<std::string, std::less<>>& exts) {
VkPhysicalDeviceProperties2 physical_properties{};
@@ -376,10 +406,10 @@ void Device::RemoveExtension(bool& extension, const std::string& extension_name)
loaded_extensions.erase(extension_name);
}
void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !is_suitable) {
void Device::RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !extension) {
LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name);
this->RemoveExtension(is_suitable, extension_name);
this->RemoveExtension(extension, extension_name);
}
}
@@ -400,11 +430,12 @@ void Device::RemoveExtensionFeature(bool& extension, Feature& feature,
}
template <typename Feature>
void Device::RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
void Device::RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !is_suitable) {
LOG_WARNING(Render_Vulkan, "Removing features for unsuitable extension {}", extension_name);
this->RemoveExtensionFeature(is_suitable, feature, extension_name);
if (loaded_extensions.contains(extension_name) && !extension) {
LOG_WARNING(Render_Vulkan,
"Removing features for unsuitable extension {}", extension_name);
this->RemoveExtensionFeature(extension, feature, extension_name);
}
}
@@ -416,7 +447,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_suitable = GetSuitability(surface != nullptr);
const VkDriverId driver_id = properties.driver.driverID;
const auto device_id = properties.properties.deviceID;
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
const bool is_amd_driver =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
@@ -427,11 +457,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
const bool is_s8gen2 = device_id == 0x43050a01;
//const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (!is_suitable)
LOG_WARNING(Render_Vulkan, "Unsuitable driver - continuing anyways");
if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) {
LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway");
} else if (!is_suitable) {
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
if (is_nvidia) {
nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
@@ -492,36 +524,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectToolingInfo();
if (is_qualcomm) {
// Qualcomm Adreno GPUs don't handle scaled vertex attributes; keep emulation enabled
must_emulate_scaled_formats = true;
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
"Qualcomm drivers require scaled vertex format emulation; forcing fallback");
if (extensions.shader_float_controls) {
LOG_WARNING(Render_Vulkan,
"Qualcomm drivers: VK_KHR_shader_float_controls is unstable; disabling usage");
RemoveExtension(extensions.shader_float_controls,
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
} else {
LOG_INFO(Render_Vulkan,
"Qualcomm drivers: VK_KHR_shader_float_controls already unavailable");
}
disable_shader_float_controls_usage = true;
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
features.shader_atomic_int64.shaderBufferInt64Atomics = false;
features.shader_atomic_int64.shaderSharedInt64Atomics = false;
features.features.shaderInt64 = false;
#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
// Patch the driver to enable BCn textures.
const auto major = (properties.properties.driverVersion >> 24) << 2;
const auto minor = (properties.properties.driverVersion >> 12) & 0xFFFU;
const auto vendor = properties.properties.vendorID;
const auto patch_status = adrenotools_get_bcn_type(major, minor, vendor);
if (patch_status == ADRENOTOOLS_BCN_PATCH) {
LOG_INFO(Render_Vulkan, "Patching Adreno driver to support BCn texture formats");
if (adrenotools_patch_bcn(
reinterpret_cast<void*>(dld.vkGetPhysicalDeviceFormatProperties))) {
OverrideBcnFormats(format_properties);
} else {
LOG_ERROR(Render_Vulkan, "Patch failed! Driver code may now crash");
}
} else if (patch_status == ADRENOTOOLS_BCN_BLOB) {
LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures without patches");
} else {
LOG_WARNING(Render_Vulkan, "Adreno driver can't be patched to enable BCn textures");
}
#endif
}
if (is_nvidia) {
@@ -533,35 +556,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
if (nv_major_version >= 510) {
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
LOG_WARNING(Render_Vulkan,
"NVIDIA Drivers >= 510 do not support MSAA->MSAA image blits. "
"MSAA scaling will use 3D helpers. MSAA resolves work normally.");
cant_blit_msaa = true;
}
}
if (extensions.extended_dynamic_state3 && is_radv) {
LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
dynamic_state3_blending = false;
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 23, 1, 0)) {
LOG_WARNING(Render_Vulkan,
"RADV versions older than 23.1.0 have broken depth clamp dynamic state");
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
dynamic_state3_enables = false;
// Mali / NVIDIA proprietary drivers: shader stencil export not supported.
// Use hardware depth/stencil blits instead when available.
if (!extensions.shader_stencil_export) {
LOG_INFO(Render_Vulkan,
"NVIDIA: VK_EXT_shader_stencil_export not supported, using hardware blits "
"for depth/stencil operations");
LOG_INFO(Render_Vulkan, " D24S8 hardware blit support: {}",
is_blit_depth24_stencil8_supported);
LOG_INFO(Render_Vulkan, " D32S8 hardware blit support: {}",
is_blit_depth32_stencil8_supported);
if (!is_blit_depth24_stencil8_supported && !is_blit_depth32_stencil8_supported) {
LOG_WARNING(Render_Vulkan,
"NVIDIA: Neither shader export nor hardware blits available for "
"depth/stencil. Performance may be degraded.");
}
}
}
if (extensions.extended_dynamic_state3 && (is_amd_driver || driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
// AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation
LOG_WARNING(Render_Vulkan,
"AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
dynamic_state3_blending = false;
}
sets_per_pool = 64;
if (is_amd_driver) {
// AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
@@ -574,11 +593,44 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
}
const bool needs_mobile_alignment_clamp = is_qualcomm || is_arm;
if (is_qualcomm) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
has_broken_parallel_compiling = true;
has_broken_parallel_compiling = true;
LOG_WARNING(Render_Vulkan,
"Adreno drivers exhibit instability with parallel shader compilation; "
"forcing single-threaded pipeline builds");
if (extensions.push_descriptor) {
LOG_WARNING(Render_Vulkan,
"Qualcomm driver mishandles push descriptors; disabling "
"VK_KHR_push_descriptor to avoid incomplete descriptor writes");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
}
}
if (needs_mobile_alignment_clamp) {
const char* driver_label = is_qualcomm ? "Qualcomm" : "ARM";
constexpr VkDeviceSize MIN_UNIFORM_ALIGNMENT = 256;
const VkDeviceSize reported_uniform_alignment =
properties.properties.limits.minUniformBufferOffsetAlignment;
if (reported_uniform_alignment < MIN_UNIFORM_ALIGNMENT) {
uniform_buffer_alignment_minimum = MIN_UNIFORM_ALIGNMENT;
LOG_WARNING(Render_Vulkan,
"{} driver reports {}-byte minUniformBufferOffsetAlignment; clamping to {}",
driver_label, reported_uniform_alignment, uniform_buffer_alignment_minimum);
}
constexpr VkDeviceSize MIN_STORAGE_ALIGNMENT = 64;
const VkDeviceSize reported_storage_alignment =
properties.properties.limits.minStorageBufferOffsetAlignment;
if (reported_storage_alignment < MIN_STORAGE_ALIGNMENT) {
storage_buffer_alignment_minimum = MIN_STORAGE_ALIGNMENT;
LOG_WARNING(Render_Vulkan,
"{} driver reports {}-byte minStorageBufferOffsetAlignment; clamping to {}",
driver_label, reported_storage_alignment, storage_buffer_alignment_minimum);
}
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
if (sampler_limit > 0) {
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
@@ -587,9 +639,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
sampler_heap_budget = derived_budget;
LOG_WARNING(Render_Vulkan,
"Qualcomm driver reports max {} samplers; reserving {} (25%) and "
"{} driver reports max {} samplers; reserving {} (25%) and "
"allowing Eden to use {} (75%) to avoid heap exhaustion",
sampler_limit, reserved, sampler_heap_budget);
driver_label, sampler_limit, reserved, sampler_heap_budget);
}
}
@@ -610,15 +662,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
if (is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
LOG_WARNING(Render_Vulkan,
"Intel proprietary drivers do not support MSAA->MSAA image blits. "
"MSAA scaling will use 3D helpers. MSAA resolves work normally.");
cant_blit_msaa = true;
}
has_broken_compute =
CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
!Settings::values.enable_compute_pipelines.GetValue();
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = false; // Default: assume emulation isn't required
if (is_intel_anv) {
LOG_WARNING(Render_Vulkan, "Intel ANV driver does not support native BGR format");
must_emulate_bgr565 = true;
} else if (is_qualcomm) {
LOG_WARNING(Render_Vulkan,
"Qualcomm driver mishandles BGR5 formats, forcing emulation");
must_emulate_bgr565 = true;
} else if (is_arm) {
LOG_WARNING(Render_Vulkan,
"ARM Mali driver mishandles BGR5 formats, forcing emulation");
must_emulate_bgr565 = true;
}
@@ -631,51 +695,55 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
(std::min)(properties.properties.limits.maxVertexInputBindings, 16U);
}
if (is_turnip) {
LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits");
if (is_turnip || is_qualcomm) {
LOG_WARNING(Render_Vulkan, "Driver requires higher-than-reported binding limits");
properties.properties.limits.maxVertexInputBindings = 32;
}
if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) {
LOG_INFO(Render_Vulkan,
"Removing extendedDynamicState2 due to missing extendedDynamicState");
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
}
if (!extensions.extended_dynamic_state2 && extensions.extended_dynamic_state3) {
LOG_INFO(Render_Vulkan,
"Removing extendedDynamicState3 due to missing extendedDynamicState2");
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
}
// Mesa Intel drivers on UHD 620 have broken extended dynamic state that causes severe
// flickering; it is unknown whether other Intel iGPUs are affected. All UHD driver versions
// on Windows 10+ are also affected and can additionally crash outright, so dynamic state is
// forced off for them. If a fixed release ever ships, this workaround could be narrowed to
// `< VK_MAKE_API_VERSION(26, 0, 0)` for the Mesa drivers.
if ((is_mvk || (is_integrated && is_intel_anv) || (is_integrated && is_intel_windows)) && Settings::values.dyna_state.GetValue() != 0) {
LOG_WARNING(Render_Vulkan, "Driver has broken dynamic state, forcing to 0 to prevent graphical issues");
Settings::values.dyna_state.SetValue(0);
}
// Base dynamic states (VIEWPORT, SCISSOR, DEPTH_BIAS, etc.) are ALWAYS active in vk_graphics_pipeline.cpp
// This slider controls EXTENDED dynamic states with accumulative levels per Vulkan specs:
// Level 0 = Core Dynamic States only (Vulkan 1.0)
// Level 1 = Core + VK_EXT_extended_dynamic_state
// Level 2 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2
// Level 3 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2 + VK_EXT_extended_dynamic_state3
switch (Settings::values.dyna_state.GetValue()) {
case 0:
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
[[fallthrough]];
case 1:
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
[[fallthrough]];
case 2:
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
// Level 0: Disable all extended dynamic state extensions
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
break;
case 1:
// Level 1: Enable EDS1, disable EDS2 and EDS3
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
break;
case 2:
// Level 2: Enable EDS1 + EDS2, disable EDS3
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
break;
case 3:
default:
// Level 3: Enable all (EDS1 + EDS2 + EDS3)
break;
}
if (!Settings::values.vertex_input_dynamic_state.GetValue() || !extensions.extended_dynamic_state) {
// VK_EXT_vertex_input_dynamic_state is independent from EDS
// It can be enabled even without extended_dynamic_state
if (!Settings::values.vertex_input_dynamic_state.GetValue()) {
RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
@@ -720,32 +788,63 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
}
// The wanted format is not supported by hardware, search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
wanted_format, wanted_usage, format_type);
if (ShouldLogUnsupportedFormat(wanted_format, wanted_usage, format_type)) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
wanted_format, wanted_usage, format_type);
}
return wanted_format;
}
std::size_t i = 0;
for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) {
#if defined(ANDROID)
const auto downgrade_r16_to_ldr = [&](VkFormat candidate) -> VkFormat {
if (wanted_format != VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
return candidate;
}
if (candidate != VK_FORMAT_R16G16B16A16_SFLOAT &&
candidate != VK_FORMAT_R16G16B16A16_UNORM) {
return candidate;
}
static constexpr std::array ldr_candidates{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
};
for (const VkFormat format : ldr_candidates) {
if (IsFormatSupported(format, wanted_usage, format_type)) {
return format;
}
}
return candidate;
};
#endif
for (std::size_t i = 0; VkFormat alternative = alternatives[i]; ++i) {
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
}
#if defined(ANDROID)
const VkFormat selected = downgrade_r16_to_ldr(alternative);
#else
const VkFormat selected = alternative;
#endif
LOG_DEBUG(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
wanted_format, alternative, wanted_usage, format_type);
return alternative;
wanted_format, selected, wanted_usage, format_type);
return selected;
}
// No alternatives found, panic
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
wanted_format, wanted_usage, format_type);
if (ShouldLogUnsupportedFormat(wanted_format, wanted_usage, format_type)) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
wanted_format, wanted_usage, format_type);
}
return wanted_format;
}
@@ -763,8 +862,8 @@ void Device::SaveShader(std::span<const u32> spirv) const {
}
bool Device::ComputeIsOptimalAstcSupported() const {
// Disable for now to avoid converting ASTC twice.
static constexpr std::array astc_formats = {
// Verify hardware supports all ASTC formats with optimal tiling to avoid software conversion
static constexpr std::array<VkFormat, 28> astc_formats = {
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
@@ -783,9 +882,10 @@ bool Device::ComputeIsOptimalAstcSupported() const {
if (!features.features.textureCompressionASTC_LDR) {
return false;
}
const auto format_feature_usage{
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
const auto format_feature_usage{VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
for (const auto format : astc_formats) {
const auto physical_format_properties{physical.GetFormatProperties(format)};
@@ -891,11 +991,8 @@ bool Device::ShouldBoostClocks() const {
}
bool Device::HasTimelineSemaphore() const {
if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) {
// Timeline semaphores do not work properly on all Qualcomm drivers.
// They generally work properly with Turnip drivers, but are problematic on some devices
// (e.g. ZTE handsets with Snapdragon 870).
const VkDriverIdKHR driver_id = GetDriverID();
if (driver_id == VK_DRIVER_ID_MESA_TURNIP || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
return false;
}
return features.timeline_semaphore.timelineSemaphore;
@@ -982,14 +1079,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
// Set next pointer.
void** next = &features2.pNext;
// Vulkan 1.2, 1.3 and 1.4 features
// Vulkan 1.2 and 1.3 features
if (instance_version >= VK_API_VERSION_1_2) {
features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
features_1_2.pNext = &features_1_3;
features_1_2.pNext = nullptr;
*next = &features_1_2;
if (instance_version >= VK_API_VERSION_1_3) {
features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
features_1_3.pNext = nullptr;
features_1_2.pNext = &features_1_3;
}
}
// Test all features we know about. If the feature is not available in core at our
@@ -1088,6 +1187,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
SetNext(next, properties.transform_feedback);
}
if (extensions.maintenance5) {
properties.maintenance5.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_PROPERTIES_KHR;
SetNext(next, properties.maintenance5);
}
if (extensions.multi_draw) {
properties.multi_draw.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT;
SetNext(next, properties.multi_draw);
}
// Perform the property fetch.
physical.GetProperties2(properties2);
@@ -1120,14 +1229,85 @@ bool Device::GetSuitability(bool requires_swapchain) {
}
}
// VK_DYNAMIC_STATE
// Driver detection variables for workarounds in GetSuitability
const VkDriverId driver_id = properties.driver.driverID;
[[maybe_unused]] const bool is_amd_driver =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
[[maybe_unused]] const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
// VK_EXT_CUSTOM_BORDER_COLOR
if (extensions.custom_border_color && (is_qualcomm || is_turnip)) {
const char* driver_name = is_turnip ? "Turnip" : "Adreno";
LOG_WARNING(Render_Vulkan,
"{} driver previously required custom border color disablement; leaving VK_EXT_custom_border_color enabled for testing",
driver_name);
}
// Drivers that need VK_EXT_extended_dynamic_state2 workarounds appear below this point.
// Drivers that need VK_EXT_extended_dynamic_state3 workarounds appear below this point.
[[maybe_unused]] const auto device_id = properties.properties.deviceID;
// Samsung: Broken extendedDynamicState3ColorBlendEquation
// Disable blend equation dynamic state, force static pipeline state
if (extensions.extended_dynamic_state3 &&
(driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
LOG_WARNING(Render_Vulkan,
"Samsung: Disabling broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
}
// Intel Windows < 27.20.100.0: Broken VertexInputDynamicState
// Disable VertexInputDynamicState on old Intel Windows drivers
if (extensions.vertex_input_dynamic_state && is_intel_windows) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
LOG_WARNING(Render_Vulkan,
"Intel Windows < 27.20.100.0: Disabling broken VK_EXT_vertex_input_dynamic_state");
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
}
if (Settings::values.dyna_state.GetValue() == 0) {
LOG_INFO(Render_Vulkan, "Extended Dynamic State disabled by user setting, clearing all EDS features");
features.custom_border_color.customBorderColors = false;
features.custom_border_color.customBorderColorWithoutFormat = false;
features.extended_dynamic_state.extendedDynamicState = false;
features.extended_dynamic_state2.extendedDynamicState2 = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask = false;
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
}
// Return whether we were suitable.
return suitable;
}
void Device::RemoveUnsuitableExtensions() {
// VK_EXT_custom_border_color
extensions.custom_border_color = features.custom_border_color.customBorderColors &&
features.custom_border_color.customBorderColorWithoutFormat;
// Enable extension if driver supports it, then check individual features
// - customBorderColors: Required to use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT
// - customBorderColorWithoutFormat: Optional, allows VK_FORMAT_UNDEFINED
// If only customBorderColors is available, we must provide a specific format
if (extensions.custom_border_color) {
// Verify that at least customBorderColors is available
if (!features.custom_border_color.customBorderColors) {
LOG_WARNING(Render_Vulkan,
"VK_EXT_custom_border_color reported but customBorderColors feature not available, disabling");
extensions.custom_border_color = false;
}
}
RemoveExtensionFeatureIfUnsuitable(extensions.custom_border_color, features.custom_border_color,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
@@ -1156,21 +1336,79 @@ void Device::RemoveUnsuitableExtensions() {
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
// VK_EXT_extended_dynamic_state3
dynamic_state3_blending =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable &&
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation &&
const bool supports_color_blend_enable =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable;
const bool supports_color_blend_equation =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation;
const bool supports_color_write_mask =
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask;
dynamic_state3_enables =
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable &&
dynamic_state3_blending = supports_color_blend_enable && supports_color_blend_equation &&
supports_color_write_mask;
const bool supports_depth_clamp_enable =
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable;
const bool supports_logic_op_enable =
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
const bool supports_line_raster_mode =
features.extended_dynamic_state3.extendedDynamicState3LineRasterizationMode &&
extensions.line_rasterization && features.line_rasterization.rectangularLines;
const bool supports_conservative_raster_mode =
features.extended_dynamic_state3.extendedDynamicState3ConservativeRasterizationMode &&
extensions.conservative_rasterization;
const bool supports_line_stipple_enable =
features.extended_dynamic_state3.extendedDynamicState3LineStippleEnable &&
extensions.line_rasterization && features.line_rasterization.stippledRectangularLines;
const bool supports_alpha_to_coverage =
features.extended_dynamic_state3.extendedDynamicState3AlphaToCoverageEnable;
const bool supports_alpha_to_one =
features.extended_dynamic_state3.extendedDynamicState3AlphaToOneEnable &&
features.features.alphaToOne;
dynamic_state3_depth_clamp_enable = supports_depth_clamp_enable;
dynamic_state3_logic_op_enable = supports_logic_op_enable;
dynamic_state3_line_raster_mode = supports_line_raster_mode;
dynamic_state3_conservative_raster_mode = supports_conservative_raster_mode;
dynamic_state3_line_stipple_enable = supports_line_stipple_enable;
dynamic_state3_alpha_to_coverage = supports_alpha_to_coverage;
dynamic_state3_alpha_to_one = supports_alpha_to_one;
dynamic_state3_enables = dynamic_state3_depth_clamp_enable || dynamic_state3_logic_op_enable ||
dynamic_state3_line_raster_mode ||
dynamic_state3_conservative_raster_mode ||
dynamic_state3_line_stipple_enable ||
dynamic_state3_alpha_to_coverage || dynamic_state3_alpha_to_one;
extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables;
dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3;
dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3;
if (!extensions.extended_dynamic_state3) {
dynamic_state3_blending = false;
dynamic_state3_enables = false;
dynamic_state3_depth_clamp_enable = false;
dynamic_state3_logic_op_enable = false;
dynamic_state3_line_raster_mode = false;
dynamic_state3_conservative_raster_mode = false;
dynamic_state3_line_stipple_enable = false;
dynamic_state3_alpha_to_coverage = false;
dynamic_state3_alpha_to_one = false;
}
RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state3,
features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
// VK_EXT_robustness2
// Enable if at least one robustness2 feature is available
extensions.robustness_2 = features.robustness2.robustBufferAccess2 ||
features.robustness2.robustImageAccess2 ||
features.robustness2.nullDescriptor;
RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// VK_EXT_image_robustness
// Enable if robustImageAccess is available
extensions.image_robustness = features.image_robustness.robustImageAccess;
RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness,
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
// VK_EXT_provoking_vertex
if (Settings::values.provoking_vertex.GetValue()) {
extensions.provoking_vertex = features.provoking_vertex.provokingVertexLast
@@ -1215,6 +1453,14 @@ void Device::RemoveUnsuitableExtensions() {
properties.transform_feedback.maxTransformFeedbackBuffers > 0 &&
properties.transform_feedback.transformFeedbackQueries &&
properties.transform_feedback.transformFeedbackDraw;
if (extensions.transform_feedback) {
features.transform_feedback.transformFeedback = VK_TRUE;
features.transform_feedback.geometryStreams = VK_TRUE;
} else {
features.transform_feedback.transformFeedback = VK_FALSE;
features.transform_feedback.geometryStreams = VK_FALSE;
}
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
@@ -1225,6 +1471,18 @@ void Device::RemoveUnsuitableExtensions() {
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
// VK_EXT_multi_draw
extensions.multi_draw = features.multi_draw.multiDraw;
if (extensions.multi_draw) {
features.multi_draw.multiDraw = VK_TRUE;
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw: maxMultiDrawCount={}",
properties.multi_draw.maxMultiDrawCount);
} else {
features.multi_draw.multiDraw = VK_FALSE;
}
RemoveExtensionFeatureIfUnsuitable(extensions.multi_draw, features.multi_draw,
VK_EXT_MULTI_DRAW_EXTENSION_NAME);
// VK_KHR_pipeline_executable_properties
if (Settings::values.renderer_shader_feedback.GetValue()) {
extensions.pipeline_executable_properties =
@@ -1248,6 +1506,98 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
features.workgroup_memory_explicit_layout,
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
// VK_EXT_swapchain_maintenance1 (extension only, has features)
// Requires VK_EXT_surface_maintenance1 instance extension
extensions.swapchain_maintenance1 = features.swapchain_maintenance1.swapchainMaintenance1;
if (extensions.swapchain_maintenance1) {
// Check if VK_EXT_surface_maintenance1 instance extension is available
const auto instance_extensions = vk::EnumerateInstanceExtensionProperties(dld);
const bool has_surface_maintenance1 = instance_extensions && std::ranges::any_of(*instance_extensions,
[](const VkExtensionProperties& prop) {
return std::strcmp(prop.extensionName, VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME) == 0;
});
if (!has_surface_maintenance1) {
LOG_WARNING(Render_Vulkan,
"VK_EXT_swapchain_maintenance1 requires VK_EXT_surface_maintenance1, disabling");
extensions.swapchain_maintenance1 = false;
features.swapchain_maintenance1.swapchainMaintenance1 = false;
}
}
RemoveExtensionFeatureIfUnsuitable(extensions.swapchain_maintenance1, features.swapchain_maintenance1,
VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
// VK_KHR_maintenance1 (core in Vulkan 1.1, no features)
extensions.maintenance1 = loaded_extensions.contains(VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance1, VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
// VK_KHR_maintenance2 (core in Vulkan 1.1, no features)
extensions.maintenance2 = loaded_extensions.contains(VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance2, VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
// VK_KHR_maintenance3 (core in Vulkan 1.1, no features)
extensions.maintenance3 = loaded_extensions.contains(VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance3, VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
// VK_KHR_maintenance4
extensions.maintenance4 = features.maintenance4.maintenance4;
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance4, features.maintenance4,
VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
// VK_KHR_maintenance5
extensions.maintenance5 = features.maintenance5.maintenance5;
if (extensions.maintenance5) {
LOG_INFO(Render_Vulkan, "VK_KHR_maintenance5 properties: polygonModePointSize={} "
"depthStencilSwizzleOne={} earlyFragmentTests={} nonStrictWideLines={}",
properties.maintenance5.polygonModePointSize,
properties.maintenance5.depthStencilSwizzleOneSupport,
properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting &&
properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting,
properties.maintenance5.nonStrictWideLinesUseParallelogram);
}
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance5, features.maintenance5,
VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
// VK_KHR_maintenance6
extensions.maintenance6 = features.maintenance6.maintenance6;
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance6, features.maintenance6,
VK_KHR_MAINTENANCE_6_EXTENSION_NAME);
// VK_KHR_maintenance7 (proposed for Vulkan 1.4, no features)
extensions.maintenance7 = loaded_extensions.contains(VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance7, VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
// VK_KHR_maintenance8 (proposed for Vulkan 1.4, no features)
extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
}
bool Device::SupportsSubgroupStage(VkShaderStageFlags stage_mask) const {
    // An empty stage mask trivially satisfies the requirement.
    if (stage_mask == 0) {
        return true;
    }
    // Every requested stage bit must be present in the driver-reported set;
    // compute the bits that are requested but not supported.
    const VkShaderStageFlags missing =
        stage_mask & ~properties.subgroup_properties.supportedStages;
    return missing == 0;
}
bool Device::IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature,
                                        VkShaderStageFlags stage_mask) const {
    // The subgroup operation class must be advertised by the driver, and the
    // requested shader stages must all support subgroup operations.
    const bool operation_supported =
        (properties.subgroup_properties.supportedOperations & feature) != 0;
    return operation_supported && SupportsSubgroupStage(stage_mask);
}
bool Device::SupportsWarpIntrinsics(VkShaderStageFlagBits stage) const {
    // Warp intrinsics require this full set of subgroup operation classes.
    constexpr VkSubgroupFeatureFlags required_ops =
        VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
        VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
        VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
    const VkSubgroupFeatureFlags supported_ops =
        properties.subgroup_properties.supportedOperations;
    // Bail out early when any required operation class is missing.
    if ((supported_ops & required_ops) != required_ops) {
        return false;
    }
    // Finally, the target stage itself must support subgroup operations.
    return SupportsSubgroupStage(stage);
}
void Device::SetupFamilies(VkSurfaceKHR surface) {

View File

@@ -6,6 +6,7 @@
#pragma once
#include <algorithm>
#include <optional>
#include <set>
#include <span>
@@ -38,9 +39,13 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \
FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \
FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \
shader_demote_to_helper_invocation) \
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control)
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \
FEATURE(KHR, Maintenance4, MAINTENANCE_4, maintenance4)
#define FOR_EACH_VK_FEATURE_1_4(FEATURE)
// Define all features which may be used by the implementation and require an extension here.
#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
@@ -53,12 +58,16 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
FEATURE(EXT, MultiDraw, MULTI_DRAW, multi_draw) \
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
primitive_topology_list_restart) \
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
FEATURE(EXT, SwapchainMaintenance1, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \
FEATURE(KHR, Maintenance5, MAINTENANCE_5, maintenance5) \
FEATURE(KHR, Maintenance6, MAINTENANCE_6, maintenance6) \
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
@@ -85,6 +94,11 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(KHR, SWAPCHAIN, swapchain) \
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
@@ -111,6 +125,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
@@ -162,6 +177,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE_NAME(depth_bias_control, depthBiasExact) \
FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
FEATURE_NAME(format_a4b4g4r4, formatA4B4G4R4) \
FEATURE_NAME(image_robustness, robustImageAccess) \
FEATURE_NAME(index_type_uint8, indexTypeUint8) \
FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
@@ -288,12 +304,14 @@ public:
/// Returns uniform buffer alignment requirement.
VkDeviceSize GetUniformBufferAlignment() const {
return properties.properties.limits.minUniformBufferOffsetAlignment;
return (std::max)(properties.properties.limits.minUniformBufferOffsetAlignment,
uniform_buffer_alignment_minimum);
}
/// Returns storage alignment requirement.
VkDeviceSize GetStorageBufferAlignment() const {
return properties.properties.limits.minStorageBufferOffsetAlignment;
return (std::max)(properties.properties.limits.minStorageBufferOffsetAlignment,
storage_buffer_alignment_minimum);
}
/// Returns the maximum range for storage buffers.
@@ -326,6 +344,7 @@ public:
return properties.float_controls;
}
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return features.features.textureCompressionASTC_LDR;
@@ -341,6 +360,11 @@ public:
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
}
/// Returns true if vkResetQueryPool can be used from the host.
bool SupportsHostQueryReset() const {
return features.host_query_reset.hostQueryReset;
}
/// Returns true if the device supports float64 natively.
bool IsFloat64Supported() const {
return features.features.shaderFloat64;
@@ -371,11 +395,26 @@ public:
return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
}
/// Returns true if the device supports the provided subgroup feature.
bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature) const {
return properties.subgroup_properties.supportedOperations & feature;
/// Returns true if the device supports the provided subgroup feature for the given stages.
bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature,
VkShaderStageFlags stage_mask = 0) const;
/// Returns true if the device reports subgroup support for the provided shader stages.
bool SupportsSubgroupStage(VkShaderStageFlags stage_mask) const;
/// Returns the set of stages that report subgroup support.
VkShaderStageFlags GetSubgroupSupportedStages() const {
return properties.subgroup_properties.supportedStages;
}
/// Returns the set of subgroup operations reported by the driver.
VkSubgroupFeatureFlags GetSubgroupSupportedOperations() const {
return properties.subgroup_properties.supportedOperations;
}
/// Returns true if the driver can execute all warp intrinsics for the given shader stage.
bool SupportsWarpIntrinsics(VkShaderStageFlagBits stage) const;
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return properties.push_descriptor.maxPushDescriptors;
@@ -451,9 +490,14 @@ public:
return extensions.swapchain_mutable_format;
}
/// Returns true if VK_EXT_swapchain_maintenance1 is enabled.
bool IsExtSwapchainMaintenance1Enabled() const {
return extensions.swapchain_maintenance1;
}
/// Returns true if VK_KHR_shader_float_controls is enabled.
bool IsKhrShaderFloatControlsSupported() const {
return extensions.shader_float_controls;
return extensions.shader_float_controls && !disable_shader_float_controls_usage;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
@@ -487,10 +531,18 @@ public:
}
/// Returns true if the device supports VK_EXT_shader_stencil_export.
/// Note: Most Mali/NVIDIA drivers don't support this. Use hardware blits as fallback.
bool IsExtShaderStencilExportSupported() const {
return extensions.shader_stencil_export;
}
/// Returns true if depth/stencil operations can be performed efficiently.
/// Either through shader export or hardware blits.
bool CanPerformDepthStencilOperations() const {
return extensions.shader_stencil_export || is_blit_depth24_stencil8_supported ||
is_blit_depth32_stencil8_supported;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return extensions.depth_range_unrestricted;
@@ -531,6 +583,46 @@ public:
return extensions.custom_border_color;
}
/// Returns true if the device supports VK_EXT_image_robustness.
bool IsExtImageRobustnessSupported() const {
return extensions.image_robustness;
}
/// Returns true if robustImageAccess is supported.
bool IsRobustImageAccessSupported() const {
return features.image_robustness.robustImageAccess;
}
/// Returns true if the device supports VK_EXT_robustness2.
bool IsExtRobustness2Supported() const {
return extensions.robustness_2;
}
/// Returns true if robustBufferAccess2 is supported.
bool IsRobustBufferAccess2Supported() const {
return features.robustness2.robustBufferAccess2;
}
/// Returns true if robustImageAccess2 is supported.
bool IsRobustImageAccess2Supported() const {
return features.robustness2.robustImageAccess2;
}
/// Returns true if nullDescriptor is supported.
bool IsNullDescriptorSupported() const {
return features.robustness2.nullDescriptor;
}
/// Returns true if customBorderColors feature is available.
bool IsCustomBorderColorsSupported() const {
return features.custom_border_color.customBorderColors;
}
/// Returns true if customBorderColorWithoutFormat feature is available.
bool IsCustomBorderColorWithoutFormatSupported() const {
return features.custom_border_color.customBorderColorWithoutFormat;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
bool IsExtExtendedDynamicStateSupported() const {
return extensions.extended_dynamic_state;
@@ -580,9 +672,69 @@ public:
return extensions.line_rasterization;
}
/// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
bool SupportsRectangularLines() const {
return features.line_rasterization.rectangularLines != VK_FALSE;
}
bool SupportsSmoothLines() const {
return features.line_rasterization.smoothLines != VK_FALSE;
}
bool SupportsStippledRectangularLines() const {
return features.line_rasterization.stippledRectangularLines != VK_FALSE;
}
bool SupportsAlphaToOne() const {
return features.features.alphaToOne != VK_FALSE;
}
bool SupportsDualSourceBlend(u32 required_dual_source_attachments = 1) const {
const u32 max_dual = properties.properties.limits.maxFragmentDualSrcAttachments;
return features.features.dualSrcBlend != VK_FALSE &&
max_dual >= required_dual_source_attachments;
}
u32 MaxFragmentDualSrcAttachments() const {
return properties.properties.limits.maxFragmentDualSrcAttachments;
}
bool SupportsDynamicState3DepthClampEnable() const {
return dynamic_state3_depth_clamp_enable;
}
bool SupportsDynamicState3LogicOpEnable() const {
return dynamic_state3_logic_op_enable;
}
bool SupportsDynamicState3LineRasterizationMode() const {
return dynamic_state3_line_raster_mode;
}
bool SupportsDynamicState3ConservativeRasterizationMode() const {
return dynamic_state3_conservative_raster_mode;
}
bool SupportsDynamicState3LineStippleEnable() const {
return dynamic_state3_line_stipple_enable;
}
bool SupportsDynamicState3AlphaToCoverageEnable() const {
return dynamic_state3_alpha_to_coverage;
}
bool SupportsDynamicState3AlphaToOneEnable() const {
return dynamic_state3_alpha_to_one;
}
/// Returns true when the user enabled extended core dynamic states (level > 0).
bool UsesAdvancedCoreDynamicState() const {
return Settings::values.dyna_state.GetValue() > 0;
}
/// Returns true if VK_EXT_vertex_input_dynamic_state is enabled on the device.
bool IsExtVertexInputDynamicStateSupported() const {
return extensions.vertex_input_dynamic_state;
return extensions.vertex_input_dynamic_state &&
features.vertex_input_dynamic_state.vertexInputDynamicState;
}
/// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation
@@ -613,10 +765,11 @@ public:
/// Returns the minimum supported version of SPIR-V.
u32 SupportedSpirvVersion() const {
if (instance_version >= VK_API_VERSION_1_3) {
const bool has_float_controls = extensions.shader_float_controls;
if (instance_version >= VK_API_VERSION_1_3 && has_float_controls) {
return 0x00010600U;
}
if (extensions.spirv_1_4) {
if (extensions.spirv_1_4 && has_float_controls) {
return 0x00010400U;
}
return 0x00010300U;
@@ -716,6 +869,68 @@ public:
return features2.features.multiViewport;
}
/// Returns true if the device supports VK_KHR_maintenance1.
bool IsKhrMaintenance1Supported() const {
return extensions.maintenance1;
}
/// Returns true if the device supports VK_KHR_maintenance2.
bool IsKhrMaintenance2Supported() const {
return extensions.maintenance2;
}
/// Returns true if the device supports VK_KHR_maintenance3.
bool IsKhrMaintenance3Supported() const {
return extensions.maintenance3;
}
/// Returns true if the device supports VK_KHR_maintenance4.
bool IsKhrMaintenance4Supported() const {
return extensions.maintenance4;
}
/// Returns true if the device supports VK_KHR_maintenance5.
bool IsKhrMaintenance5Supported() const {
return extensions.maintenance5;
}
/// Returns true if polygon mode POINT supports gl_PointSize.
bool SupportsPolygonModePointSize() const {
return extensions.maintenance5 && properties.maintenance5.polygonModePointSize;
}
/// Returns true if depth/stencil swizzle ONE is supported.
bool SupportsDepthStencilSwizzleOne() const {
return extensions.maintenance5 && properties.maintenance5.depthStencilSwizzleOneSupport;
}
/// Returns true if early fragment tests optimizations are available.
bool SupportsEarlyFragmentTests() const {
return extensions.maintenance5 &&
properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting &&
properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting;
}
/// Returns true if the device supports VK_KHR_maintenance6.
bool IsKhrMaintenance6Supported() const {
return extensions.maintenance6;
}
/// Returns true if the device supports VK_EXT_multi_draw.
bool IsExtMultiDrawSupported() const {
return extensions.multi_draw;
}
/// Returns true if the device supports VK_KHR_maintenance7.
bool IsKhrMaintenance7Supported() const {
return extensions.maintenance7;
}
/// Returns true if the device supports VK_KHR_maintenance8.
bool IsKhrMaintenance8Supported() const {
return extensions.maintenance8;
}
[[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
u32 driver_version) {
if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
@@ -753,13 +968,13 @@ private:
void RemoveUnsuitableExtensions();
void RemoveExtension(bool& extension, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
template <typename Feature>
void RemoveExtensionFeature(bool& extension, Feature& feature,
const std::string& extension_name);
template <typename Feature>
void RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
void RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
const std::string& extension_name);
/// Sets up queue families.
@@ -799,6 +1014,7 @@ private:
FOR_EACH_VK_FEATURE_1_1(FEATURE);
FOR_EACH_VK_FEATURE_1_2(FEATURE);
FOR_EACH_VK_FEATURE_1_3(FEATURE);
FOR_EACH_VK_FEATURE_1_4(FEATURE);
FOR_EACH_VK_FEATURE_EXT(FEATURE);
FOR_EACH_VK_EXTENSION(EXTENSION);
@@ -815,6 +1031,7 @@ private:
FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_4(FEATURE_CORE);
FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT);
#undef FEATURE_CORE
@@ -830,6 +1047,8 @@ private:
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
VkPhysicalDeviceMaintenance5PropertiesKHR maintenance5{};
VkPhysicalDeviceMultiDrawPropertiesEXT multi_draw{};
VkPhysicalDeviceProperties properties{};
};
@@ -859,10 +1078,20 @@ private:
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
bool disable_shader_float_controls_usage{}; ///< True when VK_KHR_shader_float_controls cannot be safely used.
bool dynamic_state3_blending{}; ///< Has blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has at least one enable feature of dynamic_state3.
bool dynamic_state3_depth_clamp_enable{};
bool dynamic_state3_logic_op_enable{};
bool dynamic_state3_line_raster_mode{};
bool dynamic_state3_conservative_raster_mode{};
bool dynamic_state3_line_stipple_enable{};
bool dynamic_state3_alpha_to_coverage{};
bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
VkDeviceSize uniform_buffer_alignment_minimum{}; ///< Minimum enforced UBO alignment.
VkDeviceSize storage_buffer_alignment_minimum{}; ///< Minimum enforced SSBO alignment.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

View File

@@ -116,6 +116,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdDrawIndirectCount);
X(vkCmdDrawIndexedIndirectCount);
X(vkCmdDrawIndirectByteCountEXT);
X(vkCmdDrawMultiEXT);
X(vkCmdDrawMultiIndexedEXT);
X(vkCmdEndConditionalRenderingEXT);
X(vkCmdEndQuery);
X(vkCmdEndRenderPass);
@@ -145,6 +147,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetDepthWriteEnableEXT);
X(vkCmdSetPrimitiveRestartEnableEXT);
X(vkCmdSetRasterizerDiscardEnableEXT);
X(vkCmdSetAlphaToCoverageEnableEXT);
X(vkCmdSetAlphaToOneEnableEXT);
X(vkCmdSetConservativeRasterizationModeEXT);
X(vkCmdSetLineRasterizationModeEXT);
X(vkCmdSetLineStippleEnableEXT);

View File

@@ -216,6 +216,8 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
PFN_vkCmdDrawIndirectByteCountEXT vkCmdDrawIndirectByteCountEXT{};
PFN_vkCmdDrawMultiEXT vkCmdDrawMultiEXT{};
PFN_vkCmdDrawMultiIndexedEXT vkCmdDrawMultiIndexedEXT{};
PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{};
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
@@ -238,6 +240,8 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
PFN_vkCmdSetPrimitiveRestartEnableEXT vkCmdSetPrimitiveRestartEnableEXT{};
PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT{};
PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT{};
PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT{};
PFN_vkCmdSetConservativeRasterizationModeEXT vkCmdSetConservativeRasterizationModeEXT{};
PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT{};
PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT{};
@@ -1239,6 +1243,19 @@ public:
counter_buffer_offset, counter_offset, stride);
}
void DrawMultiEXT(u32 draw_count, const VkMultiDrawInfoEXT* vertex_info,
u32 instance_count, u32 first_instance, u32 stride) const noexcept {
dld->vkCmdDrawMultiEXT(handle, draw_count, vertex_info, instance_count, first_instance,
stride);
}
void DrawMultiIndexedEXT(u32 draw_count, const VkMultiDrawIndexedInfoEXT* index_info,
u32 instance_count, u32 first_instance, u32 stride,
const int32_t* vertex_offset) const noexcept {
dld->vkCmdDrawMultiIndexedEXT(handle, draw_count, index_info, instance_count,
first_instance, stride, vertex_offset);
}
void ClearAttachments(Span<VkClearAttachment> attachments,
Span<VkClearRect> rects) const noexcept {
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
@@ -1471,6 +1488,14 @@ public:
dld->vkCmdSetLogicOpEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
void SetAlphaToCoverageEnableEXT(bool enable) const noexcept {
dld->vkCmdSetAlphaToCoverageEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
void SetAlphaToOneEnableEXT(bool enable) const noexcept {
dld->vkCmdSetAlphaToOneEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
void SetDepthClampEnableEXT(bool enable) const noexcept {
dld->vkCmdSetDepthClampEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}