Compare commits

...

218 Commits

Author SHA1 Message Date
CamilleLaVey
f6a73525a3 Revert "[spv, vk, qcom] SFC 1 x 2 + SPVE." 2025-12-26 00:13:42 -03:00
CamilleLaVey
afab4aa558 Revert "Fix building" 2025-12-26 00:13:42 -03:00
CamilleLaVey
5fbfbb66cf Revert "another smol fix" 2025-12-26 00:13:42 -03:00
CamilleLaVey
99258dd645 Revert "duplicate noms noms" 2025-12-26 00:13:42 -03:00
CamilleLaVey
48c732d7f6 Revert "fix for the fix of the fix" 2025-12-26 00:13:42 -03:00
CamilleLaVey
77265157de Revert "fix it goddamn" 2025-12-26 00:13:42 -03:00
CamilleLaVey
5c6488e6de Revert "Ya basta vulkaaaan" 2025-12-26 00:13:41 -03:00
CamilleLaVey
12465c4bee Ya basta vulkaaaan 2025-12-26 00:13:41 -03:00
CamilleLaVey
7e846ed444 fix it goddamn 2025-12-26 00:13:41 -03:00
CamilleLaVey
d4edf36c47 fix for the fix of the fix 2025-12-26 00:13:41 -03:00
CamilleLaVey
7b21bb07d5 duplicate noms noms 2025-12-26 00:13:41 -03:00
CamilleLaVey
8063f80df9 another smol fix 2025-12-26 00:13:41 -03:00
CamilleLaVey
075ecc6e39 Fix building 2025-12-26 00:13:40 -03:00
CamilleLaVey
81cd6cea66 [spv, vk, qcom] SFC 1 x 2 + SPVE. 2025-12-26 00:13:40 -03:00
CamilleLaVey
2d78333479 [shader_enviroment] TICEntry limits to Empty 2025-12-26 00:13:40 -03:00
CamilleLaVey
6a0b274b1a [vk] Start of a downhill 2025-12-26 00:12:35 -03:00
CamilleLaVey
805b53657c [vk] sRGB format handling for Storage. 2025-12-26 00:12:35 -03:00
CamilleLaVey
51c6f898a6 [vk] Ordering UnsupportedFormatKey 2025-12-26 00:12:34 -03:00
CamilleLaVey
2141ab8bdf [vk] Image Remaining Layers 3D 2025-12-26 00:12:34 -03:00
CamilleLaVey
40169ed983 [vk] Another try with Depth/Stencil handling 2025-12-26 00:12:34 -03:00
CamilleLaVey
dd8bad8486 [vk, qcom] Code clean-up. 2025-12-26 00:12:34 -03:00
CamilleLaVey
0fe10f57f0 [revert] Added linear filtering in texture blitting operations 2025-12-26 00:12:34 -03:00
CamilleLaVey
8e3475632a Fix building 2025-12-26 00:12:34 -03:00
CamilleLaVey
8651647736 [vk] SL complete removal 2025-12-26 00:12:32 -03:00
CamilleLaVey
185e0c18f2 [revert] Allocate data transfer from Maxwell to VK using Sample Locations as channel for MSAA 2025-12-26 00:11:45 -03:00
CamilleLaVey
9eab1a6d05 [revert] SL Sample Count Clamp 2025-12-26 00:11:45 -03:00
CamilleLaVey
ddf441d30e [revert] SL Table order 2025-12-26 00:11:45 -03:00
CamilleLaVey
2d617438b0 [vk] SL Table order 2025-12-26 00:11:45 -03:00
CamilleLaVey
7d69ff6044 [vk] SL Sample Count Clamp 2025-12-26 00:11:45 -03:00
CamilleLaVey
4b0f581973 [vk] DrainPendingBuild 2025-12-26 00:11:44 -03:00
CamilleLaVey
b03cc654f3 [vk, qcom[ Removed parallel compilling from qcom entirely 2025-12-26 00:11:44 -03:00
CamilleLaVey
2fa36de465 [vk, qcom, turnip] TimelineSemaphore removal 2025-12-26 00:11:44 -03:00
CamilleLaVey
2434be8610 [vk] Adjusted Texel Block View for Depth/Stencil attachment images 2025-12-26 00:11:44 -03:00
CamilleLaVey
624879a21b [vk, qcom] SFC disabled 2025-12-26 00:11:44 -03:00
CamilleLaVey
8bd4c76b22 [maxwell, vk] Allocate data transfer from Maxwell to VK using Sample Locations as channel for MSAA 2025-12-26 00:11:44 -03:00
CamilleLaVey
e96972c62e [vk] Removed unused extensions 2025-12-26 00:11:43 -03:00
CamilleLaVey
f96297d188 [revert] Adjusting unused features 2025-12-26 00:11:43 -03:00
CamilleLaVey
66968f2f47 [revert] fix building 2025-12-26 00:11:43 -03:00
CamilleLaVey
1585cfd5af [vk] TEST: SFC + WGML 2025-12-26 00:11:43 -03:00
CamilleLaVey
963cf1b726 fix building 2025-12-26 00:11:43 -03:00
CamilleLaVey
db0e652299 [vk, qcom] Adjusting unused features 2025-12-26 00:11:43 -03:00
CamilleLaVey
9c7be0fafd [vk] Hotfix for DepthBounds and StencilMask. 2025-12-26 00:11:43 -03:00
CamilleLaVey
05b067a18f [revert] Update Gradle to 8.13.1 2025-12-26 00:11:42 -03:00
CamilleLaVey
a95de7c62b [Android] Core 1++ 2025-12-26 00:11:42 -03:00
CamilleLaVey
bbce9ffeed another one 2025-12-26 00:11:42 -03:00
CamilleLaVey
2168b76f91 Missing header 2025-12-26 00:11:42 -03:00
CamilleLaVey
e03c453e8f [vk] Exception modified & logged 2025-12-26 00:11:42 -03:00
CamilleLaVey
be38719fa0 [hle] BufferDescritorC 2025-12-26 00:11:42 -03:00
CamilleLaVey
eba14cf142 [vk, qcom] TimelineSemaphore syncs to GPUTick. 2025-12-26 00:11:41 -03:00
CamilleLaVey
c05eff593c [gl] WaitTick 2025-12-26 00:11:41 -03:00
CamilleLaVey
a8cbd910d8 [gl, vk] Access Tracking Synchronization 2025-12-26 00:11:41 -03:00
CamilleLaVey
bfcce2609b [vk] SurfaceType Depth/Stencil 2025-12-26 00:11:41 -03:00
CamilleLaVey
a9ca6e59bb [vk] SamplerComponentType upgraded DepthCompareSampling 2025-12-26 00:11:41 -03:00
CamilleLaVey
4f53810276 [Vk] Sample Locations Adjustments for Depth/Stencil 2025-12-26 00:11:41 -03:00
CamilleLaVey
400744178c [vk] Sample Location Depth Bit 2025-12-26 00:11:40 -03:00
CamilleLaVey
e30654b76b [vk] Increased PipelineWorkers for testing purposes 2025-12-26 00:11:40 -03:00
CamilleLaVey
d9d021be62 [vk] KeepAliveTick in Scheduler 2025-12-26 00:11:40 -03:00
CamilleLaVey
9411fcd352 [vk] Sample Locations ordering 2025-12-26 00:11:40 -03:00
CamilleLaVey
9190b6c761 Hotfix 2025-12-26 00:11:40 -03:00
CamilleLaVey
4a3d1913ac [vk] Re-introduction to MSAA - Sample Locations 2025-12-26 00:11:40 -03:00
CamilleLaVey
95782cc7a9 [vk, mobile, vendor] MegaBuffer removal 2025-12-26 00:11:40 -03:00
CamilleLaVey
525e878ddc [vk, pipeline] Added In-flight conditional for multiple pipeline compilations 2025-12-26 00:11:39 -03:00
CamilleLaVey
49894f3897 [vk, qcom] Extending GetTotalPipelineWorker resources. 2025-12-26 00:11:39 -03:00
CamilleLaVey
822d485e18 [vk] SanitizeComponent Adjustment 2025-12-26 00:11:39 -03:00
CamilleLaVey
ae48fe0a13 [vk] DualBlendFactor 2025-12-26 00:11:39 -03:00
CamilleLaVey
1a75757bbf [vk] CounterStreamer 2025-12-26 00:11:39 -03:00
CamilleLaVey
f8d93780b5 [vk, qcom] Re-enabling CBC for further testing 2025-12-26 00:11:39 -03:00
CamilleLaVey
b047a13f31 [vk] Sanitize Format/Component 2025-12-26 00:11:38 -03:00
CamilleLaVey
59271973a5 [vk] Extended PixelFormat for Depth/Stencil 2025-12-26 00:11:38 -03:00
CamilleLaVey
679aeb7692 [vk] Reordering PixelFormatNumeric 2025-12-26 00:11:38 -03:00
CamilleLaVey
fd40618fd8 Added missing headers 2025-12-26 00:11:38 -03:00
CamilleLaVey
a064cf76a3 [gl, vk] Corroborating new helpers order 2025-12-26 00:11:38 -03:00
CamilleLaVey
25636a90be Meow 2025-12-26 00:11:38 -03:00
CamilleLaVey
0152770fd7 [vk] Extending TF handling 2025-12-26 00:11:37 -03:00
CamilleLaVey
457beec066 [gl, vk, spv] Added component type handling for texture buffers and resolve pixel format variants 2025-12-26 00:11:37 -03:00
CamilleLaVey
01faf544e8 Saving Private Windows 2025-12-26 00:11:37 -03:00
CamilleLaVey
1164f17d8f [vk] Formatting missing formats 2025-12-26 00:11:37 -03:00
CamilleLaVey
89c679d08d another missing brace 2025-12-26 00:11:37 -03:00
CamilleLaVey
1d1e352a73 If this get builded i'll be able to actually, truly and sincely fly 2025-12-26 00:11:37 -03:00
CamilleLaVey
d026d17805 missing brace 2025-12-26 00:11:37 -03:00
CamilleLaVey
fc3b5a0b68 quick fix 2025-12-26 00:11:36 -03:00
CamilleLaVey
993a34273f [vk] TextureType extended 2025-12-26 00:11:36 -03:00
CamilleLaVey
54b186ef34 [vk] Adjustment BitScaleHelper 2025-12-26 00:11:36 -03:00
CamilleLaVey
71bf2b33a7 [vk] BufferCache NullBuffer handling 2025-12-26 00:11:36 -03:00
CamilleLaVey
409dbb9581 [vk] Runtime to change image layout 2025-12-26 00:11:36 -03:00
CamilleLaVey
0aa138b707 [vk] TextureFilter 2025-12-26 00:11:36 -03:00
CamilleLaVey
f5668bada0 [gl, vk] SupportLinearFilter patch 2025-12-26 00:11:35 -03:00
CamilleLaVey
e366f702ef [vk] Wrapper for Sampler Image Filter 2025-12-26 00:11:35 -03:00
CamilleLaVey
088de307d2 [vk, rasterizer] Re-order post Sample Locations removal 2025-12-26 00:11:35 -03:00
CamilleLaVey
11b83e7b73 [revert] Opcode Promotion path emulation 2025-12-26 00:11:35 -03:00
CamilleLaVey
771786523c [revert] lambda enemy of da world 2025-12-26 00:11:35 -03:00
CamilleLaVey
6b9135d53b [revert] The next step of the human kind before it's doom 2025-12-26 00:11:35 -03:00
CamilleLaVey
48a80afa79 [vk] The next step of the human kind before it's doom 2025-12-26 00:11:35 -03:00
CamilleLaVey
cbc65e20dc lambda enemy of da world 2025-12-26 00:11:34 -03:00
CamilleLaVey
9dccd2013d [vk, gl, spv] Opcode Promotion path emulation 2025-12-26 00:11:34 -03:00
CamilleLaVey
f5172f88e8 [vk] Fixing wrong enabling logic 2025-12-26 00:11:34 -03:00
CamilleLaVey
5ca605fb8a [vk] NullDescriptor guard 2025-12-26 00:11:34 -03:00
CamilleLaVey
91e134ee17 [vk] Adjusted Transform Feedback 2025-12-26 00:11:32 -03:00
CamilleLaVey
061fe400d8 [vk] Adjusted Query Cache 2025-12-26 00:08:10 -03:00
CamilleLaVey
7e79effa03 [vk, qcom] Shader Float Control changed handling 2025-12-26 00:08:10 -03:00
CamilleLaVey
ab8905e36b [vk] Removed Sample Locations 2025-12-26 00:08:10 -03:00
CamilleLaVey
da344eb513 [vk] removed ImageViewType function 2025-12-26 00:08:09 -03:00
CamilleLaVey
7b78b2dd1f [spv, vk] reworked texture view handling and added layer count overrides 2025-12-26 00:08:09 -03:00
CamilleLaVey
93f4cccb76 [vk] Unused mark for subgroups variables 2025-12-26 00:08:09 -03:00
CamilleLaVey
b190dcb937 [revert] UWU 2025-12-26 00:08:09 -03:00
CamilleLaVey
c8d01912e1 [Re-introduced] Color output handling in SPIR-V emission 2025-12-26 00:08:09 -03:00
CamilleLaVey
7347be7196 [Re-introduced] Added linear filtering in texture blitting operations 2025-12-26 00:08:09 -03:00
CamilleLaVey
d3ea3e8852 [spv, qcom] Ensuring SPV 1.3 2025-12-26 00:08:09 -03:00
CamilleLaVey
e4b6e5feff [android] Update Gradle to 8.13.1 2025-12-26 00:08:08 -03:00
CamilleLaVey
3ca3161d5c [vk, qcom] UWU 2025-12-26 00:08:08 -03:00
CamilleLaVey
5029fcf5bf [revert] Added linear filtering in texture blitting operations 2025-12-26 00:08:08 -03:00
CamilleLaVey
aa3953f3e2 [revert] Color output handling in SPIR-V emission. 2025-12-26 00:08:08 -03:00
CamilleLaVey
cef455d515 [vk] Remove forced stencil format handling in TextureCacheRuntime 2025-12-26 00:08:08 -03:00
CamilleLaVey
02d45d6a8c [revert] TiledCacheBarrier starter 2025-12-26 00:08:08 -03:00
CamilleLaVey
0c45096e6f [vk, qcom] Returned subgroups functions to QCOM 2025-12-26 00:08:07 -03:00
CamilleLaVey
3117521da9 [vk] Added support for Stencil component type in texture handling 2025-12-26 00:08:07 -03:00
CamilleLaVey
459b26465e [vk, qcom] Graphics Subgroup bugged 2025-12-26 00:08:07 -03:00
CamilleLaVey
105817f9d2 [vk] Added support for sample locations in depth and depth-stencil surfaces 2025-12-26 00:08:07 -03:00
CamilleLaVey
4c1b86a4bb [spv] SamplerComponentType 2025-12-26 00:08:07 -03:00
CamilleLaVey
90aebee96b [revert] Tightened SSBO tracking heuristics 2025-12-26 00:08:07 -03:00
CamilleLaVey
b48bc13c7c [Revert] Adjusted Track function for bias handling and alignment checks for storage buffers 2025-12-26 00:08:07 -03:00
CamilleLaVey
0026b477c5 [gl, vk] Implement SampledView method for ImageView 2025-12-26 00:08:06 -03:00
CamilleLaVey
9992e5d39c [ir, spv] Added support for sampler component types in texture handling 2025-12-26 00:08:06 -03:00
CamilleLaVey
611b3332ce [spv] Color output handling in SPIR-V emission. 2025-12-26 00:08:06 -03:00
CamilleLaVey
60f00f58ba [vk] Added linear filtering in texture blitting operations 2025-12-26 00:08:06 -03:00
CamilleLaVey
458302b3f5 [spv, qcom] Implement warp intrinsics support 2025-12-26 00:08:06 -03:00
CamilleLaVey
a793a132fc [vk] Conditioning Conditional Rendering #2 2025-12-26 00:08:06 -03:00
CamilleLaVey
122385b795 [vk, qcom] Removed SPIR-V 1.4 for qcom 2025-12-26 00:08:05 -03:00
CamilleLaVey
adbbd92a9d [vk] Adjustments to Sample Locations 2025-12-26 00:08:05 -03:00
CamilleLaVey
d3392954bf [host] Adjusted Track function for bias handling and alignment checks for storage buffers 2025-12-26 00:08:05 -03:00
CamilleLaVey
39506ae599 [host] Added logging for OOM cases with fastmem relation 2025-12-26 00:08:05 -03:00
CamilleLaVey
4bafce0e2a [ir, nvn] Tightened SSBO tracking heuristics 2025-12-26 00:08:05 -03:00
Caio Oliveira
145f1bae5c Revert "Controlled SPV features on QCOM"
This reverts commit 907b041ec6fb4f16750155f4c41e17389f2e385d.
2025-12-26 00:08:05 -03:00
CamilleLaVey
d6f14f5519 Controlled SPV features on QCOM 2025-12-26 00:08:04 -03:00
CamilleLaVey
f40774a4b9 [vk, qcom] Disabling VK_KHR_push_descriptor for qcom 2025-12-26 00:08:04 -03:00
CamilleLaVey
b30b1d6b09 [vk, vendor, mobile] Improved mobile staging buffer data 2025-12-26 00:08:04 -03:00
CamilleLaVey
3b91d0b146 [vk, rasterizer] Update sample location handling for MSAA configurations 2025-12-26 00:08:04 -03:00
CamilleLaVey
75bfe1b4d8 [vk, rasterizer] offsets float x Uint 2025-12-26 00:08:04 -03:00
CamilleLaVey
edc6b909b5 [vk] Sample Locations Structure 2025-12-26 00:08:04 -03:00
CamilleLaVey
09817947c4 [vk, rasterizer] TiledCacheBarrier starter 2025-12-26 00:08:04 -03:00
CamilleLaVey
e66bc40b8b [maxwell, vk] VK_EXT_Sample_Locations 2025-12-26 00:08:02 -03:00
CamilleLaVey
1bd8012450 [vk, qcom] Removed 500.800.51 compilling parallel restriction 2025-12-26 00:07:38 -03:00
CamilleLaVey
d763cafca6 [vk, qcom] Adjusting Sampler Budget reserved value 2025-12-26 00:07:38 -03:00
CamilleLaVey
c14175e481 [vk, qcom] UniformBufferAlignment set by hardware capabilities 2025-12-26 00:07:38 -03:00
CamilleLaVey
79f6a6096f [vk, qcom] Samplers Budget Management 2025-12-26 00:07:38 -03:00
CamilleLaVey
fd53dc0748 [vk, qcom] Extending limits of broken parallel compiling to 512.800.51 2025-12-26 00:07:37 -03:00
CamilleLaVey
3619fe7e99 [vk, qcom] Binding buffer limits 2025-12-26 00:07:37 -03:00
CamilleLaVey
79ca29d468 [vk, vendor] Clamping memory usage in mobile gpu's 2025-12-26 00:07:37 -03:00
CamilleLaVey
8ed13cf80a [vk, qcom] Remove VK_EXT_CUSTOM_BORDER_COLOR 2025-12-26 00:07:37 -03:00
CamilleLaVey
04c29645a2 Fix building 2025-12-25 23:57:02 -03:00
CamilleLaVey
5666401685 Warning fix 2025-12-25 23:57:01 -03:00
CamilleLaVey
be6e577bc2 [vk, pipeline, query_cache, rasterizer] transformFeedback buffering handling update 2025-12-25 23:57:01 -03:00
Caio Oliveira
875829f8fc Partially reverts "[vk] Changing conditions for Swapchain maintenance1 "
This reverts commit 3fc3b5fdad.
2025-12-25 23:57:01 -03:00
CamilleLaVey
fec98b8913 [revert] Resolving conflicting changes 2025-12-25 23:57:01 -03:00
CamilleLaVey
e6ec42ee9d Revert "[vk] Ensure image view flags are resolved" 2025-12-25 23:57:01 -03:00
CamilleLaVey
bc70f1b32d Revert "[vk, texture_cache] Preveting ARGB8 get misinterpretated with depth formats" 2025-12-25 23:57:01 -03:00
CamilleLaVey
94674c9143 Revert "[vk, texture_cache] BGRA8 Depth/Stencil format convertions" 2025-12-25 23:57:00 -03:00
CamilleLaVey
287dcd0bcf Revert "[maxwell] Logging for HDR wrong convertions into depth formats"
This reverts commit 66c26e39fe.
2025-12-25 23:57:00 -03:00
CamilleLaVey
cb19579b44 Revert "[surface, vk, pipeline, texture_cache] Refactor image view handling and add normalized compatible format utility"
This reverts commit 6a230bec1a.
2025-12-25 23:57:00 -03:00
CamilleLaVey
dd4deb9796 Revert "[spir-v] Add is_integer flag to texture descriptors and update image type handling" 2025-12-25 23:57:00 -03:00
CamilleLaVey
38caca49c6 Revert "[vk, pipeline, texture_cache] Renamed MakeView parametter" 2025-12-25 23:57:00 -03:00
CamilleLaVey
5a5d4b549a [vk] Gating advanced ExtendedDynamicState1 2025-12-25 23:57:00 -03:00
CamilleLaVey
2add5905b5 [licences] Updating licenses on missing files 2025-12-25 23:57:00 -03:00
CamilleLaVey
b517cfcbf3 [vk] Line rasterization and Alpha features adjusments (again) 2025-12-25 23:56:59 -03:00
CamilleLaVey
79b78de780 [vk, scheduler] Applying finising call for TF when it's not getting used 2025-12-25 23:56:59 -03:00
CamilleLaVey
352da451c0 [vk, qcom] Returning forced SScaled and UScaled formats emulations to Adreno. 2025-12-25 23:56:59 -03:00
CamilleLaVey
8ffdf69831 [vk, vendor] Forcing BGR5 emulation path due to driver misbehavior. 2025-12-25 23:56:59 -03:00
CamilleLaVey
0e4cd4ecd9 [vk] ExtendedDynamicState repair #2 2025-12-25 23:56:59 -03:00
CamilleLaVey
5919f2d860 [vk] Depth State Refresh Update. 2025-12-25 23:56:59 -03:00
CamilleLaVey
cd2f57d1e2 [vk, compute_pass] Conditioning Conditional Rendering 2025-12-25 23:56:58 -03:00
CamilleLaVey
c441d1883b [spir-v, emit] Flat Decoration Adjustment 2025-12-25 23:56:58 -03:00
CamilleLaVey
e56accf146 [spir-v, emit] SPV Image Missmatch 2025-12-25 23:56:58 -03:00
CamilleLaVey
cf37a68e07 [vk, rasterizer] Clamping Render-Area out of limits 2025-12-25 23:56:58 -03:00
CamilleLaVey
a2892fccdb [vk, rasterizer, state_tracker] LineMode disabled from scheduler 2025-12-25 23:56:58 -03:00
CamilleLaVey
c4b0d116e9 [surface, vk, pipeline, texture_cache] Texture Sampling Fix 2025-12-25 23:56:58 -03:00
CamilleLaVey
58fb3487d1 [vk, swapchain] Swapchaing Image VkQueue 2025-12-25 23:56:58 -03:00
CamilleLaVey
7d8c1baa87 [vk, graphics, pipeline, rasterizer] Alpha Coverage Adjustment 2025-12-25 23:56:57 -03:00
CamilleLaVey
ac23e3100f [vk, pipeline, texture_cache] Renamed MakeView parametter 2025-12-25 23:56:57 -03:00
CamilleLaVey
14d3d5814b [spir-v] Add is_integer flag to texture descriptors and update image type handling 2025-12-25 23:56:57 -03:00
CamilleLaVey
04e0b3147b [surface, vk, pipeline, texture_cache] Refactor image view handling and add normalized compatible format utility 2025-12-25 23:56:57 -03:00
CamilleLaVey
cb3495621e [vk] Removing false remove feature logging for robustness2 and image robustness. 2025-12-25 23:56:57 -03:00
CamilleLaVey
fbb513c83d [vk] ExtendedDynamicState repair #1 2025-12-25 23:56:57 -03:00
CamilleLaVey
8dfc42b45a [spir-v] Flat decorations for input interfaces 2025-12-25 23:56:56 -03:00
CamilleLaVey
977904cd27 [vk] VK_EXT_multi_draw 2025-12-25 23:56:54 -03:00
CamilleLaVey
0c26513484 [vk] Declaring features from Maintenance5 2025-12-25 23:53:26 -03:00
CamilleLaVey
2fb238ab56 [vk] Fixing logging statements 2025-12-25 23:53:26 -03:00
CamilleLaVey
2209efb618 [vk] Removing Image Robustness from EXT list. 2025-12-25 23:53:26 -03:00
CamilleLaVey
7d3380b38d [vk] ExtendedDynamicState impl close to Vulkan specs 2025-12-25 23:53:26 -03:00
CamilleLaVey
d161d5e7a2 [vk, rasterizer] Reduce FlushWork constant drawcalls 2025-12-25 23:53:26 -03:00
CamilleLaVey
2aa0bc9f0f [vk] Moving Maintenance features to wrapper 2025-12-25 23:53:26 -03:00
CamilleLaVey
07cf4451c2 [vk] Re-ordering tiling format features 2025-12-25 23:53:25 -03:00
CamilleLaVey
2e8e808958 [vk] Re-ordering format feature 2025-12-25 23:53:25 -03:00
CamilleLaVey
6aa581abeb [vk] Robustness2 and Image Robustness 2025-12-25 23:53:25 -03:00
CamilleLaVey
f9043627be [maxwell] Logging for HDR wrong convertions into depth formats 2025-12-25 23:53:25 -03:00
CamilleLaVey
1ddad7c59a [vk, texture_cache] BGRA8 Depth/Stencil format convertions 2025-12-25 23:53:25 -03:00
CamilleLaVey
5cea80588b [vk, texture_cache] Preveting ARGB8 get misinterpretated with depth formats 2025-12-25 23:53:25 -03:00
CamilleLaVey
df34184914 [vk] Adjusting Custom Border Color 2025-12-25 23:53:24 -03:00
CamilleLaVey
c78ad84d1e [vk] Adjusting VIDS 2025-12-25 23:53:24 -03:00
CamilleLaVey
3fe0f1829a [vk] Changing conditions for Swapchain maintenance1 2025-12-25 23:53:24 -03:00
CamilleLaVey
63527a377b [vk] Ensure image view flags are resolved 2025-12-25 23:53:24 -03:00
CamilleLaVey
c58d42a5e5 [vk] Aliging ExtendedDynamicState2 2025-12-25 23:53:24 -03:00
CamilleLaVey
eb3b4a9242 [vk, spir-v] Conditioning creation of VK_EXT_Shader_Stencil_Export in SPIR-V 2025-12-25 23:53:24 -03:00
CamilleLaVey
03c9adf480 fixing building error. 2025-12-25 23:53:24 -03:00
CamilleLaVey
66e97f718d [vk, texture_cache, vendor] Adding path for hardware resolve on shader stencil export/ MSAA image blits 2025-12-25 23:53:23 -03:00
CamilleLaVey
7ba5a3d6a0 [vk] Return VK 1.3 as main target, treat VK 1.4 core features as extensions if driver supports it 2025-12-25 23:53:23 -03:00
CamilleLaVey
4c223fdd11 [vk] Ordering double cases specified and allocating them in the correct please on GetSuitability phase 2025-12-25 23:53:23 -03:00
CamilleLaVey
b7fcb84749 Dammed macros. 2025-12-25 23:53:23 -03:00
CamilleLaVey
607dd6adac Fix building issues 2025-12-25 23:53:23 -03:00
CamilleLaVey
8663923709 [vk, qcom] VertexInputDynamicState ban removal 2025-12-25 23:53:23 -03:00
CamilleLaVey
273a0788be [vk] Bumping features to 1.4 2025-12-25 23:53:22 -03:00
CamilleLaVey
1cc6a3c5b3 [vk] Updated maintenance features 2025-12-25 23:53:22 -03:00
CamilleLaVey
2b059c5584 [vk, amd, qcom] Removed older driver workarounds 2025-12-25 23:53:22 -03:00
CamilleLaVey
ba5bb52c11 [vk, spir-v] Adding decoration for NonWritable buffers if vertexPipelineStoresAndAtomics isn't available 2025-12-25 23:53:22 -03:00
CamilleLaVey
a65552af38 [vk, buffer_cache] Aligning VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT logic 2025-12-25 23:53:22 -03:00
CamilleLaVey
50e85e232e [vk., vendor] Adding driver id flag in blacklist 2025-12-25 23:53:22 -03:00
CamilleLaVey
1bc297dc70 [Refactor, vk] DynamicState, ExtendedDynamicState and VertexInputDynamicState 2025-12-25 23:53:21 -03:00
Caio Oliveira
d25f9501e8 Revert "[vk] attempt to continue even if unsuitable driver (#3087)"
This reverts commit 7a98ee4ead.
2025-12-25 23:53:21 -03:00
Caio Oliveira
8db7de42a4 Revert "[video_core] Fix inconsistency between EDS and VIDS settings (#3148)"
This reverts commit 7157d5167e.
2025-12-25 23:53:21 -03:00
72 changed files with 3905 additions and 742 deletions

View File

@@ -578,7 +578,10 @@ public:
#endif
int flags = (fd > 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED;
void* ret = mmap(virtual_base + virtual_offset, length, prot_flags, flags, fd, host_offset);
ASSERT_MSG(ret != MAP_FAILED, "mmap: {}", strerror(errno));
ASSERT_MSG(ret != MAP_FAILED,
"mmap(virt_off=0x{:X}, host_off=0x{:X}, len=0x{:X}, virt_size=0x{:X}, backing_size=0x{:X}, perms=0x{:X}) failed: {}",
virtual_offset, host_offset, length, virtual_size, backing_size,
static_cast<u32>(perms), strerror(errno));
}
void Unmap(size_t virtual_offset, size_t length) {

View File

@@ -130,6 +130,17 @@ public:
ResetStorageBit(id.index);
}
/// Reports whether the slot identified by `id` is currently allocated.
/// A null SlotId, or an index beyond the tracked bitset, reports false
/// instead of touching out-of-range storage.
[[nodiscard]] bool Contains(SlotId id) const noexcept {
    if (!id) {
        return false;
    }
    const size_t word_index = id.index / 64;
    const size_t bit_offset = id.index % 64;
    if (word_index >= stored_bitset.size()) {
        return false;
    }
    // Test the single bit that records this slot's allocation state.
    return (stored_bitset[word_index] & (u64{1} << bit_offset)) != 0;
}
[[nodiscard]] Iterator begin() noexcept {
const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
if (it == stored_bitset.end()) {

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstring>
#include <sstream>
#include <boost/range/algorithm_ext/erase.hpp>
@@ -187,7 +191,7 @@ void HLERequestContext::ParseCommandBuffer(u32_le* src_cmdbuf, bool incoming) {
buffer_w_descriptors.push_back(rp.PopRaw<IPC::BufferDescriptorABW>());
}
const auto buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
if (!command_header->IsTipc()) {
// Padding to align to 16 bytes
@@ -294,7 +298,15 @@ Result HLERequestContext::WriteToOutgoingCommandBuffer() {
}
// Write the domain objects to the command buffer, these go after the raw untranslated data.
// TODO(Subv): This completely ignores C buffers.
if (buffer_c_offset != 0 && !buffer_c_descriptors.empty()) {
constexpr u32 WORDS_PER_DESCRIPTOR = sizeof(IPC::BufferDescriptorC) / sizeof(u32);
u32 descriptor_offset = buffer_c_offset;
for (const auto& descriptor : buffer_c_descriptors) {
std::memcpy(&cmd_buf[descriptor_offset], &descriptor, sizeof(descriptor));
descriptor_offset += WORDS_PER_DESCRIPTOR;
}
}
if (GetManager()->IsDomain()) {
current_offset = domain_offset - static_cast<u32>(outgoing_domain_objects.size());
@@ -393,10 +405,14 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBuffer target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
if (is_buffer_b) {
@@ -418,15 +434,25 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size,
std::size_t buffer_index) const {
if (buffer_index >= BufferDescriptorB().size() || size == 0) {
if (buffer_index >= BufferDescriptorB().size()) {
LOG_WARNING(Core, "WriteBufferB invalid buffer index {}", buffer_index);
return 0;
}
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write (B)");
return 0;
}
const auto buffer_size{BufferDescriptorB()[buffer_index].Size()};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBufferB target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
@@ -435,15 +461,25 @@ std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size
std::size_t HLERequestContext::WriteBufferC(const void* buffer, std::size_t size,
std::size_t buffer_index) const {
if (buffer_index >= BufferDescriptorC().size() || size == 0) {
if (buffer_index >= BufferDescriptorC().size()) {
LOG_WARNING(Core, "WriteBufferC invalid buffer index {}", buffer_index);
return 0;
}
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write (C)");
return 0;
}
const auto buffer_size{BufferDescriptorC()[buffer_index].Size()};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBufferC target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
@@ -473,12 +509,20 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
ASSERT_OR_EXECUTE_MSG(
BufferDescriptorB().size() > buffer_index, { return 0; },
"BufferDescriptorB invalid buffer_index {}", buffer_index);
return BufferDescriptorB()[buffer_index].Size();
const auto size = BufferDescriptorB()[buffer_index].Size();
if (size == 0) {
LOG_WARNING(Core, "BufferDescriptorB index {} has zero size", buffer_index);
}
return size;
} else {
ASSERT_OR_EXECUTE_MSG(
BufferDescriptorC().size() > buffer_index, { return 0; },
"BufferDescriptorC invalid buffer_index {}", buffer_index);
return BufferDescriptorC()[buffer_index].Size();
const auto size = BufferDescriptorC()[buffer_index].Size();
if (size == 0) {
LOG_WARNING(Core, "BufferDescriptorC index {} has zero size", buffer_index);
}
return size;
}
return 0;
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -422,6 +425,7 @@ private:
u32 data_payload_offset{};
u32 handles_offset{};
u32 domain_offset{};
u32 buffer_c_offset{};
std::weak_ptr<SessionRequestManager> manager{};
bool is_deferred{false};

View File

@@ -11,6 +11,7 @@
#include <vector>
#include <spirv-tools/optimizer.hpp>
#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -439,15 +440,23 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles) &&
profile.support_vote) {
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(ctx.stage);
const bool needs_warp_intrinsics = info.uses_subgroup_vote ||
info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles;
if (needs_warp_intrinsics && profile.support_vote && stage_supports_warp) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
ctx.AddCapability(spv::Capability::GroupNonUniformShuffle);
if (!profile.warp_size_potentially_larger_than_guest) {
// vote ops are only used when not taking the long path
ctx.AddCapability(spv::Capability::GroupNonUniformVote);
}
} else if (needs_warp_intrinsics && !stage_supports_warp) {
LOG_WARNING(Shader,
"Warp intrinsics requested in stage {} but the device does not report subgroup "
"support; falling back to scalar approximations",
static_cast<u32>(ctx.stage));
}
if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
ctx.AddCapability(spv::Capability::Int64Atomics);

View File

@@ -491,9 +491,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
}
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
const AttributeType output_type{ctx.runtime_info.color_output_types[index]};
Id pointer_type{ctx.output_f32};
Id store_value{value};
switch (output_type) {
case AttributeType::SignedInt:
pointer_type = ctx.output_s32;
store_value = ctx.OpBitcast(ctx.S32[1], value);
break;
case AttributeType::UnsignedInt:
pointer_type = ctx.output_u32;
store_value = ctx.OpBitcast(ctx.U32[1], value);
break;
default:
break;
}
const Id component_id{ctx.Const(component)};
const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
ctx.OpStore(pointer, value);
const Id pointer{ctx.OpAccessChain(pointer_type, ctx.frag_color.at(index), component_id)};
ctx.OpStore(pointer, store_value);
}
void EmitSetSampleMask(EmitContext& ctx, Id value) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -195,6 +198,41 @@ Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR
}
}
/// Selects the SPIR-V 4-component result type a texture sample produces for
/// the given texture definition: F32[4] for float and depth reads, S32[4]
/// for signed-integer reads, and U32[4] for unsigned-integer and stencil
/// reads. Throws InvalidArgument on an unrecognized component type.
Id TextureColorResultType(EmitContext& ctx, const TextureDefinition& def) {
    switch (def.component_type) {
    case SamplerComponentType::Float:
    case SamplerComponentType::Depth:
        return ctx.F32[4];
    case SamplerComponentType::Sint:
        return ctx.S32[4];
    case SamplerComponentType::Uint:
    case SamplerComponentType::Stencil:
        return ctx.U32[4];
    }
    throw InvalidArgument("Invalid sampler component type {}", def.component_type);
}
/// Converts a raw sampled texel to the float vec4 the guest shader expects.
/// Float/depth samples pass through unchanged; integer samples are converted
/// with the matching signed/unsigned conversion op; stencil samples are
/// additionally scaled by 1/255 to normalize the 8-bit value into [0, 1].
/// Throws InvalidArgument on an unrecognized component type.
Id TextureSampleResultToFloat(EmitContext& ctx, const TextureDefinition& def, Id color) {
    const Id float_vec{ctx.F32[4]};
    switch (def.component_type) {
    case SamplerComponentType::Float:
    case SamplerComponentType::Depth:
        // Already in the desired representation.
        return color;
    case SamplerComponentType::Sint:
        return ctx.OpConvertSToF(float_vec, color);
    case SamplerComponentType::Uint:
        return ctx.OpConvertUToF(float_vec, color);
    case SamplerComponentType::Stencil: {
        // Stencil reads come back as raw 8-bit counts; normalize to [0, 1].
        const Id as_float{ctx.OpConvertUToF(float_vec, color)};
        const Id inv255{ctx.Const(1.0f / 255.0f)};
        const Id scale_vec{ctx.ConstantComposite(float_vec, inv255, inv255, inv255, inv255)};
        return ctx.OpFMul(float_vec, as_float, scale_vec);
    }
    }
    throw InvalidArgument("Invalid sampler component type {}", def.component_type);
}
Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
if (!index.IsImmediate() || index.U32() != 0) {
throw NotImplementedException("Indirect image indexing");
@@ -449,31 +487,39 @@ Id EmitBoundImageWrite(EmitContext&) {
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const Id texture{Texture(ctx, info, index)};
Id color{};
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
&EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
color = Emit(&EmitContext::OpImageSparseSampleImplicitLod,
&EmitContext::OpImageSampleImplicitLod, ctx, inst, color_type, texture,
coords, operands.MaskOptional(), operands.Span());
} else {
// We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as
// if the lod was explicitly zero. This may change on Turing with implicit compute
// derivatives
const Id lod{ctx.Const(0.0f)};
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
color = Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, texture,
coords, operands.Mask(), operands.Span());
}
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const ImageOperands operands(ctx, false, true, false, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
@@ -509,13 +555,18 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const ImageOperands operands(ctx, offset, offset2);
const Id texture{Texture(ctx, info, index)};
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
}
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
operands.MaskOptional(), operands.Span());
const Id color{
Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, color_type,
texture, coords, ctx.Const(info.gather_component), operands.MaskOptional(),
operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
@@ -533,6 +584,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition* def =
info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index);
const Id result_type{def ? TextureColorResultType(ctx, *def) : ctx.F32[4]};
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
@@ -542,8 +596,13 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
lod = Id{};
}
const ImageOperands operands(lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
Id color{Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst,
result_type, TextureImage(ctx, info, index), coords, operands.MaskOptional(),
operands.Span())};
if (def) {
color = TextureSampleResultToFloat(ctx, *def, color);
}
return color;
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
@@ -588,14 +647,17 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivatives, const IR::Value& offset, Id lod_clamp) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const auto operands = info.num_derivatives == 3
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
ctx.Def(offset), {}, lod_clamp)
: ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
info.num_derivatives, offset, lod_clamp);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -78,9 +81,25 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
}
bool SupportsWarpIntrinsics(const EmitContext& ctx) {
return ctx.profile.SupportsWarpIntrinsics(ctx.stage);
}
void SetAlwaysInBounds(EmitContext& ctx, IR::Inst* inst) {
SetInBoundsFlag(inst, ctx.true_value);
}
Id FallbackBallotMask(EmitContext& ctx, Id pred) {
const Id full_mask{ctx.Const(0xFFFFFFFFu)};
return ctx.OpSelect(ctx.U32[1], pred, full_mask, ctx.u32_zero_value);
}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
@@ -89,6 +108,9 @@ Id EmitLaneId(EmitContext& ctx) {
}
Id EmitVoteAll(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAll(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -102,6 +124,9 @@ Id EmitVoteAll(EmitContext& ctx, Id pred) {
}
Id EmitVoteAny(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAny(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -115,6 +140,9 @@ Id EmitVoteAny(EmitContext& ctx, Id pred) {
}
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAllEqual(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -129,6 +157,9 @@ Id EmitVoteEqual(EmitContext& ctx, Id pred) {
}
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return FallbackBallotMask(ctx, pred);
}
const Id ballot{ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
@@ -137,27 +168,46 @@ Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
}
Id EmitSubgroupEqMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_eq);
}
Id EmitSubgroupLtMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_lt);
}
Id EmitSubgroupLeMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_le);
}
Id EmitSubgroupGtMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_gt);
}
Id EmitSubgroupGeMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_ge);
}
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{EmitLaneId(ctx)};
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
@@ -177,6 +227,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
@@ -192,6 +246,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
@@ -207,6 +265,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};

View File

@@ -28,27 +28,41 @@ enum class Operation {
FPMax,
};
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
Id ComponentScalarType(EmitContext& ctx, SamplerComponentType component_type) {
switch (component_type) {
case SamplerComponentType::Float:
case SamplerComponentType::Depth:
return ctx.F32[1];
case SamplerComponentType::Sint:
return ctx.S32[1];
case SamplerComponentType::Stencil:
return ctx.U32[1];
case SamplerComponentType::Uint:
return ctx.U32[1];
}
throw InvalidArgument("Invalid sampler component type {}", component_type);
}
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc, Id sampled_type) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
const bool ms{desc.is_multisample};
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, false, false, 1, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, true, false, 1, format);
case TextureType::Color2D:
case TextureType::Color2DRect:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, false, ms, 1, format);
case TextureType::ColorArray2D:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, true, ms, 1, format);
case TextureType::Color3D:
return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, depth, false, false, 1, format);
case TextureType::ColorCube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, false, false, 1, format);
case TextureType::ColorArrayCube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, true, false, 1, format);
case TextureType::Buffer:
break;
}
@@ -315,6 +329,9 @@ void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
ctx.Decorate(id, spv::Decoration::Binding, binding);
ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
ctx.Name(id, fmt::format("ssbo{}", index));
if (!desc.is_written) {
ctx.Decorate(id, spv::Decoration::NonWritable);
}
if (ctx.profile.supported_spirv >= 0x00010400) {
ctx.interfaces.push_back(id);
}
@@ -546,6 +563,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
if (info.uses_int8 && profile.support_int8) {
AddCapability(spv::Capability::Int8);
@@ -1359,7 +1377,8 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
textures.reserve(info.texture_descriptors.size());
for (const TextureDescriptor& desc : info.texture_descriptors) {
const Id image_type{ImageType(*this, desc)};
const Id result_type{ComponentScalarType(*this, desc.component_type)};
const Id image_type{ImageType(*this, desc, result_type)};
const Id sampled_type{TypeSampledImage(image_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
@@ -1372,8 +1391,10 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
.sampled_type = sampled_type,
.pointer_type = pointer_type,
.image_type = image_type,
.result_type = result_type,
.count = desc.count,
.is_multisample = desc.is_multisample,
.component_type = desc.component_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1416,6 +1437,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
void EmitContext::DefineInputs(const IR::Program& program) {
const Info& info{program.info};
const VaryingState loads{info.loads.mask | info.passthrough.mask};
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(stage);
if (info.uses_workgroup_id) {
workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
@@ -1432,24 +1454,37 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
if (info.uses_sample_id) {
sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
if (stage == Stage::Fragment) {
Decorate(sample_id, spv::Decoration::Flat);
}
}
if (info.uses_is_helper_invocation) {
is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
}
if (info.uses_subgroup_mask) {
if (info.uses_subgroup_mask && stage_supports_warp) {
subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
if (stage == Stage::Fragment) {
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
Decorate(subgroup_mask_lt, spv::Decoration::Flat);
Decorate(subgroup_mask_le, spv::Decoration::Flat);
Decorate(subgroup_mask_gt, spv::Decoration::Flat);
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&
(info.uses_subgroup_vote || info.uses_subgroup_mask))) {
if (stage_supports_warp &&
(info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&
(info.uses_subgroup_vote || info.uses_subgroup_mask)))) {
AddCapability(spv::Capability::GroupNonUniform);
subgroup_local_invocation_id =
DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
if (stage == Stage::Fragment) {
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd) {
const Id f32_one{Const(1.0f)};
@@ -1461,6 +1496,9 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
if (loads[IR::Attribute::PrimitiveId]) {
primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
if (stage == Stage::Fragment) {
Decorate(primitive_id, spv::Decoration::Flat);
}
}
if (loads[IR::Attribute::Layer]) {
AddCapability(spv::Capability::Geometry);
@@ -1552,17 +1590,21 @@ void EmitContext::DefineInputs(const IR::Program& program) {
if (stage != Stage::Fragment) {
continue;
}
switch (info.interpolation[index]) {
case Interpolation::Smooth:
// Default
// Decorate(id, spv::Decoration::Smooth);
break;
case Interpolation::NoPerspective:
Decorate(id, spv::Decoration::NoPerspective);
break;
case Interpolation::Flat:
const bool is_integer = input_type == AttributeType::SignedInt ||
input_type == AttributeType::UnsignedInt;
if (is_integer) {
Decorate(id, spv::Decoration::Flat);
break;
} else {
switch (info.interpolation[index]) {
case Interpolation::Smooth:
break;
case Interpolation::NoPerspective:
Decorate(id, spv::Decoration::NoPerspective);
break;
case Interpolation::Flat:
Decorate(id, spv::Decoration::Flat);
break;
}
}
}
if (stage == Stage::TessellationEval) {
@@ -1658,7 +1700,18 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
continue;
}
frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
const AttributeType output_type{runtime_info.color_output_types[index]};
const Id vec_type = [&, output_type]() -> Id {
switch (output_type) {
case AttributeType::SignedInt:
return S32[4];
case AttributeType::UnsignedInt:
return U32[4];
default:
return F32[4];
}
}();
frag_color[index] = DefineOutput(*this, vec_type, std::nullopt);
Decorate(frag_color[index], spv::Decoration::Location, index);
Name(frag_color[index], fmt::format("frag_color{}", index));
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -36,8 +39,10 @@ struct TextureDefinition {
Id sampled_type;
Id pointer_type;
Id image_type;
Id result_type;
u32 count;
bool is_multisample;
SamplerComponentType component_type;
};
struct TextureBufferDefinition {
@@ -244,6 +249,7 @@ public:
Id output_f32{};
Id output_u32{};
Id output_s32{};
Id image_buffer_type{};
Id image_u32{};

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -22,6 +25,8 @@ public:
[[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
[[nodiscard]] virtual SamplerComponentType ReadTextureComponentType(u32 raw_handle) = 0;
[[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0;
[[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0;

View File

@@ -396,6 +396,10 @@ bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf)
return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
}
SamplerComponentType ReadTextureComponentType(Environment& env, const ConstBufferAddr& cbuf) {
return env.ReadTextureComponentType(GetTextureHandle(env, cbuf));
}
class Descriptors {
public:
explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
@@ -433,7 +437,9 @@ public:
u32 Add(const TextureDescriptor& desc) {
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
return desc.type == existing.type &&
desc.component_type == existing.component_type &&
desc.is_depth == existing.is_depth &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
@@ -666,10 +672,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
.secondary_shift_left = cbuf.secondary_shift_left,
.count = cbuf.count,
.size_shift = DESCRIPTOR_SIZE_SHIFT,
.component_type = ReadTextureComponentType(env, cbuf),
});
} else {
index = descriptors.Add(TextureDescriptor{
.type = flags.type,
.component_type = ReadTextureComponentType(env, cbuf),
.is_depth = flags.is_depth != 0,
.is_multisample = is_multisample,
.has_secondary = cbuf.has_secondary,

View File

@@ -1,9 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <limits>
#include "common/common_types.h"
#include "shader_recompiler/stage.h"
namespace Shader {
@@ -46,6 +52,8 @@ struct Profile {
bool support_multi_viewport{};
bool support_geometry_streams{};
u32 warp_stage_support_mask{std::numeric_limits<u32>::max()};
bool warp_size_potentially_larger_than_guest{};
bool lower_left_origin_mode{};
@@ -90,6 +98,11 @@ struct Profile {
u64 min_ssbo_alignment{};
u32 max_user_clip_distances{};
bool SupportsWarpIntrinsics(Stage stage) const {
const u32 bit = 1u << static_cast<u32>(stage);
return (warp_stage_support_mask & bit) != 0;
}
};
} // namespace Shader

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -80,6 +83,7 @@ struct TransformFeedbackVarying {
struct RuntimeInfo {
std::array<AttributeType, 32> generic_input_types{};
std::array<AttributeType, 8> color_output_types{};
VaryingState previous_stage_stores;
std::map<IR::Attribute, IR::Attribute> previous_stage_legacy_stores_mapping;

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -151,6 +154,14 @@ enum class ImageFormat : u32 {
R32G32B32A32_UINT,
};
enum class SamplerComponentType : u8 {
Float,
Sint,
Uint,
Depth,
Stencil,
};
enum class Interpolation {
Smooth,
Flat,
@@ -183,6 +194,7 @@ struct TextureBufferDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
SamplerComponentType component_type;
auto operator<=>(const TextureBufferDescriptor&) const = default;
};
@@ -204,6 +216,7 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip
struct TextureDescriptor {
TextureType type;
SamplerComponentType component_type;
bool is_depth;
bool is_multisample;
bool has_secondary;

View File

@@ -407,6 +407,12 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
template <class P>
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_graphics_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
}
}
channel_state->enabled_storage_buffers[stage] = 0;
channel_state->written_storage_buffers[stage] = 0;
}
@@ -414,8 +420,26 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
template <class P>
bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
u32 cbuf_offset, bool is_written) {
const bool already_enabled =
((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_graphics_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping graphics storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return false;
}
}
}
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_graphics_storage_buffers;
}
}
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
@@ -446,6 +470,12 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_compute_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
}
}
channel_state->enabled_compute_storage_buffers = 0;
channel_state->written_compute_storage_buffers = 0;
channel_state->image_compute_texture_buffers = 0;
@@ -459,8 +489,26 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
ssbo_index);
return;
}
const bool already_enabled =
((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_compute_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping compute storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return;
}
}
}
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_compute_storage_buffers;
}
}
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
@@ -793,9 +841,23 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
const bool use_fast_buffer = needs_alignment_stream ||
(has_host_buffer &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size));
if (use_fast_buffer) {
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
@@ -834,7 +896,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if (!needs_bind) {
return;
}
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
@@ -951,9 +1012,30 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
TouchBuffer(buffer, binding.buffer_id);
const u32 size =
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
if constexpr (!IS_OPENGL) {
if (needs_alignment_stream) {
const std::span<u8> span =
runtime.BindMappedUniformBuffer(0, binding_index, size);
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
return;
}
}
SynchronizeBuffer(buffer, binding.device_addr, size);
const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);

View File

@@ -8,6 +8,7 @@
#include <algorithm>
#include <array>
#include <bit>
#include <functional>
#include <memory>
#include <mutex>
@@ -132,6 +133,9 @@ public:
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
u32 total_graphics_storage_buffers = 0;
u32 total_compute_storage_buffers = 0;
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{};

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -235,6 +238,9 @@ constexpr Table MakeViewTable() {
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
Enable(view, PixelFormat::D24_UNORM_S8_UINT, PixelFormat::S8_UINT);
Enable(view, PixelFormat::S8_UINT_D24_UNORM, PixelFormat::S8_UINT);
Enable(view, PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::S8_UINT);
return view;
}

View File

@@ -91,6 +91,10 @@ public:
uncommitted_operations.clear();
}
QueueFence(new_fence);
if (!new_fence->IsStubbed()) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick());
}
fences.push(std::move(new_fence));
if (should_flush) {
rasterizer.FlushCommands();
@@ -179,7 +183,7 @@ private:
return;
}
}
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
@@ -214,7 +218,7 @@ private:
if (!current_fence->IsStubbed()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
for (auto& operation : current_operations) {
operation();
}
@@ -237,10 +241,11 @@ private:
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
void PopAsyncFlushes(u64 completed_tick) {
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
texture_cache.CompleteGpuAccesses(completed_tick);
buffer_cache.PopAsyncFlushes();
}
query_cache.PopAsyncFlushes();

View File

@@ -211,6 +211,12 @@ void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
streamer->CloseCounter();
}
template <typename Traits>
bool QueryCacheBase<Traits>::HasStreamer(QueryType counter_type) const {
    // A streamer slot is populated lazily; a null entry means this query
    // type has no backing streamer on this backend.
    return impl->streamers[static_cast<size_t>(counter_type)] != nullptr;
}
template <typename Traits>
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
size_t index = static_cast<size_t>(counter_type);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -92,6 +95,8 @@ public:
void CounterReset(QueryType counter_type);
[[nodiscard]] bool HasStreamer(QueryType counter_type) const;
void CounterClose(QueryType counter_type);
void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,

View File

@@ -198,6 +198,10 @@ public:
return device.CanReportMemoryUsage();
}
u32 GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment());
}
u32 GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
}

View File

@@ -7,13 +7,50 @@
#include <cstring>
#include <bit>
#include <numeric>
#include <optional>
#include "common/cityhash.h"
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/surface.h"
namespace OpenGL {
namespace {
// Maps a shader sampler component type onto the pixel-format numeric
// category it requires. Returns std::nullopt when the component type has
// no direct numeric counterpart.
std::optional<VideoCore::Surface::PixelFormatNumeric>
NumericFromComponentType(Shader::SamplerComponentType component_type) {
    using VideoCore::Surface::PixelFormatNumeric;
    if (component_type == Shader::SamplerComponentType::Float) {
        return PixelFormatNumeric::Float;
    }
    if (component_type == Shader::SamplerComponentType::Sint) {
        return PixelFormatNumeric::Sint;
    }
    if (component_type == Shader::SamplerComponentType::Uint) {
        return PixelFormatNumeric::Uint;
    }
    return std::nullopt;
}
// Chooses a texel-buffer pixel format whose numeric category matches what
// the shader samples with. Falls back to the original format when the
// component type has no mapping or no compatible variant is registered.
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
    VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
    const auto wanted = NumericFromComponentType(component_type);
    if (!wanted || *wanted == VideoCore::Surface::GetPixelFormatNumericType(format)) {
        // Nothing to resolve: unknown component type or already matching.
        return format;
    }
    const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *wanted);
    return variant ? *variant : format;
}
} // Anonymous namespace
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
@@ -174,8 +211,12 @@ void ComputePipeline::Configure() {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
auto buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++texbuf_index;
}
@@ -205,7 +246,8 @@ void ComputePipeline::Configure() {
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
textures[texture_binding] =
image_view.SampledView(desc.type, desc.component_type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << texture_binding;
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -25,6 +28,10 @@ public:
void Wait();
[[nodiscard]] u64 WaitTick() const noexcept {
return 0;
}
private:
OGLSync sync_object;
};

View File

@@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <optional>
#include <string>
#include <vector>
#include <bit>
@@ -18,6 +19,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/texture_cache.h"
#if defined(_MSC_VER) && defined(NDEBUG)
@@ -39,6 +41,38 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
// Translates a sampler component type into the numeric category a texel
// buffer format must carry; std::nullopt when no translation applies.
std::optional<VideoCore::Surface::PixelFormatNumeric>
NumericFromComponentType(Shader::SamplerComponentType component_type) {
    using VideoCore::Surface::PixelFormatNumeric;
    using CT = Shader::SamplerComponentType;
    switch (component_type) {
    case CT::Float:
        return PixelFormatNumeric::Float;
    case CT::Sint:
        return PixelFormatNumeric::Sint;
    case CT::Uint:
        return PixelFormatNumeric::Uint;
    default:
        return std::nullopt;
    }
}
// Rewrites the texel-buffer format so its numeric type agrees with the
// shader's sampler component type, when a compatible variant exists.
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
    VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
    const auto desired = NumericFromComponentType(component_type);
    if (!desired) {
        return format;
    }
    if (VideoCore::Surface::GetPixelFormatNumericType(format) == *desired) {
        return format;
    }
    if (const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *desired)) {
        return *variant;
    }
    // No registered variant with the requested numeric type; keep as-is.
    return format;
}
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
@@ -397,8 +431,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
auto buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
++texture_buffer_it;
@@ -483,7 +521,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
textures[texture_binding] =
image_view.SampledView(desc.type, desc.component_type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << stage_texture_binding;
}

View File

@@ -220,6 +220,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_gl_sparse_textures = device.HasSparseTexture2(),
.support_gl_derivative_control = device.HasDerivativeControl(),
.support_geometry_streams = true,
.warp_stage_support_mask = 0xFFFFFFFFu,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),

View File

@@ -692,6 +692,15 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
bool TextureCacheRuntime::SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept {
    // Integer formats must never be linearly filtered; for everything else,
    // only plain color texture formats are eligible.
    return !VideoCore::Surface::IsPixelFormatInteger(format) &&
           VideoCore::Surface::GetFormatType(format) ==
               VideoCore::Surface::SurfaceType::ColorTexture;
}
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -1229,6 +1238,13 @@ GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFor
return view;
}
GLuint ImageView::SampledView(Shader::TextureType view_type,
                              Shader::SamplerComponentType /*component_type*/) {
    // The component type is intentionally unused on OpenGL: the per-TIC
    // swizzle state already selects depth/stencil, so the plain view works.
    return Handle(view_type);
}
void ImageView::SetupView(Shader::TextureType view_type) {
    // Cache one GL view per texture type, keyed by the enum value.
    const auto slot = static_cast<size_t>(view_type);
    views[slot] = MakeView(view_type, internal_format);
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -13,6 +16,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -129,6 +133,8 @@ public:
return false;
}
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept;
bool HasBrokenTextureViewFormats() const noexcept {
return has_broken_texture_view_formats;
}
@@ -137,6 +143,8 @@ public:
void TickFrame() {}
void WaitForGpuTick(u64) {}
StateTracker& GetStateTracker() {
return state_tracker;
}
@@ -264,6 +272,9 @@ public:
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] GLuint SampledView(Shader::TextureType view_type,
Shader::SamplerComponentType component_type);
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}

View File

@@ -525,18 +525,24 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
blit_depth_stencil_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)
: vk::ShaderModule{}),
clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)
: vk::ShaderModule{}),
convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)),
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)
: vk::ShaderModule{}),
convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)),
convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)),
convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)),
@@ -667,6 +673,11 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
if (!device.IsExtShaderStencilExportSupported()) {
// Shader requires VK_EXT_shader_stencil_export which is not available
LOG_WARNING(Render_Vulkan, "ConvertABGR8ToD24S8 requires shader_stencil_export, skipping");
return;
}
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
convert_abgr8_to_d24s8_frag);
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
@@ -702,6 +713,11 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
if (!device.IsExtShaderStencilExportSupported()) {
// Shader requires VK_EXT_shader_stencil_export which is not available
LOG_WARNING(Render_Vulkan, "ConvertABGR8SRGBToD24S8 requires shader_stencil_export, skipping");
return;
}
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
dst_framebuffer->RenderPass(),
convert_abgr8_srgb_to_d24s8_frag);

View File

@@ -59,7 +59,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
raw1 = 0;
extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
extended_dynamic_state_2_logic_op.Assign(features.has_extended_dynamic_state_2_logic_op ? 1 : 0);
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
@@ -157,7 +157,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
return static_cast<u16>(array.stride.Value());
});
}
if (!extended_dynamic_state_2_extra) {
if (!extended_dynamic_state_2_logic_op) {
dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
}
if (!extended_dynamic_state_3_blend) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -20,9 +23,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct DynamicFeatures {
bool has_extended_dynamic_state;
bool has_extended_dynamic_state_2;
bool has_extended_dynamic_state_2_extra;
bool has_extended_dynamic_state_2_logic_op;
bool has_extended_dynamic_state_2_patch_control_points;
bool has_extended_dynamic_state_3_blend;
bool has_extended_dynamic_state_3_enables;
bool has_dual_source_blend;
bool has_dynamic_vertex_input;
};
@@ -186,7 +191,7 @@ struct FixedPipelineState {
u32 raw1;
BitField<0, 1, u32> extended_dynamic_state;
BitField<1, 1, u32> extended_dynamic_state_2;
BitField<2, 1, u32> extended_dynamic_state_2_extra;
BitField<2, 1, u32> extended_dynamic_state_2_logic_op;
BitField<3, 1, u32> extended_dynamic_state_3_blend;
BitField<4, 1, u32> extended_dynamic_state_3_enables;
BitField<5, 1, u32> dynamic_vertex_input;

View File

@@ -165,7 +165,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
{VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable | Storage}, // A8B8G8R8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
{VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
@@ -177,7 +177,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
{VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM
{VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
{VK_FORMAT_B8G8R8A8_SRGB, Attachable | Storage}, // B8G8R8A8_SRGB
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB

View File

@@ -189,12 +189,16 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VideoCommon::SamplerId sampler_id{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
const VkImageView vk_image_view{
image_view.SampledView(desc.type, desc.component_type)};
const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
!image_view.SupportsAnisotropy()};
const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
: sampler.Handle()};
const bool supports_linear_filter{
texture_cache.SupportsLinearFilter(image_view.format)};
const bool supports_depth_compare_sampling{
image_view.SupportsDepthCompareSampling()};
const VkSampler vk_sampler{
sampler.SelectHandle(supports_linear_filter, image_view.SupportsAnisotropy(),
supports_depth_compare_sampling)};
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}

View File

@@ -280,7 +280,6 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
buffer.Flush(); // Ensure host writes are visible before the GPU copy.
}
const VkBufferImageCopy copy{

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <limits>
#include <span>
#include <vector>
@@ -333,6 +334,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue) {
const VkDriverIdKHR driver_id = device.GetDriverID();
limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (limit_dynamic_storage_buffers) {
max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
}
if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
// TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
@@ -408,6 +416,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment());
}
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment());
}
@@ -583,7 +595,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
if (index >= device.GetMaxVertexInputBindings()) {
return;
}
if (device.IsExtExtendedDynamicStateSupported()) {
if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
ReserveNullBuffer();
buffer = *null_buffer;
offset = 0;
size = std::numeric_limits<u32>::max();
}
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
// When VIDS is active, the pipeline doesn't declare VERTEX_INPUT_BINDING_STRIDE as dynamic
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
@@ -623,7 +643,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
if (binding_count == 0) {
return;
}
if (device.IsExtExtendedDynamicStateSupported()) {
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
scheduler.Record([bindings_ = std::move(bindings),
buffer_handles_ = std::move(buffer_handles),
binding_count](vk::CommandBuffer cmdbuf) {
@@ -680,27 +701,50 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
}
void BufferCacheRuntime::ReserveNullBuffer() {
const VkBufferUsageFlags expected_usage = NullBufferUsageFlags();
if (null_buffer && null_buffer_usage_flags != expected_usage) {
RefreshNullBuffer();
}
if (!null_buffer) {
null_buffer = CreateNullBuffer();
}
}
VkBufferUsageFlags BufferCacheRuntime::NullBufferUsageFlags() const {
    // The null buffer can be substituted into any binding point, so it has
    // to declare every usage up front.
    VkBufferUsageFlags flags =
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    // Transform feedback is optional; only advertise it when the extension
    // is actually available.
    if (device.IsExtTransformFeedbackSupported()) {
        flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
    }
    return flags;
}
void BufferCacheRuntime::RefreshNullBuffer() {
    if (!null_buffer) {
        return;
    }
    // Fully drain the GPU before destroying a buffer that in-flight command
    // buffers may still reference, then rebuild it with current usage flags.
    scheduler.Finish();
    null_buffer.reset();
    null_buffer = CreateNullBuffer();
}
vk::Buffer BufferCacheRuntime::CreateNullBuffer() {
VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 4,
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
.usage = NullBufferUsageFlags(),
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
null_buffer_usage_flags = create_info.usage;
if (device.HasDebuggingToolAttached()) {
ret.SetObjectNameEXT("Null buffer");
}

View File

@@ -6,6 +6,8 @@
#pragma once
#include <limits>
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
@@ -94,6 +96,8 @@ public:
bool CanReportMemoryUsage() const;
u32 GetUniformBufferAlignment() const;
u32 GetStorageBufferAlignment() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
@@ -127,6 +131,9 @@ public:
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
/// Forces destruction and recreation of the shared null buffer so new usage flags take effect.
void RefreshNullBuffer();
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
[[maybe_unused]] u32 /*binding_index*/,
u32 size) {
@@ -155,6 +162,14 @@ public:
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
bool ShouldLimitDynamicStorageBuffers() const {
return limit_dynamic_storage_buffers;
}
u32 GetMaxDynamicStorageBuffers() const {
return max_dynamic_storage_buffers;
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
guest_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -162,6 +177,7 @@ private:
void ReserveNullBuffer();
vk::Buffer CreateNullBuffer();
VkBufferUsageFlags NullBufferUsageFlags() const;
struct UniformRing {
static constexpr size_t NUM_FRAMES = 3;
@@ -191,9 +207,13 @@ private:
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
VkBufferUsageFlags null_buffer_usage_flags = 0;
std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
bool limit_dynamic_storage_buffers = false;
u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
};
struct BufferCacheParams {

View File

@@ -418,6 +418,9 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
u32 src_offset, bool compare_to_zero) {
if (!device.IsExtConditionalRendering()) {
return;
}
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
@@ -448,7 +451,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(1, 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
});
}
@@ -459,10 +462,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT,
VK_SHADER_STAGE_COMPUTE_BIT)
? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV)
: std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV)),
scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
@@ -470,6 +477,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
VkBuffer src_buffer, size_t number_of_sums,
size_t min_accumulation_limit, size_t max_accumulation_limit) {
constexpr VkAccessFlags BASE_DST_ACCESS = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT;
const VkAccessFlags conditional_access =
device.IsExtConditionalRendering() ? VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT : 0;
size_t current_runs = number_of_sums;
size_t offset = 0;
while (current_runs != 0) {
@@ -486,22 +501,18 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit,
runs_to_do, used_offset](vk::CommandBuffer cmdbuf) {
runs_to_do, used_offset, conditional_access](vk::CommandBuffer cmdbuf) {
static constexpr VkMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
};
static constexpr VkMemoryBarrier write_barrier{
const VkMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
.dstAccessMask = BASE_DST_ACCESS | conditional_access,
};
const QueriesPrefixScanPushConstants uniforms{
.min_accumulation_base = static_cast<u32>(min_accumulation_limit),
@@ -519,8 +530,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(1, 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0,
write_barrier);
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
});
}
}

View File

@@ -18,14 +18,49 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_notify.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include <optional>
namespace Vulkan {
using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatNumeric;
// Converts a shader sampler component type into the pixel-format numeric
// category it demands; std::nullopt for types with no counterpart.
static std::optional<PixelFormatNumeric> NumericFromComponentType(
    Shader::SamplerComponentType component_type) {
    using CT = Shader::SamplerComponentType;
    switch (component_type) {
    case CT::Float:
        return PixelFormatNumeric::Float;
    case CT::Sint:
        return PixelFormatNumeric::Sint;
    case CT::Uint:
        return PixelFormatNumeric::Uint;
    default:
        return std::nullopt;
    }
}
// Picks the texel-buffer pixel format variant whose numeric category
// matches the shader's sampler component type; keeps the original format
// when no change is needed or no variant is available.
static PixelFormat ResolveTexelBufferFormat(PixelFormat format,
                                            Shader::SamplerComponentType component_type) {
    const auto wanted = NumericFromComponentType(component_type);
    if (!wanted || *wanted == VideoCore::Surface::GetPixelFormatNumericType(format)) {
        return format;
    }
    const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *wanted);
    return variant.value_or(format);
}
ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
DescriptorPool& descriptor_pool,
@@ -182,8 +217,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(views[index].id);
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
}

View File

@@ -34,6 +34,10 @@ public:
void Wait();
[[nodiscard]] u64 WaitTick() const noexcept {
return wait_tick;
}
private:
Scheduler& scheduler;
u64 wait_tick = 0;

View File

@@ -5,6 +5,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <optional>
#include <iostream>
#include <span>
@@ -23,7 +25,9 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_device.h"
#if defined(_MSC_VER) && defined(NDEBUG)
@@ -44,10 +48,83 @@ using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::PixelFormatNumeric;
constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
constexpr size_t MAX_IMAGE_ELEMENTS = 64;
// Maps a shader sampler component type onto the surface numeric category.
// Any component type other than Float/Sint/Uint yields std::nullopt,
// signalling "leave the current format alone".
std::optional<PixelFormatNumeric> NumericFromComponentType(
    Shader::SamplerComponentType component_type) {
    using Shader::SamplerComponentType;
    if (component_type == SamplerComponentType::Float) {
        return PixelFormatNumeric::Float;
    }
    if (component_type == SamplerComponentType::Sint) {
        return PixelFormatNumeric::Sint;
    }
    if (component_type == SamplerComponentType::Uint) {
        return PixelFormatNumeric::Uint;
    }
    return std::nullopt;
}
// Resolves the pixel format used for a texel buffer so that its numeric type
// (float / signed int / unsigned int) agrees with the sampler's declared
// component type. Keeps the incoming format whenever no adjustment is
// possible or necessary.
PixelFormat ResolveTexelBufferFormat(PixelFormat format,
                                     Shader::SamplerComponentType component_type) {
    const auto target = NumericFromComponentType(component_type);
    if (!target.has_value()) {
        return format;
    }
    const auto present = VideoCore::Surface::GetPixelFormatNumericType(format);
    if (present == *target) {
        return format;
    }
    if (const auto substitute = VideoCore::Surface::FindPixelFormatVariant(format, *target)) {
        return *substitute;
    }
    return format;
}
// True when the blend factor reads the fragment shader's second color output
// (dual-source blending), in either its D3D or GL encoding.
bool UsesDualSourceFactor(Maxwell::Blend::Factor factor) {
    using Factor = Maxwell::Blend::Factor;
    static constexpr std::array dual_source_factors{
        Factor::Source1Color_D3D,         Factor::Source1Color_GL,
        Factor::OneMinusSource1Color_D3D, Factor::OneMinusSource1Color_GL,
        Factor::Source1Alpha_D3D,         Factor::Source1Alpha_GL,
        Factor::OneMinusSource1Alpha_D3D, Factor::OneMinusSource1Alpha_GL,
    };
    return std::ranges::find(dual_source_factors, factor) != dual_source_factors.end();
}
// Rewrites a dual-source blend factor into its single-source counterpart
// (Source1* -> Source*), used when the device cannot grant another
// dual-source attachment. Non-dual-source factors pass through untouched.
Maxwell::Blend::Factor FallbackDualSourceFactor(Maxwell::Blend::Factor factor) {
    using Factor = Maxwell::Blend::Factor;
    switch (factor) {
    case Factor::Source1Color_D3D:
    case Factor::Source1Color_GL:
        return Factor::SourceColor_D3D;
    case Factor::OneMinusSource1Color_D3D:
    case Factor::OneMinusSource1Color_GL:
        return Factor::OneMinusSourceColor_D3D;
    case Factor::Source1Alpha_D3D:
    case Factor::Source1Alpha_GL:
        return Factor::SourceAlpha_D3D;
    case Factor::OneMinusSource1Alpha_D3D:
    case Factor::OneMinusSource1Alpha_GL:
        return Factor::OneMinusSourceAlpha_D3D;
    default:
        return factor;
    }
}
// Reports whether any of the attachment's four blend factors (color/alpha,
// source/destination) needs the second fragment-shader color output.
bool AttachmentUsesDualSource(const FixedPipelineState::BlendingAttachment& blend) {
    const std::array factors{
        blend.SourceRGBFactor(),
        blend.DestRGBFactor(),
        blend.SourceAlphaFactor(),
        blend.DestAlphaFactor(),
    };
    return std::ranges::any_of(factors, UsesDualSourceFactor);
}
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
DescriptorLayoutBuilder builder{device};
for (size_t index = 0; index < infos.size(); ++index) {
@@ -263,6 +340,7 @@ GraphicsPipeline::GraphicsPipeline(
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors);
}
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
uses_push_descriptor = builder.CanUsePushDescriptor();
@@ -416,8 +494,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
++texture_buffer_it;
@@ -702,13 +784,18 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.lineWidth = 1.0f,
// TODO(alekpop): Transfer from regs
};
const bool smooth_lines_supported =
device.IsExtLineRasterizationSupported() && device.SupportsSmoothLines();
const bool stippled_lines_supported =
device.IsExtLineRasterizationSupported() && device.SupportsStippledRectangularLines();
VkPipelineRasterizationLineStateCreateInfoEXT line_state{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
.lineRasterizationMode = key.state.smooth_lines != 0
.lineRasterizationMode = key.state.smooth_lines != 0 && smooth_lines_supported
? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
: VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
.stippledLineEnable = dynamic.line_stipple_enable ? VK_TRUE : VK_FALSE,
.stippledLineEnable =
(dynamic.line_stipple_enable && stippled_lines_supported) ? VK_TRUE : VK_FALSE,
.lineStippleFactor = key.state.line_stipple_factor,
.lineStipplePattern = static_cast<uint16_t>(key.state.line_stipple_pattern),
};
@@ -739,17 +826,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
}
const VkPipelineMultisampleStateCreateInfo multisample_ci{
const bool supports_alpha_output = fragment_has_color0_output;
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
const auto msaa_mode = key.state.msaa_mode.Value();
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.rasterizationSamples = vk_samples,
.sampleShadingEnable = Settings::values.sample_shading.GetValue() ? VK_TRUE : VK_FALSE,
.minSampleShading = static_cast<float>(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToCoverageEnable =
supports_alpha_output && key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = supports_alpha_output && alpha_to_one_supported &&
key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
};
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -771,6 +866,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
}
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const size_t num_attachments{NumAttachments(key.state)};
const bool supports_dual_source_blend = device.SupportsDualSourceBlend();
const u32 max_dual_source_attachments = supports_dual_source_blend
? device.MaxFragmentDualSrcAttachments()
: 0;
u32 granted_dual_source_attachments = 0;
bool logged_dual_source_warning = false;
for (size_t index = 0; index < num_attachments; ++index) {
static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
@@ -784,13 +885,30 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
for (size_t i = 0; i < mask_table.size(); ++i) {
write_mask |= mask[i] ? mask_table[i] : 0;
}
const bool attachment_uses_dual_source = AttachmentUsesDualSource(blend);
const bool allow_dual_source = attachment_uses_dual_source && supports_dual_source_blend &&
granted_dual_source_attachments < max_dual_source_attachments;
if (allow_dual_source) {
++granted_dual_source_attachments;
} else if (attachment_uses_dual_source && !logged_dual_source_warning) {
LOG_WARNING(Render_Vulkan,
"Dual-source blend factors exceed device limit (maxFragmentDualSrcAttachments={}), falling back to single-source factors",
max_dual_source_attachments);
logged_dual_source_warning = true;
}
const auto sanitize_factor = [&](Maxwell::Blend::Factor factor) {
if (allow_dual_source || !UsesDualSourceFactor(factor)) {
return factor;
}
return FallbackDualSourceFactor(factor);
};
cb_attachments.push_back({
.blendEnable = blend.enable != 0,
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
.srcColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceRGBFactor())),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestRGBFactor())),
.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceAlphaFactor())),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestAlphaFactor())),
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
.colorWriteMask = write_mask,
});
@@ -806,14 +924,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.blendConstants = {}
};
static_vector<VkDynamicState, 34> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_LINE_WIDTH,
};
if (device.UsesAdvancedCoreDynamicState()) {
static constexpr std::array core_dynamic_states{
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
};
dynamic_states.insert(dynamic_states.end(), core_dynamic_states.begin(),
core_dynamic_states.end());
}
if (key.state.extended_dynamic_state) {
std::vector<VkDynamicState> extended{
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
@@ -823,51 +952,68 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
if (!device.IsExtVertexInputDynamicStateSupported()) {
extended.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
}
if (key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
// VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT is part of EDS1
// Only use it if VIDS is not active (VIDS replaces it with full vertex input control)
if (!key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
}
if (key.state.extended_dynamic_state_2_extra) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
// VK_DYNAMIC_STATE_VERTEX_INPUT_EXT (VIDS) - Independent from EDS
// Provides full dynamic vertex input control, replaces VERTEX_INPUT_BINDING_STRIDE
if (key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
// EDS2 - Core (3 states)
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
}
// EDS2 - LogicOp (granular)
if (key.state.extended_dynamic_state_2_logic_op) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
// EDS3 - Blending (composite: 3 states)
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
}
// EDS3 - Enables (composite: per-feature)
if (key.state.extended_dynamic_state_3_enables) {
if (device.SupportsDynamicState3DepthClampEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT);
}
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
// VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
if (device.SupportsDynamicState3LogicOpEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT);
}
if (key.state.extended_dynamic_state_3_enables) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
// additional state3 extensions
VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT,
VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT,
VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT,
VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT,
VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT,
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
if (device.SupportsDynamicState3LineRasterizationMode()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT);
}
if (device.SupportsDynamicState3ConservativeRasterizationMode()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT);
}
if (device.SupportsDynamicState3LineStippleEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT);
}
if (device.SupportsDynamicState3AlphaToCoverageEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT);
}
if (device.SupportsDynamicState3AlphaToOneEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT);
}
}

View File

@@ -82,6 +82,17 @@ public:
const std::array<const Shader::Info*, NUM_STAGES>& infos);
bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; }
// Alpha-to-coverage is only meaningful when the fragment stage actually
// writes color target 0 (tracked in fragment_has_color0_output).
bool SupportsAlphaToCoverage() const noexcept {
    return fragment_has_color0_output;
}
// Alpha-to-one likewise requires the fragment stage to write color target 0.
bool SupportsAlphaToOne() const noexcept {
    return fragment_has_color0_output;
}
// True when this pipeline was keyed with VK_EXT_extended_dynamic_state enabled.
bool UsesExtendedDynamicState() const noexcept {
    return key.state.extended_dynamic_state != 0;
}
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -149,6 +160,7 @@ private:
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_textures{};
bool fragment_has_color0_output{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;

View File

@@ -36,6 +36,7 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/surface.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
@@ -105,6 +106,41 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
return {};
}
// Classifies a render-target format as Float, SignedInt or UnsignedInt so the
// fragment shader can be compiled with matching color-output types.
// Unbound (NONE) and non-integer targets are treated as Float.
Shader::AttributeType RenderTargetAttributeType(Tegra::RenderTargetFormat format) {
    using Shader::AttributeType;
    if (format == Tegra::RenderTargetFormat::NONE) {
        return AttributeType::Float;
    }
    const auto pixel_format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(format);
    if (VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
        return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)
                   ? AttributeType::SignedInt
                   : AttributeType::UnsignedInt;
    }
    return AttributeType::Float;
}
// Translates a shader IR stage into the Vulkan stage bit used for per-stage
// capability queries. Unrecognized stages fall back to the vertex bit.
VkShaderStageFlagBits StageToVkStage(Shader::Stage stage) {
    using Shader::Stage;
    switch (stage) {
    case Stage::TessellationControl:
        return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
    case Stage::TessellationEval:
        return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
    case Stage::Geometry:
        return VK_SHADER_STAGE_GEOMETRY_BIT;
    case Stage::Fragment:
        return VK_SHADER_STAGE_FRAGMENT_BIT;
    case Stage::Compute:
        return VK_SHADER_STAGE_COMPUTE_BIT;
    case Stage::VertexA:
    case Stage::VertexB:
    default:
        return VK_SHADER_STAGE_VERTEX_BIT;
    }
}
Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
if (attr.enabled == 0) {
return Shader::AttributeType::Disabled;
@@ -146,7 +182,8 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde
Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
const GraphicsPipelineCacheKey& key,
const Shader::IR::Program& program,
const Shader::IR::Program* previous_program) {
const Shader::IR::Program* previous_program,
const Vulkan::Device& device) {
Shader::RuntimeInfo info;
if (previous_program) {
info.previous_stage_stores = previous_program->info.stores;
@@ -168,10 +205,14 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled) {
auto [varyings, count] =
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
if (device.IsExtTransformFeedbackSupported()) {
auto [varyings, count] =
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
} else {
LOG_WARNING(Render_Vulkan, "XFB requested in pipeline key but device lacks VK_EXT_transform_feedback; ignoring XFB decorations");
}
}
info.convert_depth_mode = gl_ndc;
}
@@ -218,10 +259,14 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
auto [varyings, count] =
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
if (device.IsExtTransformFeedbackSupported()) {
auto [varyings, count] =
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
} else {
LOG_WARNING(Render_Vulkan, "XFB requested in pipeline key but device lacks VK_EXT_transform_feedback; ignoring XFB decorations");
}
}
info.convert_depth_mode = gl_ndc;
break;
@@ -229,6 +274,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.alpha_test_func = MaxwellToCompareFunction(
key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
info.alpha_test_reference = std::bit_cast<float>(key.state.alpha_test_ref);
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
const auto format = static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[index]);
info.color_output_types[index] = RenderTargetAttributeType(format);
}
break;
default:
break;
@@ -269,8 +318,8 @@ size_t GetTotalPipelineWorkers() {
const size_t max_core_threads =
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
// Leave at least a few cores free in android
constexpr size_t free_cores = 3ULL;
// Leave at least one core free on Android to reduce thermal pressure.
constexpr size_t free_cores = 1ULL;
if (max_core_threads <= free_cores) {
return 1ULL;
}
@@ -317,6 +366,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
"VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const bool float_controls_supported{device.IsKhrShaderFloatControlsSupported()};
const VkDriverId driver_id{device.GetDriverID()};
profile = Shader::Profile{
.supported_spirv = device.SupportedSpirvVersion(),
@@ -326,20 +376,24 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
.support_float_controls = device.IsKhrShaderFloatControlsSupported(),
.support_separate_denorm_behavior =
.support_float_controls = float_controls_supported,
.support_separate_denorm_behavior = float_controls_supported &&
float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_separate_rounding_mode =
.support_separate_rounding_mode = float_controls_supported &&
float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve =
.support_fp16_denorm_preserve = float_controls_supported &&
float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_controls_supported &&
float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_controls_supported &&
float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_controls_supported &&
float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
.support_fp32_signed_zero_nan_preserve =
.support_fp32_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
.support_fp64_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
@@ -395,6 +449,27 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_conditional_barrier = device.SupportsConditionalBarriers(),
};
profile.warp_stage_support_mask = 0;
static constexpr std::array kAllStages{
Shader::Stage::VertexA, Shader::Stage::VertexB,
Shader::Stage::TessellationControl, Shader::Stage::TessellationEval,
Shader::Stage::Geometry, Shader::Stage::Fragment,
Shader::Stage::Compute,
};
for (const auto stage : kAllStages) {
const auto vk_stage = StageToVkStage(stage);
if (device.SupportsWarpIntrinsics(vk_stage)) {
profile.warp_stage_support_mask |= 1u << static_cast<u32>(stage);
}
}
profile.support_vote = profile.warp_stage_support_mask != 0;
if (!profile.SupportsWarpIntrinsics(Shader::Stage::Fragment)) {
LOG_WARNING(Render_Vulkan,
"Fragment shaders lack subgroup support on this driver; warp intrinsics will be "
"approximated and visual artifacts may remain");
}
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
device.GetMaxVertexInputAttributes(), Maxwell::NumVertexAttributes);
@@ -404,14 +479,40 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
}
dynamic_features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(),
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(),
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(),
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(),
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(),
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
};
LOG_INFO(Render_Vulkan, "DynamicState setting value: {}", Settings::values.dyna_state.GetValue());
dynamic_features = {};
// User granularity enforced in vulkan_device.cpp switch statement:
// Level 0: Core Dynamic States only
// Level 1: Core + EDS1
// Level 2: Core + EDS1 + EDS2 (accumulative)
// Level 3: Core + EDS1 + EDS2 + EDS3 (accumulative)
// Here we only verify if extensions were successfully loaded by the device
dynamic_features.has_extended_dynamic_state =
device.IsExtExtendedDynamicStateSupported();
dynamic_features.has_extended_dynamic_state_2 =
device.IsExtExtendedDynamicState2Supported();
dynamic_features.has_extended_dynamic_state_2_logic_op =
device.IsExtExtendedDynamicState2ExtrasSupported();
dynamic_features.has_extended_dynamic_state_2_patch_control_points = false;
dynamic_features.has_extended_dynamic_state_3_blend =
device.IsExtExtendedDynamicState3BlendingSupported();
dynamic_features.has_dual_source_blend = device.SupportsDualSourceBlend();
if (!dynamic_features.has_dual_source_blend) {
LOG_WARNING(Render_Vulkan, "Dual-source blending unsupported, disabling dynamic blend");
dynamic_features.has_extended_dynamic_state_3_blend = false;
}
dynamic_features.has_extended_dynamic_state_3_enables =
device.IsExtExtendedDynamicState3EnablesSupported();
// VIDS: Independent toggle (not affected by dyna_state levels)
dynamic_features.has_dynamic_vertex_input =
device.IsExtVertexInputDynamicStateSupported() &&
Settings::values.vertex_input_dynamic_state.GetValue();
}
PipelineCache::~PipelineCache() {
@@ -421,6 +522,13 @@ PipelineCache::~PipelineCache() {
}
}
// Flushes queued pipeline-builder work, but only on drivers flagged as
// mishandling parallel shader compilation; on healthy drivers this is a no-op.
void PipelineCache::DrainPendingBuilds() {
    if (device.HasBrokenParallelShaderCompiling()) {
        workers.WaitForRequests();
    }
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
if (!RefreshStages(graphics_key.unique_hashes)) {
@@ -451,12 +559,17 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
.shared_memory_size = qmd.shared_alloc,
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
};
const auto [pair, is_new]{compute_cache.try_emplace(key)};
const auto [pair, inserted]{compute_cache.try_emplace(key)};
auto& pipeline{pair->second};
if (!is_new) {
return pipeline.get();
if (!pipeline) {
auto [slot, should_build] = AcquireComputeBuildSlot(key);
if (!should_build) {
WaitForBuildCompletion(slot);
} else {
pipeline = CreateComputePipeline(key, shader);
ReleaseComputeBuildSlot(key, slot);
}
}
pipeline = CreateComputePipeline(key, shader);
return pipeline.get();
}
@@ -516,8 +629,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
dynamic_features.has_extended_dynamic_state ||
(key.state.extended_dynamic_state_2 != 0) !=
dynamic_features.has_extended_dynamic_state_2 ||
(key.state.extended_dynamic_state_2_extra != 0) !=
dynamic_features.has_extended_dynamic_state_2_extra ||
(key.state.extended_dynamic_state_2_logic_op != 0) !=
dynamic_features.has_extended_dynamic_state_2_logic_op ||
(key.state.extended_dynamic_state_3_blend != 0) !=
dynamic_features.has_extended_dynamic_state_3_blend ||
(key.state.extended_dynamic_state_3_enables != 0) !=
@@ -572,13 +685,20 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
const auto [pair, inserted]{graphics_cache.try_emplace(graphics_key)};
auto& pipeline{pair->second};
if (is_new) {
pipeline = CreateGraphicsPipeline();
}
if (!pipeline) {
return nullptr;
const auto key = pair->first;
auto [slot, should_build] = AcquireGraphicsBuildSlot(key);
if (!should_build) {
WaitForBuildCompletion(slot);
} else {
pipeline = CreateGraphicsPipeline();
ReleaseGraphicsBuildSlot(key, slot);
}
if (!pipeline) {
return nullptr;
}
}
if (current_pipeline) {
current_pipeline->AddTransition(pipeline.get());
@@ -601,6 +721,7 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
return pipeline;
}
scheduler.KeepAliveTick();
return nullptr;
}
@@ -671,7 +792,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const size_t stage_index{index - 1};
infos[stage_index] = &program.info;
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage, device)};
ConvertLegacyToGeneric(program, runtime_info);
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
device.SaveShader(code);
@@ -704,6 +825,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to create graphics pipeline 0x{:016x}: {}", key.Hash(),
exception.what());
return nullptr;
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
@@ -767,6 +892,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const VkDriverIdKHR driver_id = device.GetDriverID();
const bool needs_shared_mem_clamp =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
LOG_WARNING(Render_Vulkan,
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
key.unique_hash,
program.shared_memory_size / 1024,
max_shared_memory / 1024);
program.shared_memory_size = max_shared_memory;
}
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};
@@ -782,6 +920,10 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to create compute pipeline 0x{:016x}: {}", key.Hash(),
exception.what());
return nullptr;
}
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
@@ -879,4 +1021,68 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::
}
}
// Registers intent to build the graphics pipeline identified by `key`.
// Returns the shared in-flight slot plus a flag: true means this caller owns
// the build (first arrival, or the previous slot was cleared); false means
// another thread is building and the caller should wait on the slot instead.
auto PipelineCache::AcquireGraphicsBuildSlot(const GraphicsPipelineCacheKey& key)
    -> std::pair<InFlightPipelinePtr, bool> {
    std::scoped_lock lock(graphics_inflight_mutex);
    auto [entry, inserted] = graphics_inflight_builds.try_emplace(key);
    const bool owns_build = inserted || entry->second == nullptr;
    if (owns_build) {
        entry->second = std::make_shared<InFlightPipelineBuild>();
    }
    return {entry->second, owns_build};
}
// Compute-pipeline counterpart of AcquireGraphicsBuildSlot: hands back the
// shared in-flight slot for `key`, with true when this caller must perform
// the build and false when it should wait for the current builder.
auto PipelineCache::AcquireComputeBuildSlot(const ComputePipelineCacheKey& key)
    -> std::pair<InFlightPipelinePtr, bool> {
    std::scoped_lock lock(compute_inflight_mutex);
    auto [entry, inserted] = compute_inflight_builds.try_emplace(key);
    const bool owns_build = inserted || entry->second == nullptr;
    if (owns_build) {
        entry->second = std::make_shared<InFlightPipelineBuild>();
    }
    return {entry->second, owns_build};
}
// Marks the in-flight graphics build `slot` as finished, wakes every waiter,
// and unregisters the slot from the tracking map. The map entry is only
// erased if it still points at this exact slot, so a newer build registered
// under the same key is left untouched.
void PipelineCache::ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key,
                                             const InFlightPipelinePtr& slot) {
    if (!slot) {
        return;
    }
    {
        // Clear the flag while holding the slot mutex so a concurrent
        // WaitForBuildCompletion cannot observe a stale `building == true`.
        std::scoped_lock slot_lock(slot->mutex);
        slot->building = false;
    }
    // Notify after releasing the slot mutex; waiters re-check the predicate
    // under their own lock, so no wakeup can be lost.
    slot->cv.notify_all();
    std::scoped_lock map_lock(graphics_inflight_mutex);
    auto it = graphics_inflight_builds.find(key);
    if (it != graphics_inflight_builds.end() && it->second == slot) {
        graphics_inflight_builds.erase(it);
    }
}
void PipelineCache::ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key,
const InFlightPipelinePtr& slot) {
if (!slot) {
return;
}
{
std::scoped_lock slot_lock(slot->mutex);
slot->building = false;
}
slot->cv.notify_all();
std::scoped_lock map_lock(compute_inflight_mutex);
auto it = compute_inflight_builds.find(key);
if (it != compute_inflight_builds.end() && it->second == slot) {
compute_inflight_builds.erase(it);
}
}
void PipelineCache::WaitForBuildCompletion(const InFlightPipelinePtr& slot) const {
if (!slot) {
return;
}
std::unique_lock lock(slot->mutex);
slot->cv.wait(lock, [&] { return !slot->building; });
}
} // namespace Vulkan

View File

@@ -5,8 +5,10 @@
#include <array>
#include <cstddef>
#include <condition_variable>
#include <filesystem>
#include <memory>
#include <mutex>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -113,7 +115,17 @@ public:
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
void DrainPendingBuilds();
private:
// Synchronization token for a pipeline compilation that is currently in
// progress, so concurrent requests for the same cache key can wait for the
// first builder instead of duplicating the work.
struct InFlightPipelineBuild {
// Guards `building`; pairs with `cv` for the wait/notify handshake.
std::mutex mutex;
// Notified (notify_all) once the owning thread finishes the build.
std::condition_variable cv;
// True while the first acquirer is still compiling; cleared on release.
bool building{true};
};
using InFlightPipelinePtr = std::shared_ptr<InFlightPipelineBuild>;
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
@@ -140,6 +152,14 @@ private:
vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
u32 expected_cache_version);
std::pair<InFlightPipelinePtr, bool> AcquireGraphicsBuildSlot(
const GraphicsPipelineCacheKey& key);
std::pair<InFlightPipelinePtr, bool> AcquireComputeBuildSlot(
const ComputePipelineCacheKey& key);
void ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key, const InFlightPipelinePtr& slot);
void ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key, const InFlightPipelinePtr& slot);
void WaitForBuildCompletion(const InFlightPipelinePtr& slot) const;
const Device& device;
Scheduler& scheduler;
DescriptorPool& descriptor_pool;
@@ -158,6 +178,11 @@ private:
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::mutex graphics_inflight_mutex;
std::unordered_map<GraphicsPipelineCacheKey, InFlightPipelinePtr> graphics_inflight_builds;
std::mutex compute_inflight_mutex;
std::unordered_map<ComputePipelineCacheKey, InFlightPipelinePtr> compute_inflight_builds;
ShaderPools main_pools;
Shader::Profile profile;

View File

@@ -42,7 +42,8 @@ public:
static constexpr size_t BANK_SIZE = 256;
static constexpr size_t QUERY_SIZE = 8;
explicit SamplesQueryBank(const Device& device_, size_t index_)
: BankBase(BANK_SIZE), device{device_}, index{index_} {
: BankBase(BANK_SIZE), device{device_}, index{index_},
supports_host_query_reset{device_.SupportsHostQueryReset()} {
const auto& dev = device.GetLogical();
query_pool = dev.CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
@@ -60,8 +61,10 @@ public:
void Reset() override {
ASSERT(references == 0);
VideoCommon::BankBase::Reset();
const auto& dev = device.GetLogical();
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
if (supports_host_query_reset) {
const auto& dev = device.GetLogical();
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
}
host_results.fill(0ULL);
next_bank = 0;
}
@@ -99,6 +102,7 @@ public:
private:
const Device& device;
const size_t index;
const bool supports_host_query_reset;
vk::QueryPool query_pool;
std::array<u64, BANK_SIZE> host_results;
};
@@ -872,17 +876,18 @@ private:
return;
}
has_flushed_end_pending = true;
if (!has_started || buffers_count == 0) {
// Refresh buffers state before beginning transform feedback so counters are up-to-date
UpdateBuffers();
if (buffers_count == 0) {
// No counter buffers available: begin without counters
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
UpdateBuffers();
return;
}
scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
});
UpdateBuffers();
}
void FlushEndTFB() {
@@ -892,11 +897,15 @@ private:
}
has_flushed_end_pending = false;
// Refresh buffer state before ending transform feedback to ensure counters_count is up-to-date.
UpdateBuffers();
if (buffers_count == 0) {
LOG_DEBUG(Render_Vulkan, "EndTransformFeedbackEXT called with no counters (buffers_count=0)");
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
} else {
LOG_DEBUG(Render_Vulkan, "EndTransformFeedbackEXT called with counters (buffers_count={})", buffers_count);
scheduler.Record([this,
total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
@@ -907,6 +916,7 @@ private:
void UpdateBuffers() {
last_queries.fill(0);
last_queries_stride.fill(1);
streams_mask = 0; // reset previously recorded streams
runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
buffers_count = 0;
out_topology = maxwell3d.draw_manager->GetDrawState().topology;
@@ -916,6 +926,10 @@ private:
continue;
}
const size_t stream = tf.controls[i].stream;
if (stream >= last_queries_stride.size()) {
LOG_WARNING(Render_Vulkan, "TransformFeedback stream {} out of range", stream);
continue;
}
last_queries_stride[stream] = tf.controls[i].stride;
streams_mask |= 1ULL << stream;
buffers_count = std::max<size_t>(buffers_count, stream + 1);
@@ -1116,16 +1130,21 @@ public:
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
u64 num_vertices = 0;
// Protect against stride == 0 (avoid divide-by-zero). Use fallback stride=1 and warn.
u64 safe_stride = query->stride == 0 ? 1 : query->stride;
if (query->stride == 0) {
LOG_WARNING(Render_Vulkan, "TransformFeedback query has stride 0; using 1 to avoid div-by-zero (addr=0x{:x})", query->dependant_address);
}
if (query->dependant_manage) {
auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
num_vertices = dependant_query->value / query->stride;
num_vertices = dependant_query->value / safe_stride;
tfb_streamer.Free(query->dependant_index);
} else {
u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
if (pointer != nullptr) {
u32 result;
std::memcpy(&result, pointer, sizeof(u32));
num_vertices = static_cast<u64>(result) / query->stride;
num_vertices = static_cast<u64>(result) / safe_stride;
}
}
query->value = [&]() -> u64 {
@@ -1200,21 +1219,24 @@ struct QueryCacheRuntimeImpl {
hcr_setup.pNext = nullptr;
hcr_setup.flags = 0;
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
if (device.IsExtConditionalRendering()) {
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = sizeof(u32),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = sizeof(u32),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
hcr_resolve_buffer =
memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
}
VideoCore::RasterizerInterface* rasterizer;

View File

@@ -197,6 +197,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
// Log multi-draw support
if (device.IsExtMultiDrawSupported()) {
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw is enabled for optimized draw calls");
}
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -210,6 +215,10 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
if (device.HasBrokenParallelShaderCompiling()) {
pipeline_cache.DrainPendingBuilds();
}
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -234,16 +243,44 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
// Use VK_EXT_multi_draw if available (single draw becomes multi-draw with count=1)
if (device.IsExtMultiDrawSupported()) {
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
// Use multi-draw indexed with single draw
const VkMultiDrawIndexedInfoEXT multi_draw_info{
.firstIndex = draw_params.first_index,
.indexCount = draw_params.num_vertices,
};
const int32_t vertex_offset = static_cast<int32_t>(draw_params.base_vertex);
cmdbuf.DrawMultiIndexedEXT(1, &multi_draw_info, draw_params.num_instances,
draw_params.base_instance,
sizeof(VkMultiDrawIndexedInfoEXT), &vertex_offset);
} else {
// Use multi-draw with single draw
const VkMultiDrawInfoEXT multi_draw_info{
.firstVertex = draw_params.base_vertex,
.vertexCount = draw_params.num_vertices,
};
cmdbuf.DrawMultiEXT(1, &multi_draw_info, draw_params.num_instances,
draw_params.base_instance,
sizeof(VkMultiDrawInfoEXT));
}
});
} else {
// Fallback to standard draw calls
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
}
});
}
@@ -386,13 +423,48 @@ void RasterizerVulkan::Clear(u32 layer_count) {
.baseArrayLayer = regs.clear_surface.layer,
.layerCount = layer_count,
};
if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
const auto clamp_rect_to_render_area = [render_area](VkRect2D& rect) -> bool {
const auto clamp_axis = [](s32& offset, u32& extent, u32 limit) {
auto clamp_offset = [&offset, limit]() {
if (limit == 0) {
offset = 0;
return;
}
offset = std::clamp(offset, 0, static_cast<s32>(limit));
};
if (extent == 0) {
clamp_offset();
return;
}
if (offset < 0) {
const u32 shrink = (std::min)(extent, static_cast<u32>(-offset));
extent -= shrink;
offset = 0;
}
if (limit == 0) {
extent = 0;
offset = 0;
return;
}
if (offset >= static_cast<s32>(limit)) {
offset = static_cast<s32>(limit);
extent = 0;
return;
}
const u64 end_coord = static_cast<u64>(offset) + extent;
if (end_coord > limit) {
extent = limit - static_cast<u32>(offset);
}
};
clamp_axis(rect.offset.x, rect.extent.width, render_area.width);
clamp_axis(rect.offset.y, rect.extent.height, render_area.height);
return rect.extent.width != 0 && rect.extent.height != 0;
};
if (!clamp_rect_to_render_area(clear_rect.rect)) {
return;
}
clear_rect.rect.extent = VkExtent2D{
.width = (std::min)(clear_rect.rect.extent.width, render_area.width),
.height = (std::min)(clear_rect.rect.extent.height, render_area.height),
};
const u32 color_attachment = regs.clear_surface.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
@@ -839,23 +911,21 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerVulkan::FlushWork() {
#ifdef ANDROID
static constexpr u32 DRAWS_TO_DISPATCH = 1024;
static constexpr u32 DRAWS_TO_DISPATCH = 512;
static constexpr u32 CHECK_MASK = 3;
#else
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
static constexpr u32 CHECK_MASK = 7;
#endif // ANDROID
// Only check multiples of 8 draws
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
if ((++draw_counter & 7) != 7) {
static_assert(DRAWS_TO_DISPATCH % (CHECK_MASK + 1) == 0);
if ((++draw_counter & CHECK_MASK) != CHECK_MASK) {
return;
}
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
return;
}
// Otherwise (every certain number of draws) flush execution.
// This submits commands to the Vulkan driver.
scheduler.Flush();
draw_counter = 0;
}
@@ -921,6 +991,8 @@ bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = maxwell3d->regs;
// Core Dynamic States (Vulkan 1.0) - Always active regardless of dyna_state setting
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -928,6 +1000,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
// EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthCompareOp(regs);
@@ -938,40 +1011,52 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthTestEnable(regs);
UpdateDepthWriteEnable(regs);
UpdateStencilTestEnable(regs);
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
using namespace Tegra::Engines;
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
const auto has_float = std::any_of(
regs.vertex_attrib_format.begin(),
regs.vertex_attrib_format.end(),
[](const auto& attrib) {
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
}
);
if (regs.logic_op.enable) {
regs.logic_op.enable = static_cast<u32>(!has_float);
}
}
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
}
}
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
UpdateBlending(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
UpdateLineStippleEnable(regs);
UpdateConservativeRasterizationMode(regs);
}
}
// EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
// EDS2 Extras: LogicOp operation selection
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
// EDS3 Enables: LogicOpEnable, DepthClamp, LineStipple, ConservativeRaster
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
using namespace Tegra::Engines;
// AMD Workaround: LogicOp incompatible with float render targets
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
const auto has_float = std::any_of(
regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(),
[](const auto& attrib) {
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
}
);
if (regs.logic_op.enable) {
regs.logic_op.enable = static_cast<u32>(!has_float);
}
}
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
UpdateLineRasterizationMode(regs);
UpdateLineStippleEnable(regs);
UpdateConservativeRasterizationMode(regs);
UpdateAlphaToCoverageEnable(regs);
UpdateAlphaToOneEnable(regs);
}
// EDS3 Blending: ColorBlendEnable, ColorBlendEquation, ColorWriteMask
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
UpdateBlending(regs);
}
// Vertex Input Dynamic State: Independent from EDS levels
if (device.IsExtVertexInputDynamicStateSupported()) {
if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput()) {
UpdateVertexInput(regs);
@@ -984,9 +1069,16 @@ void RasterizerVulkan::HandleTransformFeedback() {
const auto& regs = maxwell3d->regs;
if (!device.IsExtTransformFeedbackSupported()) {
std::call_once(warn_unsupported, [&] {
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
});
// If the guest enabled transform feedback, warn once that the device lacks support.
if (regs.transform_feedback_enabled != 0) {
std::call_once(warn_unsupported, [&] {
LOG_WARNING(Render_Vulkan, "Transform feedback requested by guest but VK_EXT_transform_feedback is unavailable; queries disabled");
});
} else {
std::call_once(warn_unsupported, [&] {
LOG_INFO(Render_Vulkan, "VK_EXT_transform_feedback not available on device");
});
}
return;
}
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
@@ -995,7 +1087,7 @@ void RasterizerVulkan::HandleTransformFeedback() {
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
}
}
}
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
@@ -1144,109 +1236,141 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg
if (!state_tracker.TouchBlendConstants()) {
return;
}
const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
regs.blend_color.a};
scheduler.Record(
[blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
if (!device.UsesAdvancedCoreDynamicState()) {
return;
}
const std::array<float, 4> blend_constants{
regs.blend_color.r,
regs.blend_color.g,
regs.blend_color.b,
regs.blend_color.a,
};
scheduler.Record([blend_constants](vk::CommandBuffer cmdbuf) {
cmdbuf.SetBlendConstants(blend_constants.data());
});
}
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBounds()) {
return;
}
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
if (!device.IsDepthBoundsSupported()) {
return;
}
if (!device.UsesAdvancedCoreDynamicState()) {
return;
}
const bool unrestricted = device.IsExtDepthRangeUnrestrictedSupported();
const float min_depth = unrestricted ? regs.depth_bounds[0]
: std::clamp(regs.depth_bounds[0], 0.0f, 1.0f);
const float max_depth = unrestricted ? regs.depth_bounds[1]
: std::clamp(regs.depth_bounds[1], 0.0f, 1.0f);
scheduler.Record([min_depth, max_depth](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBounds(min_depth, max_depth);
});
}
void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilProperties()) {
const bool two_sided = regs.stencil_two_side_enable != 0;
const bool update_properties = state_tracker.TouchStencilProperties();
const bool update_side = state_tracker.TouchStencilSide(two_sided);
const bool refs_dirty = state_tracker.TouchStencilReference();
const bool write_dirty = state_tracker.TouchStencilWriteMask();
const bool compare_dirty = state_tracker.TouchStencilCompare();
const bool update_references = update_properties || update_side || refs_dirty;
const bool update_write_masks = update_properties || update_side || write_dirty;
const bool update_compare_masks = update_properties || update_side || compare_dirty;
if (!update_references && !update_write_masks && !update_compare_masks) {
state_tracker.ClearStencilReset();
return;
}
bool update_references = state_tracker.TouchStencilReference();
bool update_write_mask = state_tracker.TouchStencilWriteMask();
bool update_compare_masks = state_tracker.TouchStencilCompare();
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
update_references = true;
update_write_mask = true;
update_compare_masks = true;
if (!device.UsesAdvancedCoreDynamicState()) {
state_tracker.ClearStencilReset();
return;
}
if (update_references) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
!state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
return;
}
} else {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
return;
}
if (two_sided) {
const bool front_dirty =
state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref);
const bool back_dirty =
state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref);
if (front_dirty || back_dirty) {
scheduler.Record([front_ref = regs.stencil_front_ref,
back_ref = regs.stencil_back_ref,
is_two_sided = two_sided](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_ref != back_ref;
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
});
}
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_ref != back_ref;
// Front face
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
} else if (state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
const u32 reference = regs.stencil_front_ref;
scheduler.Record([reference](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, reference);
});
}();
}
}
if (update_write_mask) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
!state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
return;
}
if (update_write_masks) {
if (two_sided) {
if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) ||
state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
scheduler.Record([
front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
is_two_sided = regs.stencil_two_side_enable
](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_write_mask != back_write_mask;
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
});
}
scheduler.Record([front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_write_mask != back_write_mask;
// Front face
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
} else if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
const u32 front_write_mask = regs.stencil_front_mask;
scheduler.Record([front_write_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_write_mask);
});
}();
}
}
if (update_compare_masks) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
!state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
return;
}
if (two_sided) {
if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) ||
state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
scheduler.Record([
front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
is_two_sided = regs.stencil_two_side_enable
](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_test_mask != back_test_mask;
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
});
}
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_test_mask != back_test_mask;
// Front face
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
} else if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
const u32 front_test_mask = regs.stencil_front_func_mask;
scheduler.Record([front_test_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_test_mask);
});
}();
}
}
state_tracker.ClearStencilReset();
}
@@ -1269,61 +1393,15 @@ void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
});
}
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Records the dynamic depth-bounds-test toggle when it has changed.
    if (!state_tracker.TouchDepthBoundsTestEnable()) {
        return;
    }
    bool enabled = regs.depth_bounds_enable;
    if (enabled && !device.IsDepthBoundsSupported()) {
        // Warn only once instead of on every state change to avoid log spam;
        // this matches the std::once_flag pattern used by the other
        // unsupported-feature warnings in this file. The test is still forced
        // off so we never enable a feature the device lacks.
        static std::once_flag warn_depth_bounds;
        std::call_once(warn_depth_bounds, [] {
            LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
        });
        enabled = false;
    }
    scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthBoundsTestEnableEXT(enable);
    });
}
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Nothing to do unless the depth-test toggle actually changed.
    if (!state_tracker.TouchDepthTestEnable()) {
        return;
    }
    const auto depth_test = regs.depth_test_enable;
    scheduler.Record([depth_test](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthTestEnableEXT(depth_test);
    });
}
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Nothing to do unless the depth-write toggle actually changed.
    if (!state_tracker.TouchDepthWriteEnable()) {
        return;
    }
    const auto depth_write = regs.depth_write_enabled;
    scheduler.Record([depth_write](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthWriteEnableEXT(depth_write);
    });
}
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Nothing to do unless the primitive-restart toggle actually changed.
    if (!state_tracker.TouchPrimitiveRestartEnable()) {
        return;
    }
    const auto restart = regs.primitive_restart.enabled;
    scheduler.Record([restart](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetPrimitiveRestartEnableEXT(restart);
    });
}
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Nothing to do unless the rasterizer-discard toggle actually changed.
    if (!state_tracker.TouchRasterizerDiscardEnable()) {
        return;
    }
    // Maxwell exposes "rasterize enable"; Vulkan wants the inverse (discard).
    scheduler.Record([raster_enable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetRasterizerDiscardEnableEXT(raster_enable == 0);
    });
}
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
void RasterizerVulkan::UpdateConservativeRasterizationMode(
Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchConservativeRasterizationMode()) {
return;
}
if (!device.SupportsDynamicState3ConservativeRasterizationMode() ||
!device.IsExtConservativeRasterizationSupported()) {
return;
}
scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetConservativeRasterizationModeEXT(
enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
@@ -1336,23 +1414,69 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
return;
}
if (!device.SupportsDynamicState3LineStippleEnable()) {
return;
}
scheduler.Record([enable = regs.line_stipple_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineStippleEnableEXT(enable);
});
}
// Records the dynamic line-rasterization mode (VK_EXT_line_rasterization),
// picking smooth (anti-aliased) rectangular lines when the guest requests
// anti-aliasing and the device supports it, plain rectangular otherwise.
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
// NOTE(review): leftover disabled guard from an earlier revision — the active
// dirty-check is TouchLineRasterizationMode() below.
// if (!state_tracker.TouchLi()) {
// return;
// }
if (!device.IsExtLineRasterizationSupported()) {
return;
}
// Skip recording when the line-rasterization state has not changed.
if (!state_tracker.TouchLineRasterizationMode()) {
return;
}
// TODO: The maxwell emulator does not capture line rasters
if (!device.SupportsDynamicState3LineRasterizationMode()) {
// Warn only once: this fires per state change otherwise.
static std::once_flag warn_missing_rect;
std::call_once(warn_missing_rect, [] {
LOG_WARNING(Render_Vulkan,
"Driver lacks rectangular line rasterization support; skipping dynamic "
"line state updates");
});
return;
}
// NOTE(review): disabled experiment left for reference — it targeted
// conservative rasterization, not line rasterization.
// scheduler.Record([enable = regs.line](vk::CommandBuffer cmdbuf) {
// cmdbuf.SetConservativeRasterizationModeEXT(
// enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT
// : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
// });
const bool wants_smooth = regs.line_anti_alias_enable != 0;
// Default to rectangular; upgrade to smooth only when the feature exists.
VkLineRasterizationModeEXT mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
if (wants_smooth) {
if (device.SupportsSmoothLines()) {
mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
} else {
// Warn only once, then silently fall back to rectangular lines.
static std::once_flag warn_missing_smooth;
std::call_once(warn_missing_smooth, [] {
LOG_WARNING(Render_Vulkan,
"Line anti-aliasing requested but smoothLines feature unavailable; "
"using rectangular rasterization");
});
}
}
scheduler.Record([mode](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineRasterizationModeEXT(mode);
});
}
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Record the dynamic primitive-restart toggle only when it changed.
    if (!state_tracker.TouchPrimitiveRestartEnable()) {
        return;
    }
    const bool restart_enabled = regs.primitive_restart.enabled != 0;
    scheduler.Record([restart_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetPrimitiveRestartEnableEXT(restart_enabled);
    });
}
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Record the dynamic rasterizer-discard toggle only when it changed.
    if (!state_tracker.TouchRasterizerDiscardEnable()) {
        return;
    }
    // Maxwell exposes "rasterize enable"; Vulkan wants the inverse (discard).
    scheduler.Record([discard = regs.rasterize_enable == 0](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetRasterizerDiscardEnableEXT(discard);
    });
}
void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1394,6 +1518,9 @@ void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchLogicOpEnable()) {
return;
}
if (!device.SupportsDynamicState3LogicOpEnable()) {
return;
}
scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLogicOpEnableEXT(enable != 0);
});
@@ -1403,6 +1530,9 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
if (!state_tracker.TouchDepthClampEnable()) {
return;
}
if (!device.SupportsDynamicState3DepthClampEnable()) {
return;
}
bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
@@ -1413,6 +1543,41 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
[is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
}
void RasterizerVulkan::UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Record the dynamic alpha-to-coverage toggle only when it changed and
    // the device exposes the EDS3 alpha-to-coverage dynamic state.
    if (!state_tracker.TouchAlphaToCoverageEnable()) {
        return;
    }
    if (!device.SupportsDynamicState3AlphaToCoverageEnable()) {
        return;
    }
    // The feature is honored only when the bound pipeline supports it and
    // the guest has enabled it in the anti-alias alpha control register.
    GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
    bool enable = false;
    if (pipeline != nullptr && pipeline->SupportsAlphaToCoverage()) {
        enable = regs.anti_alias_alpha_control.alpha_to_coverage != 0;
    }
    scheduler.Record([vk_enable = enable ? VK_TRUE : VK_FALSE](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetAlphaToCoverageEnableEXT(vk_enable);
    });
}
void RasterizerVulkan::UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Bail out unless the alpha-to-one state is dirty; Touch also clears the flag.
    if (!state_tracker.TouchAlphaToOneEnable()) {
        return;
    }
    if (!device.SupportsDynamicState3AlphaToOneEnable()) {
        // Warn once per process lifetime, then leave the state untouched
        // (alpha-to-one is effectively forced off on this device).
        static std::once_flag warn_alpha_to_one;
        std::call_once(warn_alpha_to_one, [] {
            LOG_WARNING(Render_Vulkan,
                        "Alpha-to-one is not supported on this device; forcing it disabled");
        });
        return;
    }
    // Enable only when a pipeline is bound, it was compiled with alpha-to-one
    // support, and the guest register requests it.
    GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
    const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToOne() &&
                        regs.anti_alias_alpha_control.alpha_to_one != 0;
    scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetAlphaToOneEnableEXT(enable ? VK_TRUE : VK_FALSE);
    });
}
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthCompareOp()) {
return;
@@ -1422,6 +1587,36 @@ void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& reg
});
}
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Touch first so the dirty flag is consumed even when unsupported.
    if (!state_tracker.TouchDepthBoundsTestEnable()) {
        return;
    }
    if (!device.IsDepthBoundsSupported()) {
        return;
    }
    const bool bounds_enabled = regs.depth_bounds_enable != 0;
    scheduler.Record([bounds_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthBoundsTestEnableEXT(bounds_enabled);
    });
}
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Record only when the depth-test toggle is dirty.
    if (!state_tracker.TouchDepthTestEnable()) {
        return;
    }
    const bool test_enabled = regs.depth_test_enable != 0;
    scheduler.Record([test_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthTestEnableEXT(test_enabled);
    });
}
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    // Record only when the depth-write toggle is dirty.
    if (!state_tracker.TouchDepthWriteEnable()) {
        return;
    }
    const bool write_enabled = regs.depth_write_enabled != 0;
    scheduler.Record([write_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthWriteEnableEXT(write_enabled);
    });
}
void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchFrontFace()) {
return;

View File

@@ -25,6 +25,7 @@
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -168,7 +169,6 @@ private:
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -183,6 +183,8 @@ private:
void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);

View File

@@ -4,6 +4,7 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <chrono>
#include <memory>
#include <mutex>
#include <thread>
@@ -13,6 +14,7 @@
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -77,6 +79,14 @@ void Scheduler::WaitWorker() {
std::scoped_lock el{execution_mutex};
}
void Scheduler::KeepAliveTick() {
    // Submit pending work when no GPU submission happened within the
    // keep-alive window, so the GPU is never left starved for too long.
    if (Clock::now() - last_submission_time >= KEEPALIVE_INTERVAL) {
        Flush();
    }
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
@@ -130,9 +140,27 @@ void Scheduler::RequestOutsideRenderPassOperationContext() {
// Tracks the currently bound graphics pipeline. Returns true when the caller
// must rebind (the pipeline changed), false when it is already bound.
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
    if (state.graphics_pipeline == pipeline) {
        // Re-binding the same pipeline: if an earlier non-extended-dynamic-state
        // pipeline left the tracked enable flags stale, force a one-time refresh.
        if (pipeline && pipeline->UsesExtendedDynamicState() &&
            state.needs_state_enable_refresh) {
            state_tracker.InvalidateStateEnableFlag();
            state.needs_state_enable_refresh = false;
        }
        return false;
    }
    state.graphics_pipeline = pipeline;
    if (!pipeline) {
        return true;
    }
    if (!pipeline->UsesExtendedDynamicState()) {
        // Static-state pipelines bake enables into the pipeline object itself;
        // remember that dynamic enables must be re-emitted for the next
        // extended-dynamic-state pipeline.
        state.needs_state_enable_refresh = true;
    } else if (state.needs_state_enable_refresh) {
        state_tracker.InvalidateStateEnableFlag();
        state.needs_state_enable_refresh = false;
    }
    return true;
}
@@ -252,6 +280,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
});
chunk->MarkSubmit();
DispatchWork();
last_submission_time = Clock::now();
return signal_value;
}
@@ -276,8 +305,13 @@ void Scheduler::EndRenderPass()
return;
}
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
query_cache->NotifySegment(false);
if (query_cache) {
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
if (query_cache->HasStreamer(VideoCommon::QueryType::StreamingByteCount)) {
query_cache->CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
query_cache->NotifySegment(false);
}
Record([num_images = num_renderpass_images,
images = renderpass_images,

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <functional>
@@ -49,6 +53,9 @@ public:
/// safe to touch worker resources.
void WaitWorker();
/// Submits a tiny chunk of work if recent GPU submissions are stale.
void KeepAliveTick();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
@@ -125,6 +132,8 @@ public:
std::mutex submit_mutex;
private:
using Clock = std::chrono::steady_clock;
class Command {
public:
virtual ~Command() = default;
@@ -214,6 +223,7 @@ private:
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;
bool needs_state_enable_refresh = false;
};
void WorkerThread(std::stop_token stop_token);
@@ -246,6 +256,9 @@ private:
State state;
Clock::time_point last_submission_time{Clock::time_point::min()};
static constexpr std::chrono::milliseconds KEEPALIVE_INTERVAL{4};
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};

View File

@@ -5,6 +5,7 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <optional>
#include <utility>
#include <vector>
@@ -49,6 +50,7 @@ size_t GetStreamBufferSize(const Device& device) {
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
@@ -74,13 +76,16 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
}
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
return GetStreamBuffer(size);
if (!deferred && usage == MemoryUsage::Upload) {
if (size <= region_size) {
return GetStreamBuffer(size);
}
}
return GetStagingBuffer(size, usage, deferred);
}
@@ -142,6 +147,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
}
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
scheduler.GetMasterSemaphore().Refresh();
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });

View File

@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <climits>
#include <optional>
#include <vector>
#include "common/common_types.h"

View File

@@ -48,6 +48,7 @@ Flags MakeInvalidationFlags() {
FrontFace,
StencilOp,
StencilTestEnable,
RasterizerDiscardEnable,
VertexBuffers,
VertexInput,
StateEnable,
@@ -55,6 +56,9 @@ Flags MakeInvalidationFlags() {
DepthBiasEnable,
LogicOpEnable,
DepthClampEnable,
AlphaToCoverageEnable,
AlphaToOneEnable,
LineRasterizationMode,
LogicOp,
Blending,
ColorMask,
@@ -148,6 +152,8 @@ void SetupDirtyStateEnable(Tables& tables) {
setup(OFF(logic_op.enable), LogicOpEnable);
setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable);
setup(OFF(line_stipple_enable), LineStippleEnable);
setup(OFF(anti_alias_alpha_control.alpha_to_coverage), AlphaToCoverageEnable);
setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable);
}
void SetupDirtyDepthCompareOp(Tables& tables) {
@@ -226,6 +232,7 @@ void SetupRasterModes(Tables &tables) {
table[OFF(line_stipple_params)] = LineStippleParams;
table[OFF(conservative_raster_enable)] = ConservativeRasterizationMode;
table[OFF(line_anti_alias_enable)] = LineRasterizationMode;
}
} // Anonymous namespace

View File

@@ -54,6 +54,7 @@ enum : u8 {
PrimitiveRestartEnable,
RasterizerDiscardEnable,
ConservativeRasterizationMode,
LineRasterizationMode,
LineStippleEnable,
LineStippleParams,
DepthBiasEnable,
@@ -61,6 +62,8 @@ enum : u8 {
LogicOp,
LogicOpEnable,
DepthClampEnable,
AlphaToCoverageEnable,
AlphaToOneEnable,
Blending,
BlendEnable,
@@ -94,6 +97,10 @@ public:
(*flags)[Dirty::Scissors] = true;
}
void InvalidateStateEnableFlag() {
(*flags)[Dirty::StateEnable] = true;
}
bool TouchViewports() {
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
@@ -225,6 +232,14 @@ public:
return Exchange(Dirty::DepthClampEnable, false);
}
bool TouchAlphaToCoverageEnable() {
return Exchange(Dirty::AlphaToCoverageEnable, false);
}
bool TouchAlphaToOneEnable() {
return Exchange(Dirty::AlphaToOneEnable, false);
}
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
@@ -261,6 +276,10 @@ public:
return Exchange(Dirty::LogicOp, false);
}
bool TouchLineRasterizationMode() {
return Exchange(Dirty::LineRasterizationMode, false);
}
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;

View File

@@ -306,7 +306,17 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) {
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
// According to Vulkan spec, when using VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR,
// the base format (imageFormat) MUST be included in pViewFormats
const std::array view_formats{
swapchain_ci.imageFormat, // Base format MUST be first
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
#ifdef ANDROID
VK_FORMAT_R8G8B8A8_UNORM, // Android may use RGBA
VK_FORMAT_R8G8B8A8_SRGB,
#endif
};
VkImageFormatListCreateInfo format_list{
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
.pNext = nullptr,

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -101,7 +104,7 @@ public:
}
VkSemaphore CurrentRenderSemaphore() const {
return *render_semaphores[frame_index];
return *render_semaphores[image_index];
}
u32 GetWidth() const {

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <optional>
#include <span>
#include "video_core/texture_cache/texture_cache_base.h"
@@ -56,12 +60,16 @@ public:
void TickFrame();
void WaitForGpuTick(u64 tick);
u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const;
std::optional<size_t> GetSamplerHeapBudget() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
@@ -108,10 +116,15 @@ public:
return view_formats[static_cast<std::size_t>(format)];
}
bool RequiresBlockCompatibleViewFormats(PixelFormat format) const noexcept {
return requires_block_view_formats[static_cast<std::size_t>(format)];
}
void BarrierFeedbackLoop();
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const;
VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;
@@ -125,6 +138,7 @@ public:
std::unique_ptr<MSAACopyPass> msaa_copy_pass;
const Settings::ResolutionScalingInfo& resolution;
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
std::array<bool, VideoCore::Surface::MaxPixelFormat> requires_block_view_formats{};
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
@@ -171,6 +185,30 @@ public:
return (this->*current_image).UsageFlags();
}
void TrackGpuReadTick(u64 tick) noexcept {
TrackPendingReadTick(tick);
}
void TrackGpuWriteTick(u64 tick) noexcept {
TrackPendingWriteTick(tick);
}
void CompleteGpuReadTick(u64 completed_tick) noexcept {
ClearPendingReadTick(completed_tick);
}
void CompleteGpuWriteTick(u64 completed_tick) noexcept {
ClearPendingWriteTick(completed_tick);
}
[[nodiscard]] std::optional<u64> PendingGpuReadTick() const noexcept {
return PendingReadTick();
}
[[nodiscard]] std::optional<u64> PendingGpuWriteTick() const noexcept {
return PendingWriteTick();
}
/// Returns true when the image is already initialized and mark it as initialized
[[nodiscard]] bool ExchangeInitialization() noexcept {
return std::exchange(initialized, true);
@@ -233,15 +271,23 @@ public:
[[nodiscard]] VkImageView ColorView();
[[nodiscard]] VkImageView SampledView(Shader::TextureType texture_type,
Shader::SamplerComponentType component_type);
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] bool IsRescaled() const noexcept;
[[nodiscard]] bool SupportsDepthCompareSampling() const noexcept;
[[nodiscard]] bool Is3DImage() const noexcept {
return is_3d_image;
}
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
return *image_views[static_cast<size_t>(texture_type)];
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
return image_handle;
}
@@ -266,23 +312,36 @@ private:
struct StorageViews {
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> typeless;
};
static constexpr size_t NUMERIC_VIEW_TYPES = 3;
[[nodiscard]] Shader::TextureType BaseTextureType() const noexcept;
[[nodiscard]] std::optional<u32> LayerCountOverride(Shader::TextureType texture_type) const noexcept;
[[nodiscard]] VkImageView DepthView(Shader::TextureType texture_type);
[[nodiscard]] VkImageView StencilView(Shader::TextureType texture_type);
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask,
Shader::TextureType texture_type);
const Device* device = nullptr;
const SlotVector<Image>* slot_images = nullptr;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> depth_views;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> stencil_views;
std::array<std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES>, NUMERIC_VIEW_TYPES>
sampled_component_views;
vk::ImageView color_view;
vk::Image null_image;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 buffer_size = 0;
bool is_3d_image = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
@@ -303,9 +362,19 @@ public:
return static_cast<bool>(sampler_default_anisotropy);
}
[[nodiscard]] VkSampler SelectHandle(bool supports_linear_filter,
bool supports_anisotropy,
bool allow_depth_compare) const noexcept;
private:
vk::Sampler sampler;
vk::Sampler sampler_default_anisotropy;
vk::Sampler sampler_force_point;
vk::Sampler sampler_compare_disabled;
vk::Sampler sampler_default_anisotropy_compare_disabled;
vk::Sampler sampler_force_point_compare_disabled;
bool uses_linear_filter = false;
bool depth_compare_enabled = false;
};
class Framebuffer {

View File

@@ -70,6 +70,102 @@ static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture
entry.a_type, entry.srgb_conversion));
}
namespace {
// True when any of the four TIC swizzle channels selects `source`.
[[nodiscard]] bool UsesSwizzleSource(const Tegra::Texture::TICEntry& entry,
                                     Tegra::Texture::SwizzleSource source) {
    return entry.x_source.Value() == source || entry.y_source.Value() == source ||
           entry.z_source.Value() == source || entry.w_source.Value() == source;
}
// Infers Depth vs Stencil for a combined depth-stencil format from the TIC
// swizzle. Returns nullopt when the swizzle is ambiguous (reads both or
// neither of the R/G channels) or the format is not a known D/S layout.
[[nodiscard]] std::optional<Shader::SamplerComponentType> DepthStencilComponentFromSwizzle(
    const Tegra::Texture::TICEntry& entry, VideoCore::Surface::PixelFormat pixel_format) {
    using Tegra::Texture::SwizzleSource;
    const bool reads_red = UsesSwizzleSource(entry, SwizzleSource::R);
    const bool reads_green = UsesSwizzleSource(entry, SwizzleSource::G);
    if (reads_red == reads_green) {
        // Both or neither channel sampled: cannot decide from the swizzle.
        return std::nullopt;
    }
    switch (pixel_format) {
    case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
    case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
        // Depth occupies the red channel in these layouts.
        return reads_red ? Shader::SamplerComponentType::Depth
                         : Shader::SamplerComponentType::Stencil;
    case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
        // Component order is flipped: stencil comes first.
        return reads_red ? Shader::SamplerComponentType::Stencil
                         : Shader::SamplerComponentType::Depth;
    default:
        return std::nullopt;
    }
}
} // Anonymous namespace
// Derives the sampler component type (Depth/Stencil/Sint/Uint/Float) a shader
// should use when sampling the texture described by `entry`.
static Shader::SamplerComponentType ConvertSamplerComponentType(
    const Tegra::Texture::TICEntry& entry) {
    const auto pixel_format = PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type,
                                                         entry.b_type, entry.a_type,
                                                         entry.srgb_conversion);
    const auto surface_type = VideoCore::Surface::GetFormatType(pixel_format);
    if (entry.depth_texture != 0 || surface_type == VideoCore::Surface::SurfaceType::Depth) {
        return Shader::SamplerComponentType::Depth;
    }
    if (surface_type == VideoCore::Surface::SurfaceType::Stencil) {
        return Shader::SamplerComponentType::Stencil;
    }
    if (surface_type == VideoCore::Surface::SurfaceType::DepthStencil) {
        // Prefer the component implied by the swizzle; fall back to the TIC's
        // depth_texture bit when the swizzle is ambiguous.
        if (const auto inferred = DepthStencilComponentFromSwizzle(entry, pixel_format)) {
            return *inferred;
        }
        return entry.depth_texture != 0 ? Shader::SamplerComponentType::Depth
                                        : Shader::SamplerComponentType::Stencil;
    }
    // Color formats: classify by the per-channel component types.
    const auto accumulate = [](const Tegra::Texture::ComponentType component,
                               bool& has_signed, bool& has_unsigned) {
        switch (component) {
        case Tegra::Texture::ComponentType::SINT:
            has_signed = true;
            break;
        case Tegra::Texture::ComponentType::UINT:
            has_unsigned = true;
            break;
        default:
            break;
        }
    };
    bool has_signed{};
    bool has_unsigned{};
    accumulate(entry.r_type, has_signed, has_unsigned);
    accumulate(entry.g_type, has_signed, has_unsigned);
    accumulate(entry.b_type, has_signed, has_unsigned);
    accumulate(entry.a_type, has_signed, has_unsigned);
    // Signed wins when both signed and unsigned channels are present. This is
    // equivalent to the original four-branch chain, whose final two branches
    // were partially unreachable.
    if (has_signed) {
        return Shader::SamplerComponentType::Sint;
    }
    if (has_unsigned) {
        return Shader::SamplerComponentType::Uint;
    }
    return Shader::SamplerComponentType::Float;
}
static std::string_view StageToPrefix(Shader::Stage stage) {
switch (stage) {
case Shader::Stage::VertexB:
@@ -200,6 +296,7 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSizeBytes())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
const u64 num_texture_component_types{static_cast<u64>(texture_component_types.size())};
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
@@ -207,6 +304,8 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.write(reinterpret_cast<const char*>(&num_texture_component_types),
sizeof(num_texture_component_types))
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
sizeof(num_cbuf_replacement_values))
@@ -223,6 +322,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
for (const auto& [key, component] : texture_component_types) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&component), sizeof(component));
}
for (const auto& [key, format] : texture_pixel_formats) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&format), sizeof(format));
@@ -277,7 +380,17 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
ASSERT(handle.first <= tic_limit);
if (handle.first > tic_limit) {
static std::atomic<size_t> oob_count{0};
const size_t n = ++oob_count;
if (n <= 4 || (n & 63) == 0) {
LOG_WARNING(Shader,
"TIC handle {} exceeds limit {} (via_header_index={}) — returning empty",
handle.first, tic_limit, via_header_index);
}
return {};
}
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
@@ -374,6 +487,21 @@ Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
const Shader::TextureType result{ConvertTextureType(entry)};
texture_types.emplace(handle, result);
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
return result;
}
// Returns the cached sampler component type for a texture handle, reading and
// converting the TIC entry on a cache miss.
Shader::SamplerComponentType GraphicsEnvironment::ReadTextureComponentType(u32 handle) {
    // Fast path: the component type was already computed for this handle.
    const auto it{texture_component_types.find(handle)};
    if (it != texture_component_types.end()) {
        return it->second;
    }
    const auto& regs{maxwell3d->regs};
    // Sampler binding mode decides how the raw handle is split into TIC/TSC.
    const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
    auto entry =
        ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
    const Shader::SamplerComponentType result{ConvertSamplerComponentType(entry)};
    // Memoize so later lookups (and serialization) see the same value.
    texture_component_types.emplace(handle, result);
    return result;
}
@@ -430,6 +558,20 @@ Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
const Shader::TextureType result{ConvertTextureType(entry)};
texture_types.emplace(handle, result);
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
return result;
}
// Compute-shader variant: resolves the component type via the compute engine's
// TIC registers and the launch descriptor's linked-TSC flag.
Shader::SamplerComponentType ComputeEnvironment::ReadTextureComponentType(u32 handle) {
    // Fast path: already cached for this handle.
    const auto it{texture_component_types.find(handle)};
    if (it != texture_component_types.end()) {
        return it->second;
    }
    const auto& regs{kepler_compute->regs};
    const auto& qmd{kepler_compute->launch_description};
    auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
    const Shader::SamplerComponentType result{ConvertSamplerComponentType(entry)};
    // Memoize for later lookups and serialization.
    texture_component_types.emplace(handle, result);
    return result;
}
@@ -455,12 +597,15 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
u64 code_size{};
u64 num_texture_types{};
u64 num_texture_pixel_formats{};
u64 num_texture_component_types{};
u64 num_cbuf_values{};
u64 num_cbuf_replacement_values{};
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.read(reinterpret_cast<char*>(&num_texture_component_types),
sizeof(num_texture_component_types))
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
sizeof(num_cbuf_replacement_values))
@@ -480,6 +625,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
.read(reinterpret_cast<char*>(&type), sizeof(type));
texture_types.emplace(key, type);
}
for (size_t i = 0; i < num_texture_component_types; ++i) {
u32 key;
Shader::SamplerComponentType component;
file.read(reinterpret_cast<char*>(&key), sizeof(key))
.read(reinterpret_cast<char*>(&component), sizeof(component));
texture_component_types.emplace(key, component);
}
for (size_t i = 0; i < num_texture_pixel_formats; ++i) {
u32 key;
Shader::TexturePixelFormat format;
@@ -534,6 +686,15 @@ u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
return it->second;
}
Shader::SamplerComponentType FileEnvironment::ReadTextureComponentType(u32 handle) {
    // Deserialized caches must already contain the component type; warn and
    // fall back to Float when the entry is missing.
    const auto entry = texture_component_types.find(handle);
    if (entry == texture_component_types.end()) {
        LOG_WARNING(Render_Vulkan, "Texture component descriptor {:08x} not found", handle);
        return Shader::SamplerComponentType::Float;
    }
    return entry->second;
}
Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
const auto it{texture_types.find(handle)};
if (it == texture_types.end()) {

View File

@@ -80,6 +80,7 @@ protected:
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
@@ -116,6 +117,8 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -142,6 +145,8 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -176,6 +181,8 @@ public:
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
[[nodiscard]] Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
[[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -202,6 +209,7 @@ public:
private:
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;

View File

@@ -4,6 +4,8 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <optional>
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/settings.h"
@@ -408,6 +410,126 @@ bool IsPixelFormatSignedInteger(PixelFormat format) {
}
}
namespace {
// Groups the float/uint/sint reinterpretations of one texel layout so a format
// can be swapped to a different numeric type without changing its bit width.
struct NumericVariantSet {
    PixelFormat float_format = PixelFormat::Invalid;
    PixelFormat uint_format = PixelFormat::Invalid;
    PixelFormat sint_format = PixelFormat::Invalid;
    // Returns the variant matching `numeric`, or nullopt when this layout has
    // no such variant (slot left as Invalid).
    [[nodiscard]] std::optional<PixelFormat> Select(PixelFormatNumeric numeric) const {
        PixelFormat candidate = PixelFormat::Invalid;
        switch (numeric) {
        case PixelFormatNumeric::Float:
            candidate = float_format;
            break;
        case PixelFormatNumeric::Uint:
            candidate = uint_format;
            break;
        case PixelFormatNumeric::Sint:
            candidate = sint_format;
            break;
        }
        if (candidate == PixelFormat::Invalid) {
            return std::nullopt;
        }
        return candidate;
    }
};
// Convenience constructor using designated-style field names for readability.
constexpr NumericVariantSet MakeVariant(PixelFormat float_format, PixelFormat uint_format,
                                        PixelFormat sint_format) {
    return NumericVariantSet{
        .float_format = float_format,
        .uint_format = uint_format,
        .sint_format = sint_format,
    };
}
// Maps a format to its numeric-variant family. Every case lists all members of
// the family (norm/float/uint/sint aliases of the same bit layout) and returns
// the family's canonical float/uint/sint representatives. Formats outside
// these families have no variants and yield nullopt.
std::optional<NumericVariantSet> LookupNumericVariantSet(PixelFormat format) {
    switch (format) {
    // Single-channel families.
    case PixelFormat::R8_UNORM:
    case PixelFormat::R8_SNORM:
    case PixelFormat::R8_UINT:
    case PixelFormat::R8_SINT:
        return MakeVariant(PixelFormat::R8_UNORM, PixelFormat::R8_UINT, PixelFormat::R8_SINT);
    case PixelFormat::R16_FLOAT:
    case PixelFormat::R16_UNORM:
    case PixelFormat::R16_SNORM:
    case PixelFormat::R16_UINT:
    case PixelFormat::R16_SINT:
        return MakeVariant(PixelFormat::R16_FLOAT, PixelFormat::R16_UINT, PixelFormat::R16_SINT);
    case PixelFormat::R32_FLOAT:
    case PixelFormat::R32_UINT:
    case PixelFormat::R32_SINT:
        return MakeVariant(PixelFormat::R32_FLOAT, PixelFormat::R32_UINT, PixelFormat::R32_SINT);
    // Two-channel families.
    case PixelFormat::R8G8_UNORM:
    case PixelFormat::R8G8_SNORM:
    case PixelFormat::R8G8_UINT:
    case PixelFormat::R8G8_SINT:
        return MakeVariant(PixelFormat::R8G8_UNORM, PixelFormat::R8G8_UINT, PixelFormat::R8G8_SINT);
    case PixelFormat::R16G16_FLOAT:
    case PixelFormat::R16G16_UNORM:
    case PixelFormat::R16G16_SNORM:
    case PixelFormat::R16G16_UINT:
    case PixelFormat::R16G16_SINT:
        return MakeVariant(PixelFormat::R16G16_FLOAT, PixelFormat::R16G16_UINT,
                           PixelFormat::R16G16_SINT);
    case PixelFormat::R32G32_FLOAT:
    case PixelFormat::R32G32_UINT:
    case PixelFormat::R32G32_SINT:
        return MakeVariant(PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
                           PixelFormat::R32G32_SINT);
    // Four-channel families.
    case PixelFormat::R16G16B16A16_FLOAT:
    case PixelFormat::R16G16B16A16_UNORM:
    case PixelFormat::R16G16B16A16_SNORM:
    case PixelFormat::R16G16B16A16_UINT:
    case PixelFormat::R16G16B16A16_SINT:
        return MakeVariant(PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
                           PixelFormat::R16G16B16A16_SINT);
    case PixelFormat::R32G32B32A32_FLOAT:
    case PixelFormat::R32G32B32A32_UINT:
    case PixelFormat::R32G32B32A32_SINT:
        return MakeVariant(PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT,
                           PixelFormat::R32G32B32A32_SINT);
    case PixelFormat::A8B8G8R8_UNORM:
    case PixelFormat::A8B8G8R8_SNORM:
    case PixelFormat::A8B8G8R8_SRGB:
    case PixelFormat::A8B8G8R8_UINT:
    case PixelFormat::A8B8G8R8_SINT:
        return MakeVariant(PixelFormat::A8B8G8R8_UNORM, PixelFormat::A8B8G8R8_UINT,
                           PixelFormat::A8B8G8R8_SINT);
    // 10:10:10:2 has no signed-integer representation.
    case PixelFormat::A2B10G10R10_UNORM:
    case PixelFormat::A2B10G10R10_UINT:
        return MakeVariant(PixelFormat::A2B10G10R10_UNORM, PixelFormat::A2B10G10R10_UINT,
                           PixelFormat::Invalid);
    default:
        return std::nullopt;
    }
}
} // Anonymous namespace
// Classifies a pixel format into the Float/Uint/Sint numeric buckets; all
// non-integer formats (norms, floats, sRGB) fall into the Float bucket.
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format) {
    if (!IsPixelFormatInteger(format)) {
        return PixelFormatNumeric::Float;
    }
    return IsPixelFormatSignedInteger(format) ? PixelFormatNumeric::Sint
                                              : PixelFormatNumeric::Uint;
}
// Finds the variant of `format` with the requested numeric type (same texel
// layout, different interpretation). Returns nullopt when `format` has no
// variant family or the family lacks that numeric type.
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
                                                  PixelFormatNumeric target_numeric) {
    const auto variants = LookupNumericVariantSet(format);
    if (!variants) {
        return std::nullopt;
    }
    // Select already yields optional<PixelFormat>; the original re-wrapped it
    // through an if/return-nullopt pair that added nothing.
    return variants->Select(target_numeric);
}
size_t PixelComponentSizeBitsInteger(PixelFormat format) {
switch (format) {
case PixelFormat::A8B8G8R8_SINT:

View File

@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <climits>
#include <optional>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
@@ -517,6 +521,16 @@ bool IsPixelFormatSignedInteger(PixelFormat format);
size_t PixelComponentSizeBitsInteger(PixelFormat format);
enum class PixelFormatNumeric {
Float,
Uint,
Sint,
};
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format);
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
PixelFormatNumeric target_numeric);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
u64 TranscodedAstcSize(u64 base_size, PixelFormat format);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -21,6 +24,27 @@ constexpr auto FLOAT = ComponentType::FLOAT;
constexpr bool LINEAR = false;
constexpr bool SRGB = true;
// Map the reserved zero encoding to a usable default texture format.
// Zero-initialized TIC entries would otherwise decode to an invalid format;
// they are treated as A8B8G8R8 instead.
constexpr TextureFormat SanitizeFormat(TextureFormat format) {
    return format == static_cast<TextureFormat>(0) ? TextureFormat::A8B8G8R8 : format;
}
// Normalize a component-type encoding before hashing/lookup:
// the reserved zero encoding becomes UNORM, and the FORCE_FP16 variants are
// collapsed onto their base SNORM/UNORM component types.
constexpr ComponentType SanitizeComponent(ComponentType component) {
    if (component == static_cast<ComponentType>(0)) {
        return ComponentType::UNORM;
    }
    if (component == ComponentType::SNORM_FORCE_FP16) {
        return ComponentType::SNORM;
    }
    if (component == ComponentType::UNORM_FORCE_FP16) {
        return ComponentType::UNORM;
    }
    return component;
}
constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
u32 hash = is_srgb ? 1 : 0;
@@ -41,6 +65,11 @@ constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb =
PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
ComponentType blue, ComponentType alpha,
bool is_srgb) noexcept {
format = SanitizeFormat(format);
red = SanitizeComponent(red);
green = SanitizeComponent(green);
blue = SanitizeComponent(blue);
alpha = SanitizeComponent(alpha);
switch (Hash(format, red, green, blue, alpha, is_srgb)) {
case Hash(TextureFormat::A8B8G8R8, UNORM):
return PixelFormat::A8B8G8R8_UNORM;

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
#include <optional>
#include <vector>
@@ -58,6 +62,50 @@ struct ImageBase {
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
explicit ImageBase(const NullImageParams&);
// Record that GPU work reading this image completes no later than `tick`.
// When a read is already tracked, only a later tick replaces it.
void TrackPendingReadTick(u64 tick) noexcept {
    if (!pending_read_tick) {
        pending_read_tick = tick;
    } else if (tick > *pending_read_tick) {
        *pending_read_tick = tick;
    }
}
// Record that GPU work writing this image completes no later than `tick`.
// When a write is already tracked, only a later tick replaces it.
void TrackPendingWriteTick(u64 tick) noexcept {
    if (!pending_write_tick) {
        pending_write_tick = tick;
    } else if (tick > *pending_write_tick) {
        *pending_write_tick = tick;
    }
}
// Drop the pending-read marker once the GPU has reached `completed_tick`.
void ClearPendingReadTick(u64 completed_tick) noexcept {
    if (!pending_read_tick) {
        return;
    }
    if (*pending_read_tick <= completed_tick) {
        pending_read_tick.reset();
    }
}
// Drop the pending-write marker once the GPU has reached `completed_tick`.
void ClearPendingWriteTick(u64 completed_tick) noexcept {
    if (!pending_write_tick) {
        return;
    }
    if (*pending_write_tick <= completed_tick) {
        pending_write_tick.reset();
    }
}
// Whether unfinished GPU reads of this image are still being tracked.
[[nodiscard]] bool HasPendingReadTick() const noexcept {
return pending_read_tick.has_value();
}
// Whether unfinished GPU writes to this image are still being tracked.
[[nodiscard]] bool HasPendingWriteTick() const noexcept {
return pending_write_tick.has_value();
}
// Latest tracked GPU tick that reads this image, if any.
[[nodiscard]] std::optional<u64> PendingReadTick() const noexcept {
return pending_read_tick;
}
// Latest tracked GPU tick that writes this image, if any.
[[nodiscard]] std::optional<u64> PendingWriteTick() const noexcept {
return pending_write_tick;
}
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
[[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
@@ -115,6 +163,9 @@ struct ImageBase {
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
ImageMapId map_view_id{};
std::optional<u64> pending_read_tick;
std::optional<u64> pending_write_tick;
};
struct ImageMapView {

View File

@@ -33,6 +33,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
dma_downloaded = forced_flushed;
format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
config.a_type, config.srgb_conversion);
num_samples = NumSamples(config.msaa_mode);
resources.levels = config.max_mip_level + 1;
if (config.IsPitchLinear()) {

View File

@@ -1,15 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
constexpr inline std::size_t MaxSampleLocationSlots = 16;
[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
switch (num_samples) {
case 1:
@@ -95,4 +102,24 @@ namespace VideoCommon {
return 1;
}
// NVN guarantees up to sixteen programmable sample slots shared across a repeating pixel grid
// (per the 0.7.0 addon release notes), so the grid dimensions shrink as MSAA increases.
// Dimensions of the repeating sample-location pixel grid for a MSAA mode.
// The 16 programmable slots are shared across the grid, so the grid shrinks
// as the per-pixel sample count grows (width * height * samples == 16).
[[nodiscard]] inline std::pair<u32, u32> SampleLocationGridSize(Tegra::Texture::MsaaMode msaa_mode) {
    const int samples = NumSamples(msaa_mode);
    switch (samples) {
    case 1:
        return {4u, 4u};
    case 2:
        return {4u, 2u};
    case 4:
        return {2u, 2u};
    case 8:
        return {2u, 1u};
    case 16:
        return {1u, 1u};
    default:
        ASSERT_MSG(false, "Unsupported sample count for grid size={}", samples);
        return {1u, 1u};
    }
}
} // namespace VideoCommon

View File

@@ -6,6 +6,8 @@
#pragma once
#include <limits>
#include <optional>
#include <unordered_set>
#include <boost/container/small_vector.hpp>
@@ -538,6 +540,7 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
if (True(image.flags & ImageFlagBits::CpuModified)) {
return;
}
EnsureImageReady(image, ImageAccessType::Write);
image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
@@ -548,11 +551,12 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
}
image.flags &= ~ImageFlagBits::GpuModified;
EnsureImageReady(this->slot_images[image_id], ImageAccessType::Read);
images.push_back(image_id);
});
if (images.empty()) {
@@ -604,6 +608,7 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
EnsureImageReady(image, ImageAccessType::Write);
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
}
@@ -619,6 +624,7 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
EnsureImageReady(image, ImageAccessType::Write);
if (False(image.flags & ImageFlagBits::CpuModified)) {
image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Tracked)) {
@@ -1736,11 +1742,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
}
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
if (is_new) {
EnforceSamplerBudget();
pair->second = slot_samplers.insert(runtime, config);
}
return pair->second;
}
template <class P>
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
    // Only runtimes that expose GetSamplerHeapBudget() report a budget; every
    // other backend is treated as having no sampler limit.
    if constexpr (!requires { runtime.GetSamplerHeapBudget(); }) {
        return std::nullopt;
    } else {
        return runtime.GetSamplerHeapBudget();
    }
}
template <class P>
void TextureCache<P>::EnforceSamplerBudget() {
    // Reclaim inactive samplers once the runtime-reported budget is reached,
    // at most once per frame so the garbage collection cost stays bounded.
    const auto budget = QuerySamplerBudget();
    const bool over_budget = budget && slot_samplers.size() >= *budget;
    if (!over_budget || !channel_state) {
        return;
    }
    if (last_sampler_gc_frame == frame_tick) {
        return; // Already collected this frame.
    }
    last_sampler_gc_frame = frame_tick;
    TrimInactiveSamplers(*budget);
}
template <class P>
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
    // Evict cached samplers that are not referenced by the currently bound
    // graphics or compute descriptor ids, until usage drops a comfortable
    // margin below `budget`. Called only under budget pressure.
    if (channel_state->samplers.empty()) {
        return;
    }
    // Stop trimming once we are this many slots under budget.
    static constexpr size_t SAMPLER_GC_SLACK = 1024;
    auto mark_active = [](auto& set, SamplerId id) {
        // Sentinel ids are intentionally not stored in the active set; the
        // eviction loop below handles them explicitly.
        if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
            return;
        }
        set.insert(id);
    };
    std::unordered_set<SamplerId> active;
    active.reserve(channel_state->graphics_sampler_ids.size() +
                   channel_state->compute_sampler_ids.size());
    for (const SamplerId id : channel_state->graphics_sampler_ids) {
        mark_active(active, id);
    }
    for (const SamplerId id : channel_state->compute_sampler_ids) {
        mark_active(active, id);
    }
    size_t removed = 0;
    auto& sampler_map = channel_state->samplers;
    for (auto it = sampler_map.begin(); it != sampler_map.end();) {
        const SamplerId sampler_id = it->second;
        if (!sampler_id || sampler_id == CORRUPT_ID) {
            // Stale entries with invalid ids: drop the map entry only.
            it = sampler_map.erase(it);
            continue;
        }
        if (sampler_id == NULL_SAMPLER_ID) {
            // BUGFIX: never destroy the shared null sampler. mark_active
            // deliberately keeps it out of `active`, so without this guard an
            // unbound map entry for it would be erased from slot_samplers.
            ++it;
            continue;
        }
        if (active.find(sampler_id) != active.end()) {
            ++it;
            continue;
        }
        slot_samplers.erase(sampler_id);
        it = sampler_map.erase(it);
        ++removed;
        if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
            break;
        }
    }
    if (removed != 0) {
        LOG_WARNING(HW_GPU,
                    "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
                    budget, removed);
    }
}
template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
@@ -2343,6 +2427,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
template <class P>
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
Image& image = slot_images[image_id];
EnsureImageReady(image, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
TrackGpuImageAccess(image_id, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
runtime.TransitionImageLayout(image);
if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) {
@@ -2358,6 +2445,85 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
lru_cache.Touch(image.lru_index, frame_tick);
}
template <class P>
void TextureCache<P>::TrackGpuImageAccess(ImageId image_id, ImageAccessType access) {
    // Stage the access; it is bound to a fence value later, when the pending
    // accesses are committed.
    staged_gpu_accesses.push_back(PendingImageAccess{image_id, access});
}
template <class P>
void TextureCache<P>::CommitPendingGpuAccesses(u64 fence_value) {
    // Attach every staged access to `fence_value` so images can later be
    // waited on / released when that fence completes. A zero fence value
    // means nothing was submitted; staged accesses are kept for next time.
    if (staged_gpu_accesses.empty() || fence_value == 0) {
        return;
    }
    committed_gpu_ticks.push_back(fence_value);
    auto& batch = committed_gpu_accesses.emplace_back(std::move(staged_gpu_accesses));
    staged_gpu_accesses.clear();
    for (const PendingImageAccess& access : batch) {
        ImageBase& image = slot_images[access.image_id];
        if (access.access == ImageAccessType::Read) {
            image.TrackPendingReadTick(fence_value);
        } else {
            image.TrackPendingWriteTick(fence_value);
        }
    }
}
template <class P>
void TextureCache<P>::CompleteGpuAccesses(u64 completed_fence) {
    // Release tracked image accesses for every committed batch whose fence
    // value has been reached. A zero fence is a no-op sentinel.
    if (completed_fence == 0) {
        return;
    }
    while (!committed_gpu_ticks.empty()) {
        if (committed_gpu_ticks.front() > completed_fence) {
            break;
        }
        const auto accesses = std::move(committed_gpu_accesses.front());
        committed_gpu_accesses.pop_front();
        committed_gpu_ticks.pop_front();
        for (const PendingImageAccess& access : accesses) {
            // Images may have been destroyed while work was in flight.
            if (!slot_images.Contains(access.image_id)) {
                continue;
            }
            ImageBase& image = slot_images[access.image_id];
            if (access.access == ImageAccessType::Read) {
                image.ClearPendingReadTick(completed_fence);
            } else {
                image.ClearPendingWriteTick(completed_fence);
            }
        }
    }
}
template <class P>
void TextureCache<P>::EnsureImageReady(ImageBase& image, ImageAccessType access) {
    // Block until outstanding GPU work conflicting with `access` is done:
    // a write waits on pending reads and writes, while a read only waits on
    // pending writes (read-after-read requires no ordering).
    const auto wait_and_clear_write = [&] {
        if (const auto tick = image.PendingWriteTick()) {
            runtime.WaitForGpuTick(*tick);
            image.ClearPendingWriteTick(*tick);
        }
    };
    if (access == ImageAccessType::Write) {
        if (const auto tick = image.PendingReadTick()) {
            runtime.WaitForGpuTick(*tick);
            image.ClearPendingReadTick(*tick);
        }
        wait_and_clear_write();
    } else {
        wait_and_clear_write();
    }
}
template <class P>
void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
bool invalidate) {
@@ -2375,6 +2541,8 @@ template <class P>
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
Image& dst = slot_images[dst_id];
Image& src = slot_images[src_id];
EnsureImageReady(dst, ImageAccessType::Write);
EnsureImageReady(src, ImageAccessType::Read);
const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
@@ -2391,20 +2559,30 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
}
}
}
const auto TrackCopyAccesses = [this, dst_id, src_id]() {
TrackGpuImageAccess(dst_id, ImageAccessType::Write);
TrackGpuImageAccess(src_id, ImageAccessType::Read);
};
const auto dst_format_type = GetFormatType(dst.info.format);
const auto src_format_type = GetFormatType(src.info.format);
if (src_format_type == dst_format_type) {
if constexpr (HAS_EMULATED_COPIES) {
if (!runtime.CanImageBeCopied(dst, src)) {
return runtime.EmulateCopyImage(dst, src, copies);
runtime.EmulateCopyImage(dst, src, copies);
TrackCopyAccesses();
return;
}
}
return runtime.CopyImage(dst, src, copies);
runtime.CopyImage(dst, src, copies);
TrackCopyAccesses();
return;
}
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
if (runtime.ShouldReinterpret(dst, src)) {
return runtime.ReinterpretImage(dst, src, copies);
runtime.ReinterpretImage(dst, src, copies);
TrackCopyAccesses();
return;
}
for (const ImageCopy& copy : copies) {
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
@@ -2457,6 +2635,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
}
TrackCopyAccesses();
}
template <class P>

View File

@@ -182,6 +182,11 @@ public:
/// Return a reference to the given sampler id
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
/// Returns true when the runtime format supports linear filtering
[[nodiscard]] bool SupportsLinearFilter(PixelFormat format) const noexcept {
return runtime.SupportsLinearFilter(format);
}
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
@@ -258,6 +263,15 @@ public:
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Track that an image participates in upcoming GPU work with the given access type
void TrackGpuImageAccess(ImageId image_id, ImageAccessType access);
/// Notify the cache that tracked GPU work has been submitted with the specified fence value
void CommitPendingGpuAccesses(u64 fence_value);
/// Notify the cache that a fence value has completed so tracked accesses can be released
void CompleteGpuAccesses(u64 completed_fence);
std::recursive_mutex mutex;
private:
@@ -408,6 +422,8 @@ private:
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
void EnsureImageReady(ImageBase& image, ImageAccessType access);
/// Bind an image view as render target, downloading resources preemptively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
@@ -429,6 +445,9 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
void EnforceSamplerBudget();
void TrimInactiveSamplers(size_t budget);
std::optional<size_t> QuerySamplerBudget() const;
Runtime& runtime;
@@ -462,6 +481,11 @@ private:
Common::SlotId object_id;
};
struct PendingImageAccess {
ImageId image_id;
ImageAccessType access;
};
Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageMapView> slot_map_views;
Common::SlotVector<ImageView> slot_image_views;
@@ -477,6 +501,9 @@ private:
std::vector<AsyncBuffer> uncommitted_async_buffers;
std::deque<std::vector<AsyncBuffer>> async_buffers;
std::deque<AsyncBuffer> async_buffers_death_ring;
std::vector<PendingImageAccess> staged_gpu_accesses;
std::deque<std::vector<PendingImageAccess>> committed_gpu_accesses;
std::deque<u64> committed_gpu_ticks;
struct LRUItemParams {
using ObjectType = ImageId;
@@ -500,6 +527,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;

View File

@@ -155,4 +155,9 @@ struct SwizzleParameters {
s32 level;
};
enum class ImageAccessType : u8 {
Read,
Write,
};
} // namespace VideoCommon

View File

@@ -22,6 +22,14 @@
#include <vulkan/vulkan.h>
// Define maintenance 7-9 extension names (not yet in official Vulkan headers)
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
#endif
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
#endif
// Sanitize macros
#undef CreateEvent
#undef CreateSemaphore

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include <bitset>
#include <chrono>
#include <mutex>
#include <optional>
#include <thread>
#include <unordered_set>
@@ -90,8 +91,57 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
VK_FORMAT_UNDEFINED,
};
// Fallback chains (terminated by VK_FORMAT_UNDEFINED) tried in order when the
// device lacks support for the keyed format.
constexpr std::array B10G11R11_UFLOAT_PACK32{
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_UNDEFINED,
};
// NOTE(review): falling back from 16-bit float/UNORM formats to 8-bit
// A8B8G8R8/B8G8R8A8 loses precision and range — presumably a deliberate
// trade-off for drivers missing these formats; confirm intent.
constexpr std::array R16G16B16A16_SFLOAT{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_UNDEFINED,
};
constexpr std::array R16G16B16A16_UNORM{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_UNDEFINED,
};
} // namespace Alternatives
// Identifies a (format, requested feature flags, tiling type) combination so
// that each unsupported-format warning is emitted only once per triple.
struct UnsupportedFormatKey {
VkFormat format;
VkFormatFeatureFlags usage;
FormatType type;
bool operator==(const UnsupportedFormatKey&) const noexcept = default;
};
// Hash functor for UnsupportedFormatKey: mixes the three fields with the
// usual boost-style golden-ratio combine.
struct UnsupportedFormatKeyHash {
    size_t operator()(const UnsupportedFormatKey& key) const noexcept {
        const auto combine = [](size_t seed, size_t value) -> size_t {
            return seed ^ (value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2));
        };
        size_t seed = std::hash<int>{}(static_cast<int>(key.format));
        seed = combine(seed, static_cast<size_t>(key.usage));
        seed = combine(seed, static_cast<size_t>(key.type));
        return seed;
    }
};
// Returns true exactly once per unique (format, usage, type) triple, limiting
// unsupported-format log spam. Safe to call from multiple threads.
bool ShouldLogUnsupportedFormat(VkFormat format, VkFormatFeatureFlags usage, FormatType type) {
    static std::mutex mutex;
    static std::unordered_set<UnsupportedFormatKey, UnsupportedFormatKeyHash> logged_keys;
    std::scoped_lock lock{mutex};
    return logged_keys.insert(UnsupportedFormatKey{format, usage, type}).second;
}
[[maybe_unused]] constexpr VkShaderStageFlags GraphicsStageMask =
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT |
VK_SHADER_STAGE_FRAGMENT_BIT;
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
@@ -122,6 +172,12 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
case VK_FORMAT_R16G16B16A16_SFLOAT:
return Alternatives::R16G16B16A16_SFLOAT.data();
case VK_FORMAT_R16G16B16A16_UNORM:
return Alternatives::R16G16B16A16_UNORM.data();
default:
return nullptr;
}
@@ -289,32 +345,6 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
return format_properties;
}
#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
void OverrideBcnFormats(std::unordered_map<VkFormat, VkFormatProperties>& format_properties) {
// These properties are extracted from Adreno driver 512.687.0
constexpr VkFormatFeatureFlags tiling_features{
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT};
static constexpr std::array bcn_formats{
VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK,
};
for (const auto format : bcn_formats) {
format_properties[format].linearTilingFeatures = tiling_features;
format_properties[format].optimalTilingFeatures = tiling_features;
format_properties[format].bufferFeatures = buffer_features;
}
}
#endif
NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
const std::set<std::string, std::less<>>& exts) {
VkPhysicalDeviceProperties2 physical_properties{};
@@ -376,10 +406,10 @@ void Device::RemoveExtension(bool& extension, const std::string& extension_name)
loaded_extensions.erase(extension_name);
}
void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !is_suitable) {
void Device::RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !extension) {
LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name);
this->RemoveExtension(is_suitable, extension_name);
this->RemoveExtension(extension, extension_name);
}
}
@@ -400,11 +430,12 @@ void Device::RemoveExtensionFeature(bool& extension, Feature& feature,
}
template <typename Feature>
void Device::RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
void Device::RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !is_suitable) {
LOG_WARNING(Render_Vulkan, "Removing features for unsuitable extension {}", extension_name);
this->RemoveExtensionFeature(is_suitable, feature, extension_name);
if (loaded_extensions.contains(extension_name) && !extension) {
LOG_WARNING(Render_Vulkan,
"Removing features for unsuitable extension {}", extension_name);
this->RemoveExtensionFeature(extension, feature, extension_name);
}
}
@@ -416,7 +447,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_suitable = GetSuitability(surface != nullptr);
const VkDriverId driver_id = properties.driver.driverID;
const auto device_id = properties.properties.deviceID;
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
const bool is_amd_driver =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
@@ -427,11 +457,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
const bool is_s8gen2 = device_id == 0x43050a01;
//const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (!is_suitable)
LOG_WARNING(Render_Vulkan, "Unsuitable driver - continuing anyways");
if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) {
LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway");
} else if (!is_suitable) {
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
if (is_nvidia) {
nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
@@ -492,36 +524,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectToolingInfo();
if (is_qualcomm) {
// Qualcomm Adreno GPUs don't handle scaled vertex attributes; keep emulation enabled
must_emulate_scaled_formats = true;
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
"Qualcomm drivers require scaled vertex format emulation; forcing fallback");
if (extensions.shader_float_controls) {
LOG_WARNING(Render_Vulkan,
"Qualcomm drivers: VK_KHR_shader_float_controls is unstable; disabling usage");
RemoveExtension(extensions.shader_float_controls,
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
} else {
LOG_INFO(Render_Vulkan,
"Qualcomm drivers: VK_KHR_shader_float_controls already unavailable");
}
disable_shader_float_controls_usage = true;
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
features.shader_atomic_int64.shaderBufferInt64Atomics = false;
features.shader_atomic_int64.shaderSharedInt64Atomics = false;
features.features.shaderInt64 = false;
#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
// Patch the driver to enable BCn textures.
const auto major = (properties.properties.driverVersion >> 24) << 2;
const auto minor = (properties.properties.driverVersion >> 12) & 0xFFFU;
const auto vendor = properties.properties.vendorID;
const auto patch_status = adrenotools_get_bcn_type(major, minor, vendor);
if (patch_status == ADRENOTOOLS_BCN_PATCH) {
LOG_INFO(Render_Vulkan, "Patching Adreno driver to support BCn texture formats");
if (adrenotools_patch_bcn(
reinterpret_cast<void*>(dld.vkGetPhysicalDeviceFormatProperties))) {
OverrideBcnFormats(format_properties);
} else {
LOG_ERROR(Render_Vulkan, "Patch failed! Driver code may now crash");
}
} else if (patch_status == ADRENOTOOLS_BCN_BLOB) {
LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures without patches");
} else {
LOG_WARNING(Render_Vulkan, "Adreno driver can't be patched to enable BCn textures");
}
#endif
}
if (is_nvidia) {
@@ -533,35 +556,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
if (nv_major_version >= 510) {
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
LOG_WARNING(Render_Vulkan,
"NVIDIA Drivers >= 510 do not support MSAA->MSAA image blits. "
"MSAA scaling will use 3D helpers. MSAA resolves work normally.");
cant_blit_msaa = true;
}
}
if (extensions.extended_dynamic_state3 && is_radv) {
LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
dynamic_state3_blending = false;
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 23, 1, 0)) {
LOG_WARNING(Render_Vulkan,
"RADV versions older than 23.1.0 have broken depth clamp dynamic state");
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
dynamic_state3_enables = false;
// Mali/ NVIDIA proprietary drivers: Shader stencil export not supported
// Use hardware depth/stencil blits instead when available
if (!extensions.shader_stencil_export) {
LOG_INFO(Render_Vulkan,
"NVIDIA: VK_EXT_shader_stencil_export not supported, using hardware blits "
"for depth/stencil operations");
LOG_INFO(Render_Vulkan, " D24S8 hardware blit support: {}",
is_blit_depth24_stencil8_supported);
LOG_INFO(Render_Vulkan, " D32S8 hardware blit support: {}",
is_blit_depth32_stencil8_supported);
if (!is_blit_depth24_stencil8_supported && !is_blit_depth32_stencil8_supported) {
LOG_WARNING(Render_Vulkan,
"NVIDIA: Neither shader export nor hardware blits available for "
"depth/stencil. Performance may be degraded.");
}
}
}
if (extensions.extended_dynamic_state3 && (is_amd_driver || driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
// AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation
LOG_WARNING(Render_Vulkan,
"AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
dynamic_state3_blending = false;
}
sets_per_pool = 64;
if (is_amd_driver) {
// AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
@@ -574,10 +593,55 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
}
const bool needs_mobile_alignment_clamp = is_qualcomm || is_arm;
if (is_qualcomm) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
has_broken_parallel_compiling = true;
has_broken_parallel_compiling = true;
LOG_WARNING(Render_Vulkan,
"Adreno drivers exhibit instability with parallel shader compilation; "
"forcing single-threaded pipeline builds");
if (extensions.push_descriptor) {
LOG_WARNING(Render_Vulkan,
"Qualcomm driver mishandles push descriptors; disabling "
"VK_KHR_push_descriptor to avoid incomplete descriptor writes");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
}
}
if (needs_mobile_alignment_clamp) {
const char* driver_label = is_qualcomm ? "Qualcomm" : "ARM";
constexpr VkDeviceSize MIN_UNIFORM_ALIGNMENT = 256;
const VkDeviceSize reported_uniform_alignment =
properties.properties.limits.minUniformBufferOffsetAlignment;
if (reported_uniform_alignment < MIN_UNIFORM_ALIGNMENT) {
uniform_buffer_alignment_minimum = MIN_UNIFORM_ALIGNMENT;
LOG_WARNING(Render_Vulkan,
"{} driver reports {}-byte minUniformBufferOffsetAlignment; clamping to {}",
driver_label, reported_uniform_alignment, uniform_buffer_alignment_minimum);
}
constexpr VkDeviceSize MIN_STORAGE_ALIGNMENT = 64;
const VkDeviceSize reported_storage_alignment =
properties.properties.limits.minStorageBufferOffsetAlignment;
if (reported_storage_alignment < MIN_STORAGE_ALIGNMENT) {
storage_buffer_alignment_minimum = MIN_STORAGE_ALIGNMENT;
LOG_WARNING(Render_Vulkan,
"{} driver reports {}-byte minStorageBufferOffsetAlignment; clamping to {}",
driver_label, reported_storage_alignment, storage_buffer_alignment_minimum);
}
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
if (sampler_limit > 0) {
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
const size_t reserved = sampler_limit / 4U;
const size_t derived_budget =
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
sampler_heap_budget = derived_budget;
LOG_WARNING(Render_Vulkan,
"{} driver reports max {} samplers; reserving {} (25%) and "
"allowing Eden to use {} (75%) to avoid heap exhaustion",
driver_label, sampler_limit, reserved, sampler_heap_budget);
}
}
@@ -598,15 +662,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
if (is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
LOG_WARNING(Render_Vulkan,
"Intel proprietary drivers do not support MSAA->MSAA image blits. "
"MSAA scaling will use 3D helpers. MSAA resolves work normally.");
cant_blit_msaa = true;
}
has_broken_compute =
CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
!Settings::values.enable_compute_pipelines.GetValue();
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = false; // Default: assume emulation isn't required
if (is_intel_anv) {
LOG_WARNING(Render_Vulkan, "Intel ANV driver does not support native BGR format");
must_emulate_bgr565 = true;
} else if (is_qualcomm) {
LOG_WARNING(Render_Vulkan,
"Qualcomm driver mishandles BGR5 formats, forcing emulation");
must_emulate_bgr565 = true;
} else if (is_arm) {
LOG_WARNING(Render_Vulkan,
"ARM Mali driver mishandles BGR5 formats, forcing emulation");
must_emulate_bgr565 = true;
}
@@ -619,51 +695,55 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
(std::min)(properties.properties.limits.maxVertexInputBindings, 16U);
}
if (is_turnip) {
LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits");
if (is_turnip || is_qualcomm) {
LOG_WARNING(Render_Vulkan, "Driver requires higher-than-reported binding limits");
properties.properties.limits.maxVertexInputBindings = 32;
}
if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) {
LOG_INFO(Render_Vulkan,
"Removing extendedDynamicState2 due to missing extendedDynamicState");
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
}
if (!extensions.extended_dynamic_state2 && extensions.extended_dynamic_state3) {
LOG_INFO(Render_Vulkan,
"Removing extendedDynamicState3 due to missing extendedDynamicState2");
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
}
// Mesa Intel drivers on UHD 620 have broken extended dynamic state (EDS), causing severe
// flickering; it is unknown whether other Intel iGPUs are affected. All UHD driver versions on
// Windows 10+ are also affected, with worse symptoms including outright crashes. If a fixed
// driver release is ever identified, this blanket disable could be narrowed to a version check
// such as `< VK_MAKE_API_VERSION(26, 0, 0)` for the Mesa drivers.
if ((is_mvk || (is_integrated && is_intel_anv) || (is_integrated && is_intel_windows)) && Settings::values.dyna_state.GetValue() != 0) {
LOG_WARNING(Render_Vulkan, "Driver has broken dynamic state, forcing to 0 to prevent graphical issues");
Settings::values.dyna_state.SetValue(0);
}
// Base dynamic states (VIEWPORT, SCISSOR, DEPTH_BIAS, etc.) are ALWAYS active in vk_graphics_pipeline.cpp
// This slider controls EXTENDED dynamic states with accumulative levels per Vulkan specs:
// Level 0 = Core Dynamic States only (Vulkan 1.0)
// Level 1 = Core + VK_EXT_extended_dynamic_state
// Level 2 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2
// Level 3 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2 + VK_EXT_extended_dynamic_state3
switch (Settings::values.dyna_state.GetValue()) {
case 0:
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
[[fallthrough]];
case 1:
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
[[fallthrough]];
case 2:
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
// Level 0: Disable all extended dynamic state extensions
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
break;
case 1:
// Level 1: Enable EDS1, disable EDS2 and EDS3
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
break;
case 2:
// Level 2: Enable EDS1 + EDS2, disable EDS3
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
dynamic_state3_blending = false;
dynamic_state3_enables = false;
break;
case 3:
default:
// Level 3: Enable all (EDS1 + EDS2 + EDS3)
break;
}
if (!Settings::values.vertex_input_dynamic_state.GetValue() || !extensions.extended_dynamic_state) {
// VK_EXT_vertex_input_dynamic_state is independent from EDS
// It can be enabled even without extended_dynamic_state
if (!Settings::values.vertex_input_dynamic_state.GetValue()) {
RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
@@ -708,32 +788,63 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
}
// The wanted format is not supported by hardware, search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
wanted_format, wanted_usage, format_type);
if (ShouldLogUnsupportedFormat(wanted_format, wanted_usage, format_type)) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
wanted_format, wanted_usage, format_type);
}
return wanted_format;
}
std::size_t i = 0;
for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) {
#if defined(ANDROID)
const auto downgrade_r16_to_ldr = [&](VkFormat candidate) -> VkFormat {
if (wanted_format != VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
return candidate;
}
if (candidate != VK_FORMAT_R16G16B16A16_SFLOAT &&
candidate != VK_FORMAT_R16G16B16A16_UNORM) {
return candidate;
}
static constexpr std::array ldr_candidates{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
};
for (const VkFormat format : ldr_candidates) {
if (IsFormatSupported(format, wanted_usage, format_type)) {
return format;
}
}
return candidate;
};
#endif
for (std::size_t i = 0; VkFormat alternative = alternatives[i]; ++i) {
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
}
#if defined(ANDROID)
const VkFormat selected = downgrade_r16_to_ldr(alternative);
#else
const VkFormat selected = alternative;
#endif
LOG_DEBUG(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
wanted_format, alternative, wanted_usage, format_type);
return alternative;
wanted_format, selected, wanted_usage, format_type);
return selected;
}
// No alternatives found, panic
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
wanted_format, wanted_usage, format_type);
if (ShouldLogUnsupportedFormat(wanted_format, wanted_usage, format_type)) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
wanted_format, wanted_usage, format_type);
}
return wanted_format;
}
@@ -751,8 +862,8 @@ void Device::SaveShader(std::span<const u32> spirv) const {
}
bool Device::ComputeIsOptimalAstcSupported() const {
// Disable for now to avoid converting ASTC twice.
static constexpr std::array astc_formats = {
// Verify hardware supports all ASTC formats with optimal tiling to avoid software conversion
static constexpr std::array<VkFormat, 28> astc_formats = {
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
@@ -771,9 +882,10 @@ bool Device::ComputeIsOptimalAstcSupported() const {
if (!features.features.textureCompressionASTC_LDR) {
return false;
}
const auto format_feature_usage{
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
const auto format_feature_usage{VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
for (const auto format : astc_formats) {
const auto physical_format_properties{physical.GetFormatProperties(format)};
@@ -879,11 +991,8 @@ bool Device::ShouldBoostClocks() const {
}
bool Device::HasTimelineSemaphore() const {
if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) {
// Timeline semaphores do not work properly on all Qualcomm drivers.
// They generally work properly with Turnip drivers, but are problematic on some devices
// (e.g. ZTE handsets with Snapdragon 870).
const VkDriverIdKHR driver_id = GetDriverID();
if (driver_id == VK_DRIVER_ID_MESA_TURNIP || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
return false;
}
return features.timeline_semaphore.timelineSemaphore;
@@ -970,14 +1079,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
// Set next pointer.
void** next = &features2.pNext;
// Vulkan 1.2, 1.3 and 1.4 features
// Vulkan 1.2 and 1.3 features
if (instance_version >= VK_API_VERSION_1_2) {
features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
features_1_2.pNext = &features_1_3;
features_1_2.pNext = nullptr;
*next = &features_1_2;
if (instance_version >= VK_API_VERSION_1_3) {
features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
features_1_3.pNext = nullptr;
features_1_2.pNext = &features_1_3;
}
}
// Test all features we know about. If the feature is not available in core at our
@@ -1076,6 +1187,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
SetNext(next, properties.transform_feedback);
}
if (extensions.maintenance5) {
properties.maintenance5.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_PROPERTIES_KHR;
SetNext(next, properties.maintenance5);
}
if (extensions.multi_draw) {
properties.multi_draw.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT;
SetNext(next, properties.multi_draw);
}
// Perform the property fetch.
physical.GetProperties2(properties2);
@@ -1108,14 +1229,85 @@ bool Device::GetSuitability(bool requires_swapchain) {
}
}
// VK_DYNAMIC_STATE
// Driver detection variables for workarounds in GetSuitability
const VkDriverId driver_id = properties.driver.driverID;
[[maybe_unused]] const bool is_amd_driver =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
[[maybe_unused]] const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
// VK_EXT_CUSTOM_BORDER_COLOR
if (extensions.custom_border_color && (is_qualcomm || is_turnip)) {
const char* driver_name = is_turnip ? "Turnip" : "Adreno";
LOG_WARNING(Render_Vulkan,
"{} driver previously required custom border color disablement; leaving VK_EXT_custom_border_color enabled for testing",
driver_name);
}
// VK_EXT_extended_dynamic_state2 below this will appear drivers that need workarounds.
// VK_EXT_extended_dynamic_state3 below this will appear drivers that need workarounds.
[[maybe_unused]] const auto device_id = properties.properties.deviceID;
// Samsung: Broken extendedDynamicState3ColorBlendEquation
// Disable blend equation dynamic state, force static pipeline state
if (extensions.extended_dynamic_state3 &&
(driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
LOG_WARNING(Render_Vulkan,
"Samsung: Disabling broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
}
// Intel Windows < 27.20.100.0: Broken VertexInputDynamicState
// Disable VertexInputDynamicState on old Intel Windows drivers
if (extensions.vertex_input_dynamic_state && is_intel_windows) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
LOG_WARNING(Render_Vulkan,
"Intel Windows < 27.20.100.0: Disabling broken VK_EXT_vertex_input_dynamic_state");
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
}
if (Settings::values.dyna_state.GetValue() == 0) {
LOG_INFO(Render_Vulkan, "Extended Dynamic State disabled by user setting, clearing all EDS features");
features.custom_border_color.customBorderColors = false;
features.custom_border_color.customBorderColorWithoutFormat = false;
features.extended_dynamic_state.extendedDynamicState = false;
features.extended_dynamic_state2.extendedDynamicState2 = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask = false;
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
}
// Return whether we were suitable.
return suitable;
}
void Device::RemoveUnsuitableExtensions() {
// VK_EXT_custom_border_color
extensions.custom_border_color = features.custom_border_color.customBorderColors &&
features.custom_border_color.customBorderColorWithoutFormat;
// Enable extension if driver supports it, then check individual features
// - customBorderColors: Required to use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT
// - customBorderColorWithoutFormat: Optional, allows VK_FORMAT_UNDEFINED
// If only customBorderColors is available, we must provide a specific format
if (extensions.custom_border_color) {
// Verify that at least customBorderColors is available
if (!features.custom_border_color.customBorderColors) {
LOG_WARNING(Render_Vulkan,
"VK_EXT_custom_border_color reported but customBorderColors feature not available, disabling");
extensions.custom_border_color = false;
}
}
RemoveExtensionFeatureIfUnsuitable(extensions.custom_border_color, features.custom_border_color,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
@@ -1144,21 +1336,79 @@ void Device::RemoveUnsuitableExtensions() {
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
// VK_EXT_extended_dynamic_state3
dynamic_state3_blending =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable &&
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation &&
const bool supports_color_blend_enable =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable;
const bool supports_color_blend_equation =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation;
const bool supports_color_write_mask =
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask;
dynamic_state3_enables =
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable &&
dynamic_state3_blending = supports_color_blend_enable && supports_color_blend_equation &&
supports_color_write_mask;
const bool supports_depth_clamp_enable =
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable;
const bool supports_logic_op_enable =
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
const bool supports_line_raster_mode =
features.extended_dynamic_state3.extendedDynamicState3LineRasterizationMode &&
extensions.line_rasterization && features.line_rasterization.rectangularLines;
const bool supports_conservative_raster_mode =
features.extended_dynamic_state3.extendedDynamicState3ConservativeRasterizationMode &&
extensions.conservative_rasterization;
const bool supports_line_stipple_enable =
features.extended_dynamic_state3.extendedDynamicState3LineStippleEnable &&
extensions.line_rasterization && features.line_rasterization.stippledRectangularLines;
const bool supports_alpha_to_coverage =
features.extended_dynamic_state3.extendedDynamicState3AlphaToCoverageEnable;
const bool supports_alpha_to_one =
features.extended_dynamic_state3.extendedDynamicState3AlphaToOneEnable &&
features.features.alphaToOne;
dynamic_state3_depth_clamp_enable = supports_depth_clamp_enable;
dynamic_state3_logic_op_enable = supports_logic_op_enable;
dynamic_state3_line_raster_mode = supports_line_raster_mode;
dynamic_state3_conservative_raster_mode = supports_conservative_raster_mode;
dynamic_state3_line_stipple_enable = supports_line_stipple_enable;
dynamic_state3_alpha_to_coverage = supports_alpha_to_coverage;
dynamic_state3_alpha_to_one = supports_alpha_to_one;
dynamic_state3_enables = dynamic_state3_depth_clamp_enable || dynamic_state3_logic_op_enable ||
dynamic_state3_line_raster_mode ||
dynamic_state3_conservative_raster_mode ||
dynamic_state3_line_stipple_enable ||
dynamic_state3_alpha_to_coverage || dynamic_state3_alpha_to_one;
extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables;
dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3;
dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3;
if (!extensions.extended_dynamic_state3) {
dynamic_state3_blending = false;
dynamic_state3_enables = false;
dynamic_state3_depth_clamp_enable = false;
dynamic_state3_logic_op_enable = false;
dynamic_state3_line_raster_mode = false;
dynamic_state3_conservative_raster_mode = false;
dynamic_state3_line_stipple_enable = false;
dynamic_state3_alpha_to_coverage = false;
dynamic_state3_alpha_to_one = false;
}
RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state3,
features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
// VK_EXT_robustness2
// Enable if at least one robustness2 feature is available
extensions.robustness_2 = features.robustness2.robustBufferAccess2 ||
features.robustness2.robustImageAccess2 ||
features.robustness2.nullDescriptor;
RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// VK_EXT_image_robustness
// Enable if robustImageAccess is available
extensions.image_robustness = features.image_robustness.robustImageAccess;
RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness,
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
// VK_EXT_provoking_vertex
if (Settings::values.provoking_vertex.GetValue()) {
extensions.provoking_vertex = features.provoking_vertex.provokingVertexLast
@@ -1196,15 +1446,29 @@ void Device::RemoveUnsuitableExtensions() {
VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
// VK_EXT_transform_feedback
// We only require the basic transformFeedback feature and at least
// one transform feedback buffer. We keep transformFeedbackQueries as it's used by
// the streaming byte count implementation. GeometryStreams and multiple streams
// are not strictly required since we currently support only stream 0.
extensions.transform_feedback =
features.transform_feedback.transformFeedback &&
features.transform_feedback.geometryStreams &&
properties.transform_feedback.maxTransformFeedbackStreams >= 4 &&
properties.transform_feedback.maxTransformFeedbackBuffers > 0 &&
properties.transform_feedback.transformFeedbackQueries &&
properties.transform_feedback.transformFeedbackDraw;
properties.transform_feedback.transformFeedbackQueries;
if (extensions.transform_feedback) {
features.transform_feedback.transformFeedback = VK_TRUE;
features.transform_feedback.geometryStreams = VK_TRUE;
} else {
features.transform_feedback.transformFeedback = VK_FALSE;
features.transform_feedback.geometryStreams = VK_FALSE;
}
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
if (extensions.transform_feedback) {
LOG_INFO(Render_Vulkan, "VK_EXT_transform_feedback enabled (buffers={}, queries={})",
properties.transform_feedback.maxTransformFeedbackBuffers,
properties.transform_feedback.transformFeedbackQueries);
}
// VK_EXT_vertex_input_dynamic_state
extensions.vertex_input_dynamic_state =
@@ -1213,6 +1477,18 @@ void Device::RemoveUnsuitableExtensions() {
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
// VK_EXT_multi_draw
extensions.multi_draw = features.multi_draw.multiDraw;
if (extensions.multi_draw) {
features.multi_draw.multiDraw = VK_TRUE;
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw: maxMultiDrawCount={}",
properties.multi_draw.maxMultiDrawCount);
} else {
features.multi_draw.multiDraw = VK_FALSE;
}
RemoveExtensionFeatureIfUnsuitable(extensions.multi_draw, features.multi_draw,
VK_EXT_MULTI_DRAW_EXTENSION_NAME);
// VK_KHR_pipeline_executable_properties
if (Settings::values.renderer_shader_feedback.GetValue()) {
extensions.pipeline_executable_properties =
@@ -1236,6 +1512,98 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
features.workgroup_memory_explicit_layout,
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
// VK_EXT_swapchain_maintenance1 (extension only, has features)
// Requires VK_EXT_surface_maintenance1 instance extension
extensions.swapchain_maintenance1 = features.swapchain_maintenance1.swapchainMaintenance1;
if (extensions.swapchain_maintenance1) {
// Check if VK_EXT_surface_maintenance1 instance extension is available
const auto instance_extensions = vk::EnumerateInstanceExtensionProperties(dld);
const bool has_surface_maintenance1 = instance_extensions && std::ranges::any_of(*instance_extensions,
[](const VkExtensionProperties& prop) {
return std::strcmp(prop.extensionName, VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME) == 0;
});
if (!has_surface_maintenance1) {
LOG_WARNING(Render_Vulkan,
"VK_EXT_swapchain_maintenance1 requires VK_EXT_surface_maintenance1, disabling");
extensions.swapchain_maintenance1 = false;
features.swapchain_maintenance1.swapchainMaintenance1 = false;
}
}
RemoveExtensionFeatureIfUnsuitable(extensions.swapchain_maintenance1, features.swapchain_maintenance1,
VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
// VK_KHR_maintenance1 (core in Vulkan 1.1, no features)
extensions.maintenance1 = loaded_extensions.contains(VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance1, VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
// VK_KHR_maintenance2 (core in Vulkan 1.1, no features)
extensions.maintenance2 = loaded_extensions.contains(VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance2, VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
// VK_KHR_maintenance3 (core in Vulkan 1.1, no features)
extensions.maintenance3 = loaded_extensions.contains(VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance3, VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
// VK_KHR_maintenance4
extensions.maintenance4 = features.maintenance4.maintenance4;
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance4, features.maintenance4,
VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
// VK_KHR_maintenance5
extensions.maintenance5 = features.maintenance5.maintenance5;
if (extensions.maintenance5) {
LOG_INFO(Render_Vulkan, "VK_KHR_maintenance5 properties: polygonModePointSize={} "
"depthStencilSwizzleOne={} earlyFragmentTests={} nonStrictWideLines={}",
properties.maintenance5.polygonModePointSize,
properties.maintenance5.depthStencilSwizzleOneSupport,
properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting &&
properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting,
properties.maintenance5.nonStrictWideLinesUseParallelogram);
}
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance5, features.maintenance5,
VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
// VK_KHR_maintenance6
extensions.maintenance6 = features.maintenance6.maintenance6;
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance6, features.maintenance6,
VK_KHR_MAINTENANCE_6_EXTENSION_NAME);
// VK_KHR_maintenance7 (proposed for Vulkan 1.4, no features)
extensions.maintenance7 = loaded_extensions.contains(VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance7, VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
// VK_KHR_maintenance8 (proposed for Vulkan 1.4, no features)
extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
}
/// Reports whether every shader stage in stage_mask advertises subgroup support.
/// An empty mask is vacuously supported (no stages were requested).
bool Device::SupportsSubgroupStage(VkShaderStageFlags stage_mask) const {
    if (stage_mask == 0) {
        return true;
    }
    // All requested stage bits must be present in the driver-reported set.
    return (properties.subgroup_properties.supportedStages & stage_mask) == stage_mask;
}
/// Reports whether the driver advertises the given subgroup operation and,
/// if stage_mask is non-zero, supports subgroup use in every requested stage.
bool Device::IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature,
                                        VkShaderStageFlags stage_mask) const {
    const bool has_operation =
        (properties.subgroup_properties.supportedOperations & feature) != 0;
    return has_operation && SupportsSubgroupStage(stage_mask);
}
/// Reports whether the driver can execute the full warp-intrinsic set
/// (basic, vote, arithmetic, ballot, shuffle, relative shuffle) for the
/// given shader stage.
bool Device::SupportsWarpIntrinsics(VkShaderStageFlagBits stage) const {
    constexpr VkSubgroupFeatureFlags required_ops =
        VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
        VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
        VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
    const VkSubgroupFeatureFlags available =
        properties.subgroup_properties.supportedOperations;
    // Every required operation bit must be advertised by the driver...
    if ((available & required_ops) != required_ops) {
        return false;
    }
    // ...and subgroup ops must be usable from the requested stage.
    return SupportsSubgroupStage(stage);
}
void Device::SetupFamilies(VkSurfaceKHR surface) {
@@ -1273,6 +1641,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
}
}
/// Returns the driver-specific sampler allocation budget, or std::nullopt
/// when no budget clamp was configured (sampler_heap_budget == 0).
std::optional<size_t> Device::GetSamplerHeapBudget() const {
    if (sampler_heap_budget != 0) {
        return sampler_heap_budget;
    }
    return std::nullopt;
}
u64 Device::GetDeviceMemoryUsage() const {
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

View File

@@ -6,6 +6,8 @@
#pragma once
#include <algorithm>
#include <optional>
#include <set>
#include <span>
#include <string>
@@ -37,9 +39,13 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \
FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \
FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \
shader_demote_to_helper_invocation) \
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control)
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \
FEATURE(KHR, Maintenance4, MAINTENANCE_4, maintenance4)
#define FOR_EACH_VK_FEATURE_1_4(FEATURE)
// Define all features which may be used by the implementation and require an extension here.
#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
@@ -52,12 +58,16 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
FEATURE(EXT, MultiDraw, MULTI_DRAW, multi_draw) \
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
primitive_topology_list_restart) \
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
FEATURE(EXT, SwapchainMaintenance1, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \
FEATURE(KHR, Maintenance5, MAINTENANCE_5, maintenance5) \
FEATURE(KHR, Maintenance6, MAINTENANCE_6, maintenance6) \
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
@@ -84,6 +94,11 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(KHR, SWAPCHAIN, swapchain) \
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
@@ -110,6 +125,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
@@ -161,6 +177,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE_NAME(depth_bias_control, depthBiasExact) \
FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
FEATURE_NAME(format_a4b4g4r4, formatA4B4G4R4) \
FEATURE_NAME(image_robustness, robustImageAccess) \
FEATURE_NAME(index_type_uint8, indexTypeUint8) \
FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
@@ -287,12 +304,14 @@ public:
/// Returns uniform buffer alignment requirement.
VkDeviceSize GetUniformBufferAlignment() const {
return properties.properties.limits.minUniformBufferOffsetAlignment;
return (std::max)(properties.properties.limits.minUniformBufferOffsetAlignment,
uniform_buffer_alignment_minimum);
}
/// Returns storage alignment requirement.
VkDeviceSize GetStorageBufferAlignment() const {
return properties.properties.limits.minStorageBufferOffsetAlignment;
return (std::max)(properties.properties.limits.minStorageBufferOffsetAlignment,
storage_buffer_alignment_minimum);
}
/// Returns the maximum range for storage buffers.
@@ -310,11 +329,22 @@ public:
return properties.properties.limits.maxComputeSharedMemorySize;
}
/// Returns the device limit on dynamic storage buffer descriptors in a single set.
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
    const auto& limits = properties.properties.limits;
    return limits.maxDescriptorSetStorageBuffersDynamic;
}
/// Returns the device limit on dynamic uniform buffer descriptors in a single set.
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
    const auto& limits = properties.properties.limits;
    return limits.maxDescriptorSetUniformBuffersDynamic;
}
/// Returns the float control properties reported by the physical device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
    const auto& float_controls = properties.float_controls;
    return float_controls;
}
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return features.features.textureCompressionASTC_LDR;
@@ -330,6 +360,11 @@ public:
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
}
/// Returns true if vkResetQueryPool can be used from the host.
bool SupportsHostQueryReset() const {
return features.host_query_reset.hostQueryReset;
}
/// Returns true if the device supports float64 natively.
bool IsFloat64Supported() const {
return features.features.shaderFloat64;
@@ -360,11 +395,26 @@ public:
return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
}
/// Returns true if the device supports the provided subgroup feature.
bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature) const {
return properties.subgroup_properties.supportedOperations & feature;
/// Returns true if the device supports the provided subgroup feature for the given stages.
bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature,
VkShaderStageFlags stage_mask = 0) const;
/// Returns true if the device reports subgroup support for the provided shader stages.
bool SupportsSubgroupStage(VkShaderStageFlags stage_mask) const;
/// Returns the set of stages that report subgroup support.
VkShaderStageFlags GetSubgroupSupportedStages() const {
return properties.subgroup_properties.supportedStages;
}
/// Returns the set of subgroup operations reported by the driver.
VkSubgroupFeatureFlags GetSubgroupSupportedOperations() const {
return properties.subgroup_properties.supportedOperations;
}
/// Returns true if the driver can execute all warp intrinsics for the given shader stage.
bool SupportsWarpIntrinsics(VkShaderStageFlagBits stage) const;
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return properties.push_descriptor.maxPushDescriptors;
@@ -440,9 +490,14 @@ public:
return extensions.swapchain_mutable_format;
}
/// Returns true if VK_EXT_swapchain_maintenance1 is enabled.
bool IsExtSwapchainMaintenance1Enabled() const {
return extensions.swapchain_maintenance1;
}
/// Returns true if VK_KHR_shader_float_controls is enabled.
bool IsKhrShaderFloatControlsSupported() const {
return extensions.shader_float_controls;
return extensions.shader_float_controls && !disable_shader_float_controls_usage;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
@@ -476,10 +531,18 @@ public:
}
/// Returns true if the device supports VK_EXT_shader_stencil_export.
/// Note: Most Mali/NVIDIA drivers don't support this. Use hardware blits as fallback.
bool IsExtShaderStencilExportSupported() const {
return extensions.shader_stencil_export;
}
/// Returns true if depth/stencil operations can be performed efficiently.
/// Either through shader export or hardware blits.
bool CanPerformDepthStencilOperations() const {
return extensions.shader_stencil_export || is_blit_depth24_stencil8_supported ||
is_blit_depth32_stencil8_supported;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return extensions.depth_range_unrestricted;
@@ -520,6 +583,46 @@ public:
return extensions.custom_border_color;
}
/// Returns true if the device supports VK_EXT_image_robustness.
bool IsExtImageRobustnessSupported() const {
return extensions.image_robustness;
}
/// Returns true if robustImageAccess is supported.
bool IsRobustImageAccessSupported() const {
return features.image_robustness.robustImageAccess;
}
/// Returns true if the device supports VK_EXT_robustness2.
bool IsExtRobustness2Supported() const {
return extensions.robustness_2;
}
/// Returns true if robustBufferAccess2 is supported.
bool IsRobustBufferAccess2Supported() const {
return features.robustness2.robustBufferAccess2;
}
/// Returns true if robustImageAccess2 is supported.
bool IsRobustImageAccess2Supported() const {
return features.robustness2.robustImageAccess2;
}
/// Returns true if nullDescriptor is supported.
bool IsNullDescriptorSupported() const {
return features.robustness2.nullDescriptor;
}
/// Returns true if customBorderColors feature is available.
bool IsCustomBorderColorsSupported() const {
return features.custom_border_color.customBorderColors;
}
/// Returns true if customBorderColorWithoutFormat feature is available.
bool IsCustomBorderColorWithoutFormatSupported() const {
return features.custom_border_color.customBorderColorWithoutFormat;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
bool IsExtExtendedDynamicStateSupported() const {
return extensions.extended_dynamic_state;
@@ -569,9 +672,69 @@ public:
return extensions.line_rasterization;
}
/// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
bool SupportsRectangularLines() const {
return features.line_rasterization.rectangularLines != VK_FALSE;
}
bool SupportsSmoothLines() const {
return features.line_rasterization.smoothLines != VK_FALSE;
}
bool SupportsStippledRectangularLines() const {
return features.line_rasterization.stippledRectangularLines != VK_FALSE;
}
bool SupportsAlphaToOne() const {
return features.features.alphaToOne != VK_FALSE;
}
bool SupportsDualSourceBlend(u32 required_dual_source_attachments = 1) const {
const u32 max_dual = properties.properties.limits.maxFragmentDualSrcAttachments;
return features.features.dualSrcBlend != VK_FALSE &&
max_dual >= required_dual_source_attachments;
}
u32 MaxFragmentDualSrcAttachments() const {
return properties.properties.limits.maxFragmentDualSrcAttachments;
}
bool SupportsDynamicState3DepthClampEnable() const {
return dynamic_state3_depth_clamp_enable;
}
bool SupportsDynamicState3LogicOpEnable() const {
return dynamic_state3_logic_op_enable;
}
bool SupportsDynamicState3LineRasterizationMode() const {
return dynamic_state3_line_raster_mode;
}
bool SupportsDynamicState3ConservativeRasterizationMode() const {
return dynamic_state3_conservative_raster_mode;
}
bool SupportsDynamicState3LineStippleEnable() const {
return dynamic_state3_line_stipple_enable;
}
bool SupportsDynamicState3AlphaToCoverageEnable() const {
return dynamic_state3_alpha_to_coverage;
}
bool SupportsDynamicState3AlphaToOneEnable() const {
return dynamic_state3_alpha_to_one;
}
/// Returns true when the user enabled extended core dynamic states (level > 0).
bool UsesAdvancedCoreDynamicState() const {
return Settings::values.dyna_state.GetValue() > 0;
}
/// Returns true if VK_EXT_vertex_input_dynamic_state is enabled on the device.
bool IsExtVertexInputDynamicStateSupported() const {
return extensions.vertex_input_dynamic_state;
return extensions.vertex_input_dynamic_state &&
features.vertex_input_dynamic_state.vertexInputDynamicState;
}
/// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation
@@ -602,10 +765,11 @@ public:
/// Returns the minimum supported version of SPIR-V.
u32 SupportedSpirvVersion() const {
if (instance_version >= VK_API_VERSION_1_3) {
const bool has_float_controls = extensions.shader_float_controls;
if (instance_version >= VK_API_VERSION_1_3 && has_float_controls) {
return 0x00010600U;
}
if (extensions.spirv_1_4) {
if (extensions.spirv_1_4 && has_float_controls) {
return 0x00010400U;
}
return 0x00010300U;
@@ -631,6 +795,8 @@ public:
return has_broken_parallel_compiling;
}
std::optional<size_t> GetSamplerHeapBudget() const;
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return properties.driver.driverName;
@@ -703,6 +869,68 @@ public:
return features2.features.multiViewport;
}
/// Returns true if the device supports VK_KHR_maintenance1.
bool IsKhrMaintenance1Supported() const {
return extensions.maintenance1;
}
/// Returns true if the device supports VK_KHR_maintenance2.
bool IsKhrMaintenance2Supported() const {
return extensions.maintenance2;
}
/// Returns true if the device supports VK_KHR_maintenance3.
bool IsKhrMaintenance3Supported() const {
return extensions.maintenance3;
}
/// Returns true if the device supports VK_KHR_maintenance4.
bool IsKhrMaintenance4Supported() const {
return extensions.maintenance4;
}
/// Returns true if the device supports VK_KHR_maintenance5.
bool IsKhrMaintenance5Supported() const {
return extensions.maintenance5;
}
/// Returns true if polygon mode POINT supports gl_PointSize.
bool SupportsPolygonModePointSize() const {
return extensions.maintenance5 && properties.maintenance5.polygonModePointSize;
}
/// Returns true if depth/stencil swizzle ONE is supported.
bool SupportsDepthStencilSwizzleOne() const {
return extensions.maintenance5 && properties.maintenance5.depthStencilSwizzleOneSupport;
}
/// Returns true if early fragment tests optimizations are available.
bool SupportsEarlyFragmentTests() const {
return extensions.maintenance5 &&
properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting &&
properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting;
}
/// Returns true if the device supports VK_KHR_maintenance6.
bool IsKhrMaintenance6Supported() const {
return extensions.maintenance6;
}
/// Returns true if the device supports VK_EXT_multi_draw.
bool IsExtMultiDrawSupported() const {
return extensions.multi_draw;
}
/// Returns true if the device supports VK_KHR_maintenance7.
bool IsKhrMaintenance7Supported() const {
return extensions.maintenance7;
}
/// Returns true if the device supports VK_KHR_maintenance8.
bool IsKhrMaintenance8Supported() const {
return extensions.maintenance8;
}
[[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
u32 driver_version) {
if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
@@ -740,13 +968,13 @@ private:
void RemoveUnsuitableExtensions();
void RemoveExtension(bool& extension, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
template <typename Feature>
void RemoveExtensionFeature(bool& extension, Feature& feature,
const std::string& extension_name);
template <typename Feature>
void RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
void RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
const std::string& extension_name);
/// Sets up queue families.
@@ -786,6 +1014,7 @@ private:
FOR_EACH_VK_FEATURE_1_1(FEATURE);
FOR_EACH_VK_FEATURE_1_2(FEATURE);
FOR_EACH_VK_FEATURE_1_3(FEATURE);
FOR_EACH_VK_FEATURE_1_4(FEATURE);
FOR_EACH_VK_FEATURE_EXT(FEATURE);
FOR_EACH_VK_EXTENSION(EXTENSION);
@@ -802,6 +1031,7 @@ private:
FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_4(FEATURE_CORE);
FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT);
#undef FEATURE_CORE
@@ -817,6 +1047,8 @@ private:
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
VkPhysicalDeviceMaintenance5PropertiesKHR maintenance5{};
VkPhysicalDeviceMultiDrawPropertiesEXT multi_draw{};
VkPhysicalDeviceProperties properties{};
};
@@ -846,9 +1078,20 @@ private:
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
bool disable_shader_float_controls_usage{}; ///< True when VK_KHR_shader_float_controls cannot be safely used.
bool dynamic_state3_blending{}; ///< Has blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has at least one enable feature of dynamic_state3.
bool dynamic_state3_depth_clamp_enable{};
bool dynamic_state3_logic_op_enable{};
bool dynamic_state3_line_raster_mode{};
bool dynamic_state3_conservative_raster_mode{};
bool dynamic_state3_line_stipple_enable{};
bool dynamic_state3_alpha_to_coverage{};
bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
VkDeviceSize uniform_buffer_alignment_minimum{}; ///< Minimum enforced UBO alignment.
VkDeviceSize storage_buffer_alignment_minimum{}; ///< Minimum enforced SSBO alignment.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

View File

@@ -116,6 +116,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdDrawIndirectCount);
X(vkCmdDrawIndexedIndirectCount);
X(vkCmdDrawIndirectByteCountEXT);
X(vkCmdDrawMultiEXT);
X(vkCmdDrawMultiIndexedEXT);
X(vkCmdEndConditionalRenderingEXT);
X(vkCmdEndQuery);
X(vkCmdEndRenderPass);
@@ -145,6 +147,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetDepthWriteEnableEXT);
X(vkCmdSetPrimitiveRestartEnableEXT);
X(vkCmdSetRasterizerDiscardEnableEXT);
X(vkCmdSetAlphaToCoverageEnableEXT);
X(vkCmdSetAlphaToOneEnableEXT);
X(vkCmdSetConservativeRasterizationModeEXT);
X(vkCmdSetLineRasterizationModeEXT);
X(vkCmdSetLineStippleEnableEXT);

View File

@@ -216,6 +216,8 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
PFN_vkCmdDrawIndirectByteCountEXT vkCmdDrawIndirectByteCountEXT{};
PFN_vkCmdDrawMultiEXT vkCmdDrawMultiEXT{};
PFN_vkCmdDrawMultiIndexedEXT vkCmdDrawMultiIndexedEXT{};
PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{};
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
@@ -238,6 +240,8 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
PFN_vkCmdSetPrimitiveRestartEnableEXT vkCmdSetPrimitiveRestartEnableEXT{};
PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT{};
PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT{};
PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT{};
PFN_vkCmdSetConservativeRasterizationModeEXT vkCmdSetConservativeRasterizationModeEXT{};
PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT{};
PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT{};
@@ -1239,6 +1243,19 @@ public:
counter_buffer_offset, counter_offset, stride);
}
void DrawMultiEXT(u32 draw_count, const VkMultiDrawInfoEXT* vertex_info,
u32 instance_count, u32 first_instance, u32 stride) const noexcept {
dld->vkCmdDrawMultiEXT(handle, draw_count, vertex_info, instance_count, first_instance,
stride);
}
void DrawMultiIndexedEXT(u32 draw_count, const VkMultiDrawIndexedInfoEXT* index_info,
u32 instance_count, u32 first_instance, u32 stride,
const int32_t* vertex_offset) const noexcept {
dld->vkCmdDrawMultiIndexedEXT(handle, draw_count, index_info, instance_count,
first_instance, stride, vertex_offset);
}
void ClearAttachments(Span<VkClearAttachment> attachments,
Span<VkClearRect> rects) const noexcept {
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
@@ -1471,6 +1488,14 @@ public:
dld->vkCmdSetLogicOpEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
void SetAlphaToCoverageEnableEXT(bool enable) const noexcept {
dld->vkCmdSetAlphaToCoverageEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
void SetAlphaToOneEnableEXT(bool enable) const noexcept {
dld->vkCmdSetAlphaToOneEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
void SetDepthClampEnableEXT(bool enable) const noexcept {
dld->vkCmdSetDepthClampEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}