Compare commits

..

209 Commits

Author SHA1 Message Date
MaranBr
18704cc65f Reduce synchronization overhead and improve performance in DMA operations 2025-12-21 23:49:16 -03:00
PavelBARABANOV
b40f91974b Disabling "Vulkan Asynchronous Presentation" for Android 2025-12-21 23:48:43 -03:00
Caio Oliveira
5f6ee64dc3 DONOTMERGE 2025-12-21 23:46:09 -03:00
CamilleLaVey
722a8e4ede [shader_environment] TICEntry limits to Empty 2025-12-21 21:16:37 -03:00
CamilleLaVey
9caa1d5b1f [revert] StreamingByteCount conditional to TFB Active 2025-12-21 21:16:37 -03:00
CamilleLaVey
c0ea48e440 [revert] TIC entry + Kill Switch for TFC on TFB 2025-12-21 21:16:37 -03:00
CamilleLaVey
03e872f69c [shader_environment, vk] TIC entry + Kill Switch for TFC on TFB 2025-12-21 21:16:37 -03:00
CamilleLaVey
61ff781e0a [vk, rasterizer] StreamingByteCount conditional to TFB Active 2025-12-21 21:16:37 -03:00
CamilleLaVey
ee2f1a62de [vk] Start of a downhill 2025-12-21 21:16:36 -03:00
crueter
aee49e4877 Revert "[vk] Correct polygon draw topology mapping for line and point modes (#2834)"
This reverts commit 6ba25b6cc0.
2025-12-21 21:16:36 -03:00
CamilleLaVey
c5f30776b6 [vk] sRGB format handling for Storage. 2025-12-21 21:16:36 -03:00
CamilleLaVey
ae1c974c2e [vk] Ordering UnsupportedFormatKey 2025-12-21 21:16:36 -03:00
CamilleLaVey
1aec0c0c50 [vk] Image Remaining Layers 3D 2025-12-21 21:16:36 -03:00
CamilleLaVey
c15cd06711 [vk] Another try with Depth/Stencil handling 2025-12-21 21:16:36 -03:00
CamilleLaVey
1067a69daf [vk, qcom] Code clean-up. 2025-12-21 21:16:35 -03:00
CamilleLaVey
d6a8c0b013 [revert] Added linear filtering in texture blitting operations 2025-12-21 21:16:35 -03:00
CamilleLaVey
59a2fdba90 Fix building 2025-12-21 21:16:35 -03:00
CamilleLaVey
7e875e9072 [vk] SL complete removal 2025-12-21 21:16:35 -03:00
CamilleLaVey
fc6081d3b9 [revert] Allocate data transfer from Maxwell to VK using Sample Locations as channel for MSAA 2025-12-21 21:16:35 -03:00
CamilleLaVey
ff36a072e3 [revert] SL Sample Count Clamp 2025-12-21 21:16:35 -03:00
CamilleLaVey
7fccd7573a [revert] SL Table order 2025-12-21 21:16:35 -03:00
CamilleLaVey
3e5ba87a4a [vk] SL Table order 2025-12-21 21:16:34 -03:00
CamilleLaVey
731965ac8f [vk] SL Sample Count Clamp 2025-12-21 21:16:34 -03:00
CamilleLaVey
094b2a3274 [vk] DrainPendingBuild 2025-12-21 21:16:34 -03:00
CamilleLaVey
2ce3e09d62 [vk, qcom] Removed parallel compiling from qcom entirely 2025-12-21 21:16:34 -03:00
CamilleLaVey
cc47aa473a [vk, qcom, turnip] TimelineSemaphore removal 2025-12-21 21:16:34 -03:00
CamilleLaVey
b0cd2e8cbe [vk] Adjusted Texel Block View for Depth/Stencil attachment images 2025-12-21 21:16:34 -03:00
CamilleLaVey
2ccf9811b7 [vk, qcom] SFC disabled 2025-12-21 21:16:33 -03:00
CamilleLaVey
583a9fd320 [maxwell, vk] Allocate data transfer from Maxwell to VK using Sample Locations as channel for MSAA 2025-12-21 21:16:33 -03:00
CamilleLaVey
6df9d06e65 [vk] Removed unused extensions 2025-12-21 21:16:33 -03:00
CamilleLaVey
d7a120fe74 [revert] Adjusting unused features 2025-12-21 21:16:33 -03:00
CamilleLaVey
6f6ce56528 [revert] fix building 2025-12-21 21:16:33 -03:00
CamilleLaVey
dc8c6ce6d5 [vk] TEST: SFC + WGML 2025-12-21 21:16:33 -03:00
CamilleLaVey
22e8b24361 fix building 2025-12-21 21:16:32 -03:00
CamilleLaVey
f63226ff10 [vk, qcom] Adjusting unused features 2025-12-21 21:16:32 -03:00
CamilleLaVey
9f3803c986 [vk] Hotfix for DepthBounds and StencilMask. 2025-12-21 21:16:32 -03:00
CamilleLaVey
5899816f7c [revert] Update Gradle to 8.13.1 2025-12-21 21:16:32 -03:00
CamilleLaVey
280d05452a [Android] Core 1++ 2025-12-21 21:16:32 -03:00
CamilleLaVey
ccc31edfbb another one 2025-12-21 21:16:32 -03:00
CamilleLaVey
7ce570f420 Missing header 2025-12-21 21:16:32 -03:00
CamilleLaVey
fbf023fa88 [vk] Exception modified & logged 2025-12-21 21:16:31 -03:00
CamilleLaVey
98b4bfb461 [hle] BufferDescriptorC 2025-12-21 21:16:31 -03:00
CamilleLaVey
4e6c978574 [vk, qcom] TimelineSemaphore syncs to GPUTick. 2025-12-21 21:16:31 -03:00
CamilleLaVey
09b897dbf2 [gl] WaitTick 2025-12-21 21:16:31 -03:00
CamilleLaVey
3fc9f0efe3 [gl, vk] Access Tracking Synchronization 2025-12-21 21:16:31 -03:00
CamilleLaVey
b17b70e09d [vk] SurfaceType Depth/Stencil 2025-12-21 21:16:31 -03:00
CamilleLaVey
65083b10d8 [vk] SamplerComponentType upgraded DepthCompareSampling 2025-12-21 21:16:30 -03:00
CamilleLaVey
f2227af36c [Vk] Sample Locations Adjustments for Depth/Stencil 2025-12-21 21:16:30 -03:00
CamilleLaVey
f1a5f9137b [vk] Sample Location Depth Bit 2025-12-21 21:16:30 -03:00
CamilleLaVey
f933828749 [vk] Increased PipelineWorkers for testing purposes 2025-12-21 21:16:30 -03:00
CamilleLaVey
05e8e1d494 [vk] KeepAliveTick in Scheduler 2025-12-21 21:16:30 -03:00
CamilleLaVey
5937d19750 [vk] Sample Locations ordering 2025-12-21 21:16:30 -03:00
CamilleLaVey
313e885f7a Hotfix 2025-12-21 21:16:29 -03:00
CamilleLaVey
6b0b72e034 [vk] Re-introduction to MSAA - Sample Locations 2025-12-21 21:16:29 -03:00
CamilleLaVey
c6d85b7589 [vk, mobile, vendor] MegaBuffer removal 2025-12-21 21:16:29 -03:00
CamilleLaVey
005b1dd3be [vk, pipeline] Added In-flight conditional for multiple pipeline compilations 2025-12-21 21:16:29 -03:00
CamilleLaVey
2c4748cc3d [vk, qcom] Extending GetTotalPipelineWorker resources. 2025-12-21 21:16:29 -03:00
CamilleLaVey
a2dbff0bc3 [vk] SanitizeComponent Adjustment 2025-12-21 21:16:29 -03:00
CamilleLaVey
1574e7e804 [vk] DualBlendFactor 2025-12-21 21:16:28 -03:00
CamilleLaVey
ef0061f72b [vk] CounterStreamer 2025-12-21 21:16:28 -03:00
CamilleLaVey
b76b74dcb7 [vk, qcom] Re-enabling CBC for further testing 2025-12-21 21:16:28 -03:00
CamilleLaVey
f6ecb812ca [vk] Sanitize Format/Component 2025-12-21 21:16:28 -03:00
CamilleLaVey
54d6a2015d [vk] Extended PixelFormat for Depth/Stencil 2025-12-21 21:16:28 -03:00
CamilleLaVey
9a2e2f922f [vk] Reordering PixelFormatNumeric 2025-12-21 21:16:28 -03:00
CamilleLaVey
1f74ed2272 Added missing headers 2025-12-21 21:16:27 -03:00
CamilleLaVey
1307614ad0 [gl, vk] Corroborating new helpers order 2025-12-21 21:16:27 -03:00
CamilleLaVey
989a6b7870 Meow 2025-12-21 21:16:27 -03:00
CamilleLaVey
1cf65cba2a [vk] Extending TF handling 2025-12-21 21:16:27 -03:00
CamilleLaVey
3fd7821fc8 [gl, vk, spv] Added component type handling for texture buffers and resolve pixel format variants 2025-12-21 21:16:27 -03:00
CamilleLaVey
59521d7e96 Saving Private Windows 2025-12-21 21:16:27 -03:00
CamilleLaVey
1fccea8db1 [vk] Formatting missing formats 2025-12-21 21:16:27 -03:00
CamilleLaVey
94ac08fcd7 another missing brace 2025-12-21 21:16:26 -03:00
CamilleLaVey
992ea70ce7 If this gets built I'll be able to actually, truly and sincerely fly 2025-12-21 21:16:26 -03:00
CamilleLaVey
9de6c125a7 missing brace 2025-12-21 21:16:26 -03:00
CamilleLaVey
5447ed0d60 quick fix 2025-12-21 21:16:26 -03:00
CamilleLaVey
8b9792855f [vk] TextureType extended 2025-12-21 21:16:26 -03:00
CamilleLaVey
419be467ee [vk] Adjustment BitScaleHelper 2025-12-21 21:16:26 -03:00
CamilleLaVey
6df35d859a [vk] BufferCache NullBuffer handling 2025-12-21 21:16:25 -03:00
CamilleLaVey
bd28d4269b [vk] Runtime to change image layout 2025-12-21 21:16:25 -03:00
CamilleLaVey
0baf482214 [vk] TextureFilter 2025-12-21 21:16:25 -03:00
CamilleLaVey
690dfc66f5 [gl, vk] SupportLinearFilter patch 2025-12-21 21:16:25 -03:00
CamilleLaVey
ef7e43bc30 [vk] Wrapper for Sampler Image Filter 2025-12-21 21:16:25 -03:00
CamilleLaVey
d4d704cc26 [vk, rasterizer] Re-order post Sample Locations removal 2025-12-21 21:16:25 -03:00
CamilleLaVey
1c9f603947 [revert] Opcode Promotion path emulation 2025-12-21 21:16:24 -03:00
CamilleLaVey
3873b3fe6c [revert] lambda enemy of da world 2025-12-21 21:16:24 -03:00
CamilleLaVey
fb7d5086b7 [revert] The next step of the human kind before it's doom 2025-12-21 21:16:24 -03:00
CamilleLaVey
c5cbd67dbc [vk] The next step of the human kind before it's doom 2025-12-21 21:16:24 -03:00
CamilleLaVey
7bd0d42bab lambda enemy of da world 2025-12-21 21:16:24 -03:00
CamilleLaVey
08dbacdf53 [vk, gl, spv] Opcode Promotion path emulation 2025-12-21 21:16:24 -03:00
CamilleLaVey
da8809c240 [vk] Fixing wrong enabling logic 2025-12-21 21:16:24 -03:00
CamilleLaVey
eb7159a859 [vk] NullDescriptor guard 2025-12-21 21:16:23 -03:00
CamilleLaVey
65bef7ec08 [vk] Adjusted Transform Feedback 2025-12-21 21:16:23 -03:00
CamilleLaVey
78a44a4ef6 [vk] Adjusted Query Cache 2025-12-21 21:16:23 -03:00
CamilleLaVey
b07356fcbf [vk, qcom] Shader Float Control changed handling 2025-12-21 21:16:23 -03:00
CamilleLaVey
500802cb72 [vk] Removed Sample Locations 2025-12-21 21:16:23 -03:00
CamilleLaVey
e31411170a [vk] removed ImageViewType function 2025-12-21 21:16:23 -03:00
CamilleLaVey
8c350dfd37 [spv, vk] reworked texture view handling and added layer count overrides 2025-12-21 21:16:22 -03:00
CamilleLaVey
de3ac7ad4e [vk] Unused mark for subgroups variables 2025-12-21 21:16:22 -03:00
CamilleLaVey
66903b4b98 [revert] UWU 2025-12-21 21:16:22 -03:00
CamilleLaVey
df088c6442 [Re-introduced] Color output handling in SPIR-V emission 2025-12-21 21:16:22 -03:00
CamilleLaVey
3da798ba05 [Re-introduced] Added linear filtering in texture blitting operations 2025-12-21 21:16:22 -03:00
CamilleLaVey
58c76ece13 [spv, qcom] Ensuring SPV 1.3 2025-12-21 21:16:22 -03:00
CamilleLaVey
e1108010a6 [android] Update Gradle to 8.13.1 2025-12-21 21:16:22 -03:00
CamilleLaVey
3553c372dd [vk, qcom] UWU 2025-12-21 21:16:21 -03:00
CamilleLaVey
261dcc30c1 [revert] Added linear filtering in texture blitting operations 2025-12-21 21:16:21 -03:00
CamilleLaVey
095f8e92b1 [revert] Color output handling in SPIR-V emission. 2025-12-21 21:16:21 -03:00
CamilleLaVey
0e6a271e40 [vk] Remove forced stencil format handling in TextureCacheRuntime 2025-12-21 21:16:21 -03:00
CamilleLaVey
f22407de1e [revert] TiledCacheBarrier starter 2025-12-21 21:16:21 -03:00
CamilleLaVey
8fa7b068b8 [vk, qcom] Returned subgroups functions to QCOM 2025-12-21 21:16:21 -03:00
CamilleLaVey
29c629737b [vk] Added support for Stencil component type in texture handling 2025-12-21 21:16:20 -03:00
CamilleLaVey
f03b4d77c2 [vk, qcom] Graphics Subgroup bugged 2025-12-21 21:16:20 -03:00
CamilleLaVey
41148132f0 [vk] Added support for sample locations in depth and depth-stencil surfaces 2025-12-21 21:16:20 -03:00
CamilleLaVey
bf1efc6a4c [spv] SamplerComponentType 2025-12-21 21:16:20 -03:00
CamilleLaVey
7d535a06c3 [revert] Tightened SSBO tracking heuristics 2025-12-21 21:16:20 -03:00
CamilleLaVey
3a7091d193 [Revert] Adjusted Track function for bias handling and alignment checks for storage buffers 2025-12-21 21:16:20 -03:00
CamilleLaVey
bdc7acce27 [gl, vk] Implement SampledView method for ImageView 2025-12-21 21:16:19 -03:00
CamilleLaVey
68593f9ddc [ir, spv] Added support for sampler component types in texture handling 2025-12-21 21:16:19 -03:00
CamilleLaVey
979d203a77 [spv] Color output handling in SPIR-V emission. 2025-12-21 21:16:19 -03:00
CamilleLaVey
e086078e41 [vk] Added linear filtering in texture blitting operations 2025-12-21 21:16:19 -03:00
CamilleLaVey
535b33bc6b [spv, qcom] Implement warp intrinsics support 2025-12-21 21:16:19 -03:00
CamilleLaVey
e6b3f98b13 [vk] Conditioning Conditional Rendering #2 2025-12-21 21:16:19 -03:00
CamilleLaVey
b03eb6bd78 [vk, qcom] Removed SPIR-V 1.4 for qcom 2025-12-21 21:16:19 -03:00
CamilleLaVey
e4e71a36de [vk] Adjustments to Sample Locations 2025-12-21 21:16:18 -03:00
CamilleLaVey
a83157f0a9 [host] Adjusted Track function for bias handling and alignment checks for storage buffers 2025-12-21 21:16:18 -03:00
CamilleLaVey
8c1e47bbda [host] Added logging for OOM cases with fastmem relation 2025-12-21 21:16:18 -03:00
CamilleLaVey
c18662d039 [ir, nvn] Tightened SSBO tracking heuristics 2025-12-21 21:16:18 -03:00
Caio Oliveira
4fb7aea7b4 Revert "Controlled SPV features on QCOM"
This reverts commit 907b041ec6fb4f16750155f4c41e17389f2e385d.
2025-12-21 21:16:18 -03:00
CamilleLaVey
a8af150df4 Controlled SPV features on QCOM 2025-12-21 21:16:18 -03:00
CamilleLaVey
5b41abc1b1 [vk, qcom] Disabling VK_KHR_push_descriptor for qcom 2025-12-21 21:16:17 -03:00
CamilleLaVey
91f3a4e4bb [vk, vendor, mobile] Improved mobile staging buffer data 2025-12-21 21:16:17 -03:00
CamilleLaVey
aff095523d [vk, rasterizer] Update sample location handling for MSAA configurations 2025-12-21 21:16:17 -03:00
CamilleLaVey
49ba71a594 [vk, rasterizer] offsets float x Uint 2025-12-21 21:16:17 -03:00
CamilleLaVey
97cfd9786f [vk] Sample Locations Structure 2025-12-21 21:16:17 -03:00
CamilleLaVey
bc1873c944 [vk, rasterizer] TiledCacheBarrier starter 2025-12-21 21:16:17 -03:00
CamilleLaVey
e3b10eba10 [maxwell, vk] VK_EXT_Sample_Locations 2025-12-21 21:16:16 -03:00
CamilleLaVey
ce26d82d09 [vk, qcom] Removed 500.800.51 parallel compiling restriction 2025-12-21 21:16:16 -03:00
CamilleLaVey
a353f07b8f [vk, qcom] Adjusting Sampler Budget reserved value 2025-12-21 21:16:16 -03:00
CamilleLaVey
1b160c6091 [vk, qcom] UniformBufferAlignment set by hardware capabilities 2025-12-21 21:16:16 -03:00
CamilleLaVey
95cb6b6b3d [vk, qcom] Samplers Budget Management 2025-12-21 21:16:16 -03:00
CamilleLaVey
7b4b0bb6ca [vk, qcom] Extending limits of broken parallel compiling to 512.800.51 2025-12-21 21:16:16 -03:00
CamilleLaVey
6e93db52a5 [vk, qcom] Binding buffer limits 2025-12-21 21:16:15 -03:00
CamilleLaVey
e33c814879 [vk, vendor] Clamping memory usage in mobile GPUs 2025-12-21 21:16:15 -03:00
CamilleLaVey
5f3ad571cb [vk, qcom] Remove VK_EXT_CUSTOM_BORDER_COLOR 2025-12-21 21:16:15 -03:00
Caio Oliveira
20b171ded3 Partially reverts "[vk] Changing conditions for Swapchain maintenance1 "
This reverts commit 3fc3b5fdad.
2025-12-21 20:15:09 -03:00
CamilleLaVey
68edde944a [revert] Resolving conflicting changes 2025-12-21 19:06:13 -03:00
CamilleLaVey
510c795bd6 Revert "[vk] Ensure image view flags are resolved" 2025-12-21 19:06:13 -03:00
CamilleLaVey
eb7b62fefc Revert "[vk, texture_cache] Preventing ARGB8 from getting misinterpreted with depth formats" 2025-12-21 19:06:13 -03:00
CamilleLaVey
b974a8f7a8 Revert "[vk, texture_cache] BGRA8 Depth/Stencil format conversions" 2025-12-21 19:06:12 -03:00
CamilleLaVey
40fd7d5558 Revert "[maxwell] Logging for wrong HDR conversions into depth formats"
This reverts commit 66c26e39fe.
2025-12-21 19:06:12 -03:00
CamilleLaVey
a99c35d0ff Revert "[surface, vk, pipeline, texture_cache] Refactor image view handling and add normalized compatible format utility"
This reverts commit 6a230bec1a.
2025-12-21 19:06:12 -03:00
CamilleLaVey
83323d7993 Revert "[spir-v] Add is_integer flag to texture descriptors and update image type handling" 2025-12-21 19:06:12 -03:00
CamilleLaVey
6b8d5c1963 Revert "[vk, pipeline, texture_cache] Renamed MakeView parameter" 2025-12-21 19:06:12 -03:00
CamilleLaVey
7b94b7ebc1 [vk] Gating advanced ExtendedDynamicState1 2025-12-21 19:06:12 -03:00
CamilleLaVey
5d645ba905 [licences] Updating licenses on missing files 2025-12-21 19:06:11 -03:00
CamilleLaVey
5f70c3fc57 [vk] Line rasterization and Alpha features adjustments (again) 2025-12-21 19:06:11 -03:00
CamilleLaVey
b87d0215a0 [vk, scheduler] Applying finishing call for TF when it's not being used 2025-12-21 19:06:11 -03:00
CamilleLaVey
e6e6785583 [vk, qcom] Returning forced SScaled and UScaled formats emulations to Adreno. 2025-12-21 19:06:11 -03:00
CamilleLaVey
60bffd9992 [vk, vendor] Forcing BGR5 emulation path due to driver misbehavior. 2025-12-21 19:06:11 -03:00
CamilleLaVey
130632b8ef [vk] ExtendedDynamicState repair #2 2025-12-21 19:06:11 -03:00
CamilleLaVey
e2264170f6 [vk] Depth State Refresh Update. 2025-12-21 19:06:11 -03:00
CamilleLaVey
737a409088 [vk, compute_pass] Conditioning Conditional Rendering 2025-12-21 19:06:10 -03:00
CamilleLaVey
e7130a28d6 [spir-v, emit] Flat Decoration Adjustment 2025-12-21 19:06:10 -03:00
CamilleLaVey
d9db45913c [spir-v, emit] SPV Image Mismatch 2025-12-21 19:06:10 -03:00
CamilleLaVey
6d2c8e6dbb [vk, rasterizer] Clamping Render-Area out of limits 2025-12-21 19:06:10 -03:00
CamilleLaVey
19600b7e0e [vk, rasterizer, state_tracker] LineMode disabled from scheduler 2025-12-21 19:06:10 -03:00
CamilleLaVey
dd74bb459b [surface, vk, pipeline, texture_cache] Texture Sampling Fix 2025-12-21 19:06:10 -03:00
CamilleLaVey
648bf1e813 [vk, swapchain] Swapchain Image VkQueue 2025-12-21 19:06:09 -03:00
CamilleLaVey
cbef1f3451 [vk, graphics, pipeline, rasterizer] Alpha Coverage Adjustment 2025-12-21 19:06:09 -03:00
CamilleLaVey
b4472764eb [vk, pipeline, texture_cache] Renamed MakeView parameter 2025-12-21 19:06:09 -03:00
CamilleLaVey
5fe7a51522 [spir-v] Add is_integer flag to texture descriptors and update image type handling 2025-12-21 19:06:09 -03:00
CamilleLaVey
2e0374c458 [surface, vk, pipeline, texture_cache] Refactor image view handling and add normalized compatible format utility 2025-12-21 19:06:09 -03:00
CamilleLaVey
61a0f38a2a [vk] Removing false feature-removal logging for robustness2 and image robustness. 2025-12-21 19:06:09 -03:00
CamilleLaVey
72d61c38be [vk] ExtendedDynamicState repair #1 2025-12-21 19:06:09 -03:00
CamilleLaVey
d45094c246 [spir-v] Flat decorations for input interfaces 2025-12-21 19:06:08 -03:00
CamilleLaVey
1607f62149 [vk] VK_EXT_multi_draw 2025-12-21 19:06:08 -03:00
CamilleLaVey
17ddbb2240 [vk] Declaring features from Maintenance5 2025-12-21 19:06:08 -03:00
CamilleLaVey
2eb425d7f8 [vk] Fixing logging statements 2025-12-21 19:06:08 -03:00
CamilleLaVey
114c946971 [vk] Removing Image Robustness from EXT list. 2025-12-21 19:06:08 -03:00
CamilleLaVey
0e4382d09b [vk] ExtendedDynamicState impl close to Vulkan specs 2025-12-21 19:06:08 -03:00
CamilleLaVey
e3a9e85099 [vk, rasterizer] Reduce FlushWork constant drawcalls 2025-12-21 19:06:07 -03:00
CamilleLaVey
eb120ac1ac [vk] Moving Maintenance features to wrapper 2025-12-21 19:06:07 -03:00
CamilleLaVey
956c28b41a [vk] Re-ordering tiling format features 2025-12-21 19:06:07 -03:00
CamilleLaVey
79f20ece71 [vk] Re-ordering format feature 2025-12-21 19:06:07 -03:00
CamilleLaVey
1373ee90b0 [vk] Robustness2 and Image Robustness 2025-12-21 19:06:07 -03:00
CamilleLaVey
74a4816530 [maxwell] Logging for wrong HDR conversions into depth formats 2025-12-21 19:06:07 -03:00
CamilleLaVey
30706cde41 [vk, texture_cache] BGRA8 Depth/Stencil format conversions 2025-12-21 19:06:07 -03:00
CamilleLaVey
e436030383 [vk, texture_cache] Preventing ARGB8 from getting misinterpreted with depth formats 2025-12-21 19:06:06 -03:00
CamilleLaVey
1bcc998040 [vk] Adjusting Custom Border Color 2025-12-21 19:06:06 -03:00
CamilleLaVey
fbd357d82c [vk] Adjusting VIDS 2025-12-21 19:06:06 -03:00
CamilleLaVey
3fc3b5fdad [vk] Changing conditions for Swapchain maintenance1 2025-12-21 19:06:06 -03:00
CamilleLaVey
b5f8ff7035 [vk] Ensure image view flags are resolved 2025-12-21 19:06:06 -03:00
CamilleLaVey
002078af36 [vk] Aligning ExtendedDynamicState2 2025-12-21 19:06:06 -03:00
CamilleLaVey
e4b4a75cab [vk, spir-v] Conditioning creation of VK_EXT_Shader_Stencil_Export in SPIR-V 2025-12-21 19:06:05 -03:00
CamilleLaVey
7103abccdb fixing building error. 2025-12-21 19:06:05 -03:00
CamilleLaVey
319dfeaad8 [vk, texture_cache, vendor] Adding path for hardware resolve on shader stencil export/ MSAA image blits 2025-12-21 19:06:05 -03:00
CamilleLaVey
49e4f3b29a [vk] Return VK 1.3 as main target, treat VK 1.4 core features as extensions if driver supports it 2025-12-21 19:06:05 -03:00
CamilleLaVey
bafc6f34fb [vk] Ordering double cases specified and allocating them in the correct place in the GetSuitability phase 2025-12-21 19:06:05 -03:00
CamilleLaVey
f9d612329a Damned macros. 2025-12-21 19:06:05 -03:00
CamilleLaVey
6ca3d3a1b1 Fix building issues 2025-12-21 19:06:04 -03:00
CamilleLaVey
7b40a8df80 [vk, qcom] VertexInputDynamicState ban removal 2025-12-21 19:06:04 -03:00
CamilleLaVey
53f10ab19e [vk] Bumping features to 1.4 2025-12-21 19:06:04 -03:00
CamilleLaVey
33c72ef7e1 [vk] Updated maintenance features 2025-12-21 19:06:04 -03:00
CamilleLaVey
acae4b089d [vk, amd, qcom] Removed older driver workarounds 2025-12-21 19:06:04 -03:00
CamilleLaVey
8055a64b5f [vk, spir-v] Adding decoration for NonWritable buffers if vertexPipelineStoresAndAtomics isn't available 2025-12-21 19:06:04 -03:00
CamilleLaVey
e4ae8a72dd [vk, buffer_cache] Aligning VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT logic 2025-12-21 19:06:04 -03:00
CamilleLaVey
d68d0bd65e [vk, vendor] Adding driver id flag to blacklist 2025-12-21 19:06:03 -03:00
CamilleLaVey
a35c9d5576 [Refactor, vk] DynamicState, ExtendedDynamicState and VertexInputDynamicState 2025-12-21 19:06:03 -03:00
Caio Oliveira
eade2c19c8 Revert "[vk] attempt to continue even if unsuitable driver (#3087)"
This reverts commit 7a98ee4ead.
2025-12-21 19:05:54 -03:00
Caio Oliveira
1ff788bed5 Revert "[video_core] Fix inconsistency between EDS and VIDS settings (#3148)"
This reverts commit 7157d5167e.
2025-12-21 19:00:35 -03:00
111 changed files with 3992 additions and 822 deletions

View File

@@ -96,8 +96,8 @@
"package": "VVL",
"repo": "KhronosGroup/Vulkan-ValidationLayers",
"tag": "vulkan-sdk-%VERSION%",
"git_version": "1.4.335.0",
"git_version": "1.4.328.1",
"artifact": "android-binaries-%VERSION%.zip",
"hash": "48167c4a17736301bd08f9290f41830443e1f18cce8ad867fc6f289b49e18b40e93c9850b377951af82f51b5b6d7313aa6a884fc5df79f5ce3df82696c1c1244"
"hash": "5ec895a453cb7c2f156830b9766953a0c2bd44dea99e6a3dac4160305041ccd3e87534b4ce0bd102392178d2a8eca48411856298f9395e60117cdfe89f72137e"
}
}

View File

@@ -119,10 +119,10 @@
"package": "VulkanUtilityLibraries",
"repo": "scripts/VulkanUtilityHeaders",
"tag": "%VERSION%",
"git_version": "1.4.335",
"git_version": "1.4.328",
"artifact": "VulkanUtilityHeaders.tar.zst",
"git_host": "git.crueter.xyz",
"hash": "16dac0e6586702580c4279e4cd37ffe3cf909c93eb31b5069da7af36436d47b270a9cbaac953bb66c22ed12ed67ffa096688599267f307dfb62be1bc09f79833"
"hash": "9922217b39faf73cd4fc1510f2fdba14a49aa5c0d77f9ee24ee0512cef16b234d0cabc83c1fec861fa5df1d43e7f086ca9b6501753899119f39c5ca530cb0dae"
},
"spirv-tools": {
"package": "SPIRV-Tools",

View File

@@ -217,6 +217,7 @@ else()
--disable-ffmpeg
--disable-ffprobe
--disable-network
--disable-postproc
--disable-swresample
--enable-decoder=h264
--enable-decoder=vp8

View File

@@ -18,7 +18,6 @@ plugins {
id("androidx.navigation.safeargs.kotlin")
id("org.jlleitschuh.gradle.ktlint") version "11.4.0"
id("com.github.triplet.play") version "3.8.6"
id("idea")
}
/**
@@ -28,8 +27,6 @@ plugins {
*/
val autoVersion = (((System.currentTimeMillis() / 1000) - 1451606400) / 10).toInt()
val edenDir = project(":Eden").projectDir
@Suppress("UnstableApiUsage")
android {
namespace = "org.yuzu.yuzu_emu"
@@ -244,17 +241,11 @@ android {
externalNativeBuild {
cmake {
version = "3.22.1"
path = file("${edenDir}/CMakeLists.txt")
path = file("../../../CMakeLists.txt")
}
}
}
idea {
module {
// Inclusion to exclude build/ dir from non-Android
excludeDirs.add(file("${edenDir}/build"))
}
}
tasks.register<Delete>("ktlintReset", fun Delete.() {
delete(File(layout.buildDirectory.toString() + File.separator + "intermediates/ktLint"))
@@ -355,7 +346,7 @@ fun getGitVersion(): String {
}
afterEvaluate {
val artifactsDir = layout.projectDirectory.dir("${edenDir}/artifacts")
val artifactsDir = layout.projectDirectory.dir("../../../artifacts")
val outputsDir = layout.buildDirectory.dir("outputs").get()
android.applicationVariants.forEach { variant ->

View File

@@ -31,6 +31,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
RENDERER_VERTEX_INPUT_DYNAMIC_STATE("vertex_input_dynamic_state"),
RENDERER_PROVOKING_VERTEX("provoking_vertex"),
RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"),
RENDERER_SAMPLE_SHADING("sample_shading"),
PICTURE_IN_PICTURE("picture_in_picture"),
USE_CUSTOM_RTC("custom_rtc_enabled"),
BLACK_BACKGROUNDS("black_backgrounds"),

View File

@@ -160,6 +160,13 @@ abstract class SettingsItem(
descriptionId = R.string.descriptor_indexing_description
)
)
put(
SwitchSetting(
BooleanSetting.RENDERER_SAMPLE_SHADING,
titleId = R.string.sample_shading,
descriptionId = R.string.sample_shading_description
)
)
put(
SliderSetting(
IntSetting.RENDERER_SAMPLE_SHADING_FRACTION,

View File

@@ -453,11 +453,12 @@ class SettingsFragmentPresenter(
private fun addEdenVeilSettings(sl: ArrayList<SettingsItem>) {
sl.apply {
add(HeaderSetting(R.string.veil_extensions))
add(IntSetting.RENDERER_SAMPLE_SHADING_FRACTION.key)
add(ByteSetting.RENDERER_DYNA_STATE.key)
add(BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE.key)
add(BooleanSetting.RENDERER_PROVOKING_VERTEX.key)
add(BooleanSetting.RENDERER_DESCRIPTOR_INDEXING.key)
add(BooleanSetting.RENDERER_SAMPLE_SHADING.key)
add(IntSetting.RENDERER_SAMPLE_SHADING_FRACTION.key)
add(HeaderSetting(R.string.veil_renderer))
add(IntSetting.DMA_ACCURACY.key)

View File

@@ -96,6 +96,8 @@
<string name="provoking_vertex_description">يحسن الإضاءة ومعالجة الرؤوس في بعض الألعاب. مدعوم فقط على وحدات معالجة الرسومات Vulkan 1.0+.</string>
<string name="descriptor_indexing">فهرسة الوصف</string>
<string name="descriptor_indexing_description">يحسن معالجة النسيج والمخزن المؤقت، بالإضافة إلى طبقة الترجمة Maxwell. مدعوم من بعض وحدات معالجة الرسومات Vulkan 1.1 وجميع وحدات معالجة الرسومات Vulkan 1.2+.</string>
<string name="sample_shading">تظليل العينة</string>
<string name="sample_shading_description">يسمح لتظلل الأجزاء بتنفيذ كل عينة في جزء متعدد العينات بدلاً من مرة واحدة لكل جزء. يحسن جودة الرسومات على حساب بعض الأداء. لا تدعم هذه الإضافة سوى أجهزة Vulkan 1.1+ فقط.</string>
<string name="sample_shading_fraction">نسبة التظليل النموذجية</string>
<string name="sample_shading_fraction_description">كثافة تمرير تظليل العينة. تؤدي القيم الأعلى إلى تحسين الجودة بشكل أكبر، ولكنها تقلل أيضًا من الأداء إلى حد كبير.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">يحسن الإضاءة ومعالجة الرؤوس في بعض الألعاب. مدعوم فقط على وحدات معالجة الرسومات التي تدعم فولكان 1.0+.</string>
<string name="descriptor_indexing">فهرسة الواصفات</string>
<string name="descriptor_indexing_description">يحسن معالجة القوام والمخازن المؤقتة، بالإضافة إلى طبقة ترجمة ماكسويل. مدعوم من قبل بعض وحدات معالجة الرسومات التي تدعم فولكان 1.1 وجميع وحدات معالجة الرسومات التي تدعم فولكان 1.2+.</string>
<string name="sample_shading">سێبەندی نمونە</string>
<string name="sample_shading_description">ڕێگە بە شێدەری پارچە دەدات کە بۆ هەر نمونەیەک لە پارچەی فرە نمونەیدا جێبەجێ بکات لە جیاتی جێبەجێکردنی بۆ هەر پارچەیەک. جۆرایی گرافیک باشتر دەکات بە بەهای هەندێک لە کارایی. تەنها ئامێرەکانی Vulkan 1.1+ پشتگیری ئەم درێژکراوە دەکەن.</string>
<string name="sample_shading_fraction">پێکهاتەی سێبەرکردنی نموونە</string>
<string name="sample_shading_fraction_description">چڕی تێپەڕاندنی سێبەرکردنی نموونە. بەهای زیاتر کوالێتی باشتر دەکات بەڵام کارایی زیاتر کەم دەکاتەوە.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">Zlepšuje osvětlení a zpracování vertexů v některých hrách. Podporováno pouze na GPU s Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexování deskriptorů</string>
<string name="descriptor_indexing_description">Zlepšuje zpracování textur a bufferů, stejně jako Maxwell překladovou vrstvu. Podporováno některými GPU s Vulkan 1.1 a všemi GPU s Vulkan 1.2+.</string>
<string name="sample_shading">Vzorkovací stínování</string>
<string name="sample_shading_description">Umožňuje fragment shaderu provádět výpočty pro každý vzorek ve fragmentu s více vzorky namísto jednou pro fragment. Zlepšuje kvalitu grafiky na úkor výkonu. Tuto funkci podporují pouze zařízení s Vulkan 1.1+.</string>
<string name="sample_shading_fraction">Podíl stínování vzorku</string>
<string name="sample_shading_fraction_description">Intenzita průchodu stínování vzorku. Vyšší hodnoty zlepšují kvalitu, ale také výrazněji snižují výkon.</string>

View File

@@ -78,6 +78,8 @@
<string name="provoking_vertex_description">Verbessert die Beleuchtung und die Vertex-Verarbeitung in einigen Spielen. Wird nur von GPUs mit Vulkan 1.0+ unterstützt.</string>
<string name="descriptor_indexing">Deskriptor-Indizierung</string>
<string name="descriptor_indexing_description">Verbessert die Textur- und Puffer-Verarbeitung sowie die Maxwell-Übersetzungsschicht. Wird von einigen Vulkan 1.1-GPUs und allen Vulkan 1.2+-GPUs unterstützt.</string>
<string name="sample_shading">Sample Shading</string>
<string name="sample_shading_description">Ermöglicht dem Fragment-Shader, pro Sample in einem mehrfach gesampleten Fragment ausgeführt zu werden, anstatt einmal pro Fragment. Verbessert die Grafikqualität auf Kosten der Leistung. Nur Vulkan 1.1+-Geräte unterstützen diese Erweiterung.</string>
<string name="sample_shading_fraction">Sample-Shading-Anteil</string>
<string name="sample_shading_fraction_description">Die Intensität des Sample-Shading-Durchgangs. Höhere Werte verbessern die Qualität stärker, beeinträchtigen aber auch die Leistung stärker.</string>

View File

@@ -91,6 +91,8 @@
<string name="provoking_vertex_description">Mejora la iluminación y el manejo de vértices en ciertos juegos. Solo es compatible con las GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexación del descriptor</string>
<string name="descriptor_indexing_description">Mejora la textura y el manejo del búfer, así como la capa de traducción Maxwell. Compatible con algunas GPU Vulkan 1.1 y todas las GPU Vulkan 1.2+.</string>
<string name="sample_shading">Sombreado de muestra</string>
<string name="sample_shading_description">Permite que el sombreador de fragmentos se ejecute por muestra en un fragmento de múltiples muestras en lugar de una vez por fragmento. Mejora la calidad gráfica a costa de algo de rendimiento. Solo los dispositivos Vulkan 1.1+ admiten esta extensión.</string>
<string name="sample_shading_fraction">Fracción de sombreado de muestra</string>
<string name="sample_shading_fraction_description">La intensidad del paso de sombreado de la muestra. Los valores más altos mejoran más la calidad, pero también reducen el rendimiento en mayor medida.</string>

View File

@@ -104,6 +104,8 @@
<string name="fast_cpu_time_description">از Boost (1700MHz) برای کار با بالاترین سرعت کلاک بومی سوئیچ یا Fast (2000MHz) برای کار با دو برابر سرعت استفاده کنید.</string>
<string name="memory_layout">چیدمان حافظه</string>
<string name="memory_layout_description">(آزمایشی) چیدمان حافظه شبیه‌سازی شده را تغییر می‌دهد. این تنظیم عملکرد را افزایش نمی‌دهد، اما ممکن است به بازی‌هایی که از رزولوشن بالا با استفاده از مادها استفاده می‌کنند کمک کند. در تلفن‌های با 8 گیگابایت رم یا کمتر استفاده نشود.</string>
<string name="sample_shading">سایه‌زنی نمونه</string>
<string name="sample_shading_description">اجازه می‌دهد شیدر قطعه در هر نمونه از یک قطعه چندنمونه‌ای اجرا شود به جای یک بار برای هر قطعه. کیفیت گرافیک را به بهای کاهش عملکرد بهبود می‌بخشد. فقط دستگاه‌های Vulkan 1.1+ از این افزونه پشتیبانی می‌کنند.</string>
<string name="sample_shading_fraction">کسر سایه‌زنی نمونه</string>
<string name="sample_shading_fraction_description">شدت مرحله سایه‌زنی نمونه. مقادیر بالاتر کیفیت را بیشتر بهبود می‌بخشد اما عملکرد را نیز به میزان بیشتری کاهش می‌دهد.</string>
<string name="custom_cpu_ticks">تیک‌های CPU سفارشی</string>

View File

@@ -93,6 +93,8 @@
<string name="provoking_vertex_description">Améliore l`éclairage et la gestion des vertex dans certains jeux. Pris en charge uniquement par les GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexation des descripteurs</string>
<string name="descriptor_indexing_description">Améliore la gestion des textures et des tampons, ainsi que la couche de traduction Maxwell. Pris en charge par certains GPU Vulkan 1.1 et tous les GPU Vulkan 1.2+.</string>
<string name="sample_shading">Échantillonnage de shading</string>
<string name="sample_shading_description">Permet au fragment shader de s\'exécuter par échantillon dans un fragment multi-échantillonné au lieu d\'une fois par fragment. Améliore la qualité graphique au détriment des performances. Seuls les appareils Vulkan 1.1+ prennent en charge cette extension.</string>
<string name="sample_shading_fraction">Fraction d\'ombrage d\'échantillon</string>
<string name="sample_shading_fraction_description">L\'intensité de la passe d\'ombrage d\'échantillon. Des valeurs plus élevées améliorent davantage la qualité mais réduisent aussi plus fortement les performances.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">משפר תאורה וטיפול בקודקודים במשחקים מסוימים. נתמך רק בכרטיסי מסך עם Vulkan 1.0+.</string>
<string name="descriptor_indexing">אינדוקס תיאורים</string>
<string name="descriptor_indexing_description">משפר טיפול במרקמים ומאגרים, כמו גם בשכבת התרגום של Maxwell. נתמך בחלק מכרטיסי ה-Vulkan 1.1 ובכל כרטיסי ה-Vulkan 1.2+.</string>
<string name="sample_shading">דגימת צל</string>
<string name="sample_shading_description">מאפשר לשברי הצללה לרוץ לכל דגימה בקטע רב-דגימות במקום פעם אחת לקטע. משפר את איכות הגרפיקה במחיר של ביצועים. רק מכשירי Vulkan 1.1+ תומכים בהרחבה זו.</string>
<string name="sample_shading_fraction">שבר הצללה לדוגמה</string>
<string name="sample_shading_fraction_description">עוצמת מעבר ההצללה לדוגמה. ערכים גבוהים יותר משפרים את האיכות יותר אך גם מפחיתים את הביצועים במידה רבה יותר.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">Javítja a világítást és a csúcskezelést bizonyos játékokban. Csak Vulkan 1.0+ GPU-kon támogatott.</string>
<string name="descriptor_indexing">Deskriptor Indexelés</string>
<string name="descriptor_indexing_description">Javítja a textúrák és pufferek kezelését, valamint a Maxwell fordítási réteget. Néhány Vulkan 1.1 GPU és minden Vulkan 1.2+ GPU támogatja.</string>
<string name="sample_shading">Mintavételezés árnyékolás</string>
<string name="sample_shading_description">Lehetővé teszi, hogy a fragment shader mintánként fusson egy többmintás fragmentben a fragmentenkénti futtatás helyett. Javítja a grafikai minőséget a teljesítmény rovására. Csak Vulkan 1.1+ eszközök támogatják ezt a kiterjesztést.</string>
<string name="sample_shading_fraction">Mintavételezés árnyékolási hányad</string>
<string name="sample_shading_fraction_description">A mintavételezés árnyékolási lépés intenzitása. A magasabb értékek jobb minőséget eredményeznek, de nagyobb mértékben csökkentik a teljesítményt.</string>

View File

@@ -91,6 +91,8 @@
<string name="provoking_vertex_description">Meningkatkan pencahayaan dan penanganan vertex di beberapa game. Hanya didukung di GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Pengindeks Deskriptor</string>
<string name="descriptor_indexing_description">Meningkatkan penanganan tekstur dan buffer, serta lapisan terjemahan Maxwell. Didukung oleh beberapa GPU Vulkan 1.1 dan semua GPU Vulkan 1.2+.</string>
<string name="sample_shading">Pencahayaan Sampel</string>
<string name="sample_shading_description">Memungkinkan fragment shader dieksekusi per sampel dalam fragmen multisampel alih-alih sekali per fragmen. Meningkatkan kualitas grafis dengan mengorbankan kinerja. Hanya perangkat Vulkan 1.1+ yang mendukung ekstensi ini.</string>
<string name="sample_shading_fraction">Fraksi Pencahayaan Sampel</string>
<string name="sample_shading_fraction_description">Intensitas proses pencahayaan sampel. Nilai lebih tinggi meningkatkan kualitas lebih baik tetapi juga mengurangi performa lebih besar.</string>

View File

@@ -91,6 +91,8 @@
<string name="provoking_vertex_description">Migliora illuminazione e gestione dei vertici in alcuni giochi. Supportato solo su GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indicizzazione descrittori</string>
<string name="descriptor_indexing_description">Migliora la gestione di texture e buffer, nonché il livello di traduzione Maxwell. Supportato da alcune GPU Vulkan 1.1 e tutte le GPU Vulkan 1.2+.</string>
<string name="sample_shading">Shading campione</string>
<string name="sample_shading_description">Permette al fragment shader di eseguire per campione in un frammento multi-campione invece che una volta per frammento. Migliora la qualità grafica a scapito delle prestazioni. Solo i dispositivi Vulkan 1.1+ supportano questa estensione.</string>
<string name="sample_shading_fraction">Frazione di ombreggiatura campione</string>
<string name="sample_shading_fraction_description">L\'intensità della passata di ombreggiatura campione. Valori più alti migliorano la qualità ma riducono maggiormente le prestazioni.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">特定のゲームで照明と頂点処理を改善します。Vulkan 1.0+ GPUでのみサポートされています。</string>
<string name="descriptor_indexing">ディスクリプタインデキシング</string>
<string name="descriptor_indexing_description">テクスチャとバッファの処理、およびMaxwell翻訳レイヤーを改善します。一部のVulkan 1.1 GPUとすべてのVulkan 1.2+ GPUでサポートされています。</string>
<string name="sample_shading">サンプルシェーディング</string>
<string name="sample_shading_description">マルチサンプルフラグメントでフラグメントシェーダーをフラグメントごとではなくサンプルごとに実行できるようにします。パフォーマンスを犠牲にしてグラフィック品質を向上させます。Vulkan 1.1+デバイスのみがこの拡張機能をサポートしています。</string>
<string name="sample_shading_fraction">サンプルシェーディング率</string>
<string name="sample_shading_fraction_description">サンプルシェーディング処理の強度。高い値ほど品質は向上しますが、パフォーマンスも大きく低下します。</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">일부 게임에서 조명과 버텍스 처리를 개선합니다. Vulkan 1.0+ GPU에서만 지원됩니다.</string>
<string name="descriptor_indexing">디스크립터 인덱싱</string>
<string name="descriptor_indexing_description">텍스처 및 버퍼 처리와 Maxwell 변환 레이어를 개선합니다. 일부 Vulkan 1.1 GPU 및 모든 Vulkan 1.2+ GPU에서 지원됩니다.</string>
<string name="sample_shading">샘플 쉐이딩</string>
<string name="sample_shading_description">멀티샘플 프래그먼트에서 프래그먼트 쉐이더가 프래그먼트당 한 번이 아니라 샘플당 실행되도록 합니다. 성능을 희생하여 그래픽 품질을 향상시킵니다. Vulkan 1.1+ 장치만 이 확장을 지원합니다.</string>
<string name="sample_shading_fraction">샘플 쉐이딩 비율</string>
<string name="sample_shading_fraction_description">샘플 쉐이딩 패스의 강도. 값이 높을수록 품질이 더 향상되지만 성능도 더 크게 저하됩니다.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">Forbedrer belysning og vertexhåndtering i enkelte spill. Støttes kun på Vulkan 1.0+ GPU-er.</string>
<string name="descriptor_indexing">Beskrivelsesindeksering</string>
<string name="descriptor_indexing_description">Forbedrer tekstur- og bufferhåndtering, samt Maxwell-oversettelseslaget. Støttes av noen Vulkan 1.1 GPU-er og alle Vulkan 1.2+ GPU-er.</string>
<string name="sample_shading">Prøvegjengivelse</string>
<string name="sample_shading_description">Lar fragment-shaderen kjøres per prøve i et flerprøvefragment i stedet for en gang per fragment. Forbedrer grafikkvaliteten på bekostning av ytelse. Bare Vulkan 1.1+-enheter støtter denne utvidelsen.</string>
<string name="sample_shading_fraction">Prøveskyggebrøk</string>
<string name="sample_shading_fraction_description">Intensiteten til prøveskyggepasseringen. Høyere verdier forbedrer kvaliteten mer, men reduserer også ytelsen i større grad.</string>

View File

@@ -96,6 +96,8 @@
<string name="provoking_vertex_description">Poprawia oświetlenie i obsługę wierzchołków w niektórych grach. Obsługiwane tylko przez GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indeksowanie deskryptorów</string>
<string name="descriptor_indexing_description">Poprawia obsługę tekstur i buforów oraz warstwę tłumaczenia Maxwell. Obsługiwane przez niektóre GPU Vulkan 1.1 i wszystkie GPU Vulkan 1.2+.</string>
<string name="sample_shading">Cieniowanie próbek</string>
<string name="sample_shading_description">Pozwala shaderowi fragmentów wykonywać się na próbkę w fragmencie wielopróbkowym zamiast raz na fragment. Poprawia jakość grafiki kosztem wydajności. Tylko urządzenia Vulkan 1.1+ obsługują to rozszerzenie.</string>
<string name="sample_shading_fraction">Ułamek cieniowania próbki</string>
<string name="sample_shading_fraction_description">Intensywność przebiegu cieniowania próbki. Wyższe wartości poprawiają jakość, ale także w większym stopniu zmniejszają wydajność.</string>

View File

@@ -91,6 +91,8 @@
<string name="provoking_vertex_description">Vértice Provocante: Melhora a iluminação e o processamento de vértices em certos jogos. Suportado apenas em GPUs com Vulkan 1.0 ou superior.</string>
<string name="descriptor_indexing">Descriptor Indexing</string>
<string name="descriptor_indexing_description">Indexação de Descritores: Melhora o processamento de texturas e buffers, assim como a camada de tradução Maxwell. Suportado por algumas GPUs Vulkan 1.1 e todas as GPUs Vulkan 1.2 ou superiores.</string>
<string name="sample_shading">Sample Shading</string>
<string name="sample_shading_description">Amostragem de Sombreamento: Permite que o shader de fragmento seja processado por cada amostra em fragmentos multiamostrados, em vez de executar uma vez por fragmento, melhorando a qualidade gráfica, porém impactando levemente o desempenho. Funciona apenas em dispositivos Vulkan 1.1 ou superiores.</string>
<string name="sample_shading_fraction">Sample Shading Fraction</string>
<string name="sample_shading_fraction_description">Fração de Sombreamento de Amostra: Define a intensidade do sample shading. Quanto maior, melhor a qualidade, mas maior o impacto no desempenho.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">Melhora a iluminação e o tratamento de vértices em certos jogos. Suportado apenas em GPUs Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexação de descritores</string>
<string name="descriptor_indexing_description">Melhora o tratamento de texturas e buffers, assim como a camada de tradução Maxwell. Suportado por algumas GPUs Vulkan 1.1 e todas Vulkan 1.2+.</string>
<string name="sample_shading">Amostragem de sombreamento</string>
<string name="sample_shading_description">Permite que o fragment shader seja executado por amostra num fragmento multi-amostrado em vez de uma vez por fragmento. Melhora a qualidade gráfica à custa de desempenho. Apenas dispositivos Vulkan 1.1+ suportam esta extensão.</string>
<string name="sample_shading_fraction">Fração de Sombreamento de Amostra</string>
<string name="sample_shading_fraction_description">A intensidade da passagem de sombreamento de amostra. Valores mais elevados melhoram a qualidade, mas também reduzem o desempenho numa maior medida.</string>

View File

@@ -96,6 +96,8 @@
<string name="provoking_vertex_description">Улучшает освещение и обработку вершин в некоторых играх. Поддерживается только ГПУ с Vulkan 1.0+.</string>
<string name="descriptor_indexing">Индексирование дескрипторов</string>
<string name="descriptor_indexing_description">Улучшает обработку текстур и буферов, а также слой перевода Maxwell. Поддерживается некоторыми ГПУ Vulkan 1.1 и всеми ГПУ Vulkan 1.2+.</string>
<string name="sample_shading">Сэмпловый шейдинг</string>
<string name="sample_shading_description">Позволяет шейдеру фрагментов выполняться на каждый сэмпл в мультисэмпловом фрагменте вместо одного раза на фрагмент. Улучшает качество графики ценой производительности. Только устройства с Vulkan 1.1+ поддерживают это расширение.</string>
<string name="sample_shading_fraction">Доля сэмплового затенения</string>
<string name="sample_shading_fraction_description">Интенсивность прохода сэмплового затенения. Более высокие значения улучшают качество, но и сильнее снижают производительность.</string>

View File

@@ -68,6 +68,8 @@
<string name="provoking_vertex_description">Побољшава осветљење и вертификат руковања у одређеним играма. Подржан само на Вулкану 1.0+ ГПУ-у.</string>
<string name="descriptor_indexing">Индексирање дескриптора</string>
<string name="descriptor_indexing_description">Побољшава текстуру и руковање међуспремника, као и преводилачки слој Маквелл. Подржани од стране неких Вулкана 1.1 ГПУ-а и сви Вулкан 1.2+ ГПУ.</string>
<string name="sample_shading">Семпловање сенчења</string>
<string name="sample_shading_description">Омогућава фрагмент шејдеру да се извршава по узорку у вишеузорачном фрагменту уместо једном по фрагменту. Побољшава квалитет графике на рачун перформанси. Само Vulkan 1.1+ уређаји подржавају ову екстензију.</string>
<string name="sample_shading_fraction">Удео сенчења узорка</string>
<string name="sample_shading_fraction_description">Интензитет проласка сенчења узорка. Веће вредности побољшавају квалитет више, али такође више смањују перформансе.</string>

View File

@@ -96,6 +96,8 @@
<string name="provoking_vertex_description">Покращує освітлення та взаємодію з вершинами у деяких іграх. Лише для ГП з підтримкою Vulkan 1.0+.</string>
<string name="descriptor_indexing">Індексація дескрипторів</string>
<string name="descriptor_indexing_description">Покращує обробку текстур та буферів, а також шар перекладу Maxwell. Підтримується деякими GPU Vulkan 1.1 та всіма GPU Vulkan 1.2+.</string>
<string name="sample_shading">Шейдинг зразків</string>
<string name="sample_shading_description">Дозволяє шейдеру фрагментів виконуватися на кожен семпл у багатосемпловому фрагменті замість одного разу на фрагмент. Покращує якість графіки за рахунок продуктивності. Лише пристрої з Vulkan 1.1+ підтримують це розширення.</string>
<string name="sample_shading_fraction">Частка затінення зразка</string>
<string name="sample_shading_fraction_description">Інтенсивність проходу затінення зразка. Вищі значення покращують якість, але й сильніше знижують продуктивність.</string>

View File

@@ -70,6 +70,8 @@
<string name="provoking_vertex_description">Cải thiện ánh sáng và xử lý đỉnh trong một số trò chơi. Chỉ được hỗ trợ trên GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Lập chỉ mục bộ mô tả</string>
<string name="descriptor_indexing_description">Cải thiện xử lý kết cấu và bộ đệm, cũng như lớp dịch Maxwell. Được hỗ trợ bởi một số GPU Vulkan 1.1 và tất cả GPU Vulkan 1.2+.</string>
<string name="sample_shading">Tô bóng mẫu</string>
<string name="sample_shading_description">Cho phép fragment shader thực thi trên mỗi mẫu trong một fragment đa mẫu thay vì một lần mỗi fragment. Cải thiện chất lượng đồ họa với chi phí hiệu suất. Chỉ thiết bị Vulkan 1.1+ hỗ trợ tiện ích mở rộng này.</string>
<string name="sample_shading_fraction">Phần trăm tô bóng mẫu</string>
<string name="sample_shading_fraction_description">Cường độ của bước tô bóng mẫu. Giá trị cao hơn cải thiện chất lượng tốt hơn nhưng cũng giảm hiệu suất nhiều hơn.</string>

View File

@@ -93,6 +93,8 @@
<string name="provoking_vertex_description">改善某些游戏中的光照和顶点处理。仅支持Vulkan 1.0+ GPU。</string>
<string name="descriptor_indexing">描述符索引</string>
<string name="descriptor_indexing_description">改进纹理和缓冲区处理以及Maxwell转换层。部分Vulkan 1.1 GPU和所有Vulkan 1.2+ GPU支持。</string>
<string name="sample_shading">采样着色</string>
<string name="sample_shading_description">允许片段着色器在多采样片段中每个样本执行一次而不是每个片段执行一次。以提高性能为代价改善图形质量。仅Vulkan 1.1+设备支持此扩展。</string>
<string name="sample_shading_fraction">采样着色比例</string>
<string name="sample_shading_fraction_description">采样着色处理的强度。值越高,质量改善越多,但性能降低也越明显。</string>

View File

@@ -96,6 +96,8 @@
<string name="provoking_vertex_description">改善某些遊戲中的光照和頂點處理。僅支援Vulkan 1.0+ GPU。</string>
<string name="descriptor_indexing">描述符索引</string>
<string name="descriptor_indexing_description">改進紋理和緩衝區處理以及Maxwell轉換層。部分Vulkan 1.1 GPU和所有Vulkan 1.2+ GPU支援。</string>
<string name="sample_shading">取樣著色</string>
<string name="sample_shading_description">允許片段著色器在多取樣片段中每個樣本執行一次而不是每個片段執行一次。以提高效能為代價改善圖形品質。僅Vulkan 1.1+裝置支援此擴充功能。</string>
<string name="sample_shading_fraction">採樣著色比例</string>
<string name="sample_shading_fraction_description">採樣著色處理的強度。數值越高,品質改善越多,但效能降低也越明顯。</string>

View File

@@ -101,8 +101,10 @@
<string name="provoking_vertex_description">Improves lighting and vertex handling in certain games. Only supported on Vulkan 1.0+ GPUs.</string>
<string name="descriptor_indexing">Descriptor Indexing</string>
<string name="descriptor_indexing_description">Improves texture and buffer handling, as well as the Maxwell translation layer. Supported by some Vulkan 1.1 GPUs and all Vulkan 1.2+ GPUs.</string>
<string name="sample_shading">Sample Shading</string>
<string name="sample_shading_description">Allows the fragment shader to execute per sample in a multi-sampled fragment instead once per fragment. Improves graphics quality at the cost of some performance. Only Vulkan 1.1+ devices support this extension.</string>
<string name="sample_shading_fraction">Sample Shading Fraction</string>
<string name="sample_shading_fraction_description">Allows the fragment shader to execute per sample in a multi-sampled fragment instead of once per fragment. Improves graphics quality at the cost of performance. Higher values improve quality but degrade performance.</string>
<string name="sample_shading_fraction_description">The intensity of the sample shading pass. Higher values improve quality more but also reduce performance to a greater extent.</string>
<string name="veil_renderer">Renderer</string>
<string name="sync_memory_operations">Sync Memory Operations</string>

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -22,6 +19,3 @@ dependencyResolutionManagement {
}
include(":app")
include("Eden")
project(":Eden").projectDir = file("../..")

View File

@@ -578,7 +578,10 @@ public:
#endif
int flags = (fd > 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED;
void* ret = mmap(virtual_base + virtual_offset, length, prot_flags, flags, fd, host_offset);
ASSERT_MSG(ret != MAP_FAILED, "mmap: {}", strerror(errno));
ASSERT_MSG(ret != MAP_FAILED,
"mmap(virt_off=0x{:X}, host_off=0x{:X}, len=0x{:X}, virt_size=0x{:X}, backing_size=0x{:X}, perms=0x{:X}) failed: {}",
virtual_offset, host_offset, length, virtual_size, backing_size,
static_cast<u32>(perms), strerror(errno));
}
void Unmap(size_t virtual_offset, size_t length) {
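The hunk above enriches the mmap failure assertion with every parameter that shaped the call. Below is a minimal standalone sketch of the same diagnostic pattern, assuming a POSIX host; MapAtFixedOffset and the plain fprintf reporting are illustrative, not the emulator's code.

```cpp
// Map a file-backed region at a fixed virtual address and, on failure, report every
// parameter that influenced the call, mirroring the enriched ASSERT_MSG above.
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <sys/mman.h>

void* MapAtFixedOffset(unsigned char* virtual_base, std::size_t virtual_offset,
                       std::size_t host_offset, std::size_t length, int prot_flags, int fd) {
    // MAP_SHARED when backed by a file descriptor, MAP_PRIVATE otherwise,
    // always at the exact requested address (MAP_FIXED).
    const int flags = (fd > 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED;
    void* ret = mmap(virtual_base + virtual_offset, length, prot_flags, flags, fd,
                     static_cast<off_t>(host_offset));
    if (ret == MAP_FAILED) {
        // Include the full context in the diagnostic instead of just strerror(errno).
        std::fprintf(stderr,
                     "mmap(virt_off=0x%zX, host_off=0x%zX, len=0x%zX) failed: %s\n",
                     virtual_offset, host_offset, length, std::strerror(errno));
    }
    return ret;
}
```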

View File

@@ -466,7 +466,7 @@ struct Values {
true};
SwitchableSetting<bool> async_presentation{linkage,
#ifdef ANDROID
true,
false,
#else
false,
#endif
@@ -514,7 +514,6 @@ struct Values {
SwitchableSetting<bool> barrier_feedback_loops{linkage, true, "barrier_feedback_loops",
Category::RendererAdvanced};
SwitchableSetting<u32, true> sample_shading_fraction{linkage, 0, 0, 100, "sample_shading_fraction", Category::RendererExtensions, Specialization::Scalar, true};
SwitchableSetting<u8, true> dyna_state{linkage,
#if defined (_WIN32)
3,
@@ -536,6 +535,17 @@ struct Values {
SwitchableSetting<bool> vertex_input_dynamic_state{linkage, true, "vertex_input_dynamic_state", Category::RendererExtensions};
SwitchableSetting<bool> provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions};
SwitchableSetting<bool> descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions};
SwitchableSetting<bool> sample_shading{linkage, false, "sample_shading", Category::RendererExtensions, Specialization::Paired};
SwitchableSetting<u32, true> sample_shading_fraction{linkage,
50,
0,
100,
"sample_shading_fraction",
Category::RendererExtensions,
Specialization::Scalar,
true,
false,
&sample_shading};
Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",

View File

@@ -130,6 +130,17 @@ public:
ResetStorageBit(id.index);
}
[[nodiscard]] bool Contains(SlotId id) const noexcept {
if (!id) {
return false;
}
const size_t word = id.index / 64;
if (word >= stored_bitset.size()) {
return false;
}
return ((stored_bitset[word] >> (id.index % 64)) & 1) != 0;
}
[[nodiscard]] Iterator begin() noexcept {
const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
if (it == stored_bitset.end()) {
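The new Contains member is a plain bit test over the 64-bit validity words, guarded against a null id and an out-of-range word. A standalone sketch of the same check, with the SlotId unwrapped to a raw index for brevity:

```cpp
// Validity bitmap stored as 64-bit words: index i is "live" iff
// bit (i % 64) of word (i / 64) is set.
#include <cstddef>
#include <cstdint>
#include <vector>

bool ContainsIndex(const std::vector<uint64_t>& stored_bitset, uint32_t index) {
    const std::size_t word = index / 64;
    if (word >= stored_bitset.size()) {
        return false;  // index past the bitmap: never inserted
    }
    return ((stored_bitset[word] >> (index % 64)) & 1) != 0;
}
```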

View File

@@ -391,15 +391,28 @@ const std::size_t CACHE_PAGE_SIZE = 4096;
void ArmNce::ClearInstructionCache() {
#ifdef __aarch64__
// Ensure all previous memory operations complete
// Use IC IALLU to actually invalidate L1 instruction cache
asm volatile("dsb ish\n"
"ic iallu\n"
"dsb ish\n"
"isb" ::: "memory");
#endif
}
void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
this->ClearInstructionCache();
#ifdef ARCHITECTURE_arm64
// Invalidate instruction cache for specific range instead of full flush
constexpr u64 cache_line_size = 64;
const u64 aligned_addr = addr & ~(cache_line_size - 1);
const u64 end_addr = (addr + size + cache_line_size - 1) & ~(cache_line_size - 1);
asm volatile("dsb ish" ::: "memory");
for (u64 i = aligned_addr; i < end_addr; i += cache_line_size) {
asm volatile("ic ivau, %0" :: "r"(i) : "memory");
}
asm volatile("dsb ish\n"
"isb" ::: "memory");
#endif
}
} // namespace Core
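The hunk swaps a full instruction-cache flush for a ranged invalidation: align the range to 64-byte lines, issue ic ivau per line, and bracket the loop with barriers. A sketch of that pattern, assuming a fixed 64-byte line size (a production version could derive it from CTR_EL0 instead); the asm stays behind the same __aarch64__ guard so the sketch compiles elsewhere too.

```cpp
#include <cstddef>
#include <cstdint>

void InvalidateInstructionRange(uint64_t addr, std::size_t size) {
    constexpr uint64_t cache_line_size = 64;  // assumed line size, see lead-in
    const uint64_t aligned_addr = addr & ~(cache_line_size - 1);
    const uint64_t end_addr = (addr + size + cache_line_size - 1) & ~(cache_line_size - 1);
#ifdef __aarch64__
    asm volatile("dsb ish" ::: "memory");  // finish prior writes before maintenance
    for (uint64_t line = aligned_addr; line < end_addr; line += cache_line_size) {
        asm volatile("ic ivau, %0" ::"r"(line) : "memory");  // invalidate line by VA
    }
    asm volatile("dsb ish\n"
                 "isb" ::: "memory");  // complete maintenance, resync the pipeline
#else
    (void)aligned_addr;
    (void)end_addr;
#endif
}
```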

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstring>
#include <sstream>
#include <boost/range/algorithm_ext/erase.hpp>
@@ -187,7 +191,7 @@ void HLERequestContext::ParseCommandBuffer(u32_le* src_cmdbuf, bool incoming) {
buffer_w_descriptors.push_back(rp.PopRaw<IPC::BufferDescriptorABW>());
}
const auto buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
if (!command_header->IsTipc()) {
// Padding to align to 16 bytes
@@ -294,7 +298,15 @@ Result HLERequestContext::WriteToOutgoingCommandBuffer() {
}
// Write the domain objects to the command buffer, these go after the raw untranslated data.
// TODO(Subv): This completely ignores C buffers.
if (buffer_c_offset != 0 && !buffer_c_descriptors.empty()) {
constexpr u32 WORDS_PER_DESCRIPTOR = sizeof(IPC::BufferDescriptorC) / sizeof(u32);
u32 descriptor_offset = buffer_c_offset;
for (const auto& descriptor : buffer_c_descriptors) {
std::memcpy(&cmd_buf[descriptor_offset], &descriptor, sizeof(descriptor));
descriptor_offset += WORDS_PER_DESCRIPTOR;
}
}
if (GetManager()->IsDomain()) {
current_offset = domain_offset - static_cast<u32>(outgoing_domain_objects.size());
@@ -393,10 +405,14 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBuffer target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
if (is_buffer_b) {
@@ -418,15 +434,25 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size,
std::size_t buffer_index) const {
if (buffer_index >= BufferDescriptorB().size() || size == 0) {
if (buffer_index >= BufferDescriptorB().size()) {
LOG_WARNING(Core, "WriteBufferB invalid buffer index {}", buffer_index);
return 0;
}
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write (B)");
return 0;
}
const auto buffer_size{BufferDescriptorB()[buffer_index].Size()};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBufferB target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
@@ -435,15 +461,25 @@ std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size
std::size_t HLERequestContext::WriteBufferC(const void* buffer, std::size_t size,
std::size_t buffer_index) const {
if (buffer_index >= BufferDescriptorC().size() || size == 0) {
if (buffer_index >= BufferDescriptorC().size()) {
LOG_WARNING(Core, "WriteBufferC invalid buffer index {}", buffer_index);
return 0;
}
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write (C)");
return 0;
}
const auto buffer_size{BufferDescriptorC()[buffer_index].Size()};
if (buffer_size == 0) {
LOG_WARNING(Core, "WriteBufferC target index {} has zero capacity", buffer_index);
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);
size = buffer_size; // TODO(bunnei): This needs to be HW tested
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
size, buffer_size);
size = buffer_size;
}
memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
@@ -473,12 +509,20 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
ASSERT_OR_EXECUTE_MSG(
BufferDescriptorB().size() > buffer_index, { return 0; },
"BufferDescriptorB invalid buffer_index {}", buffer_index);
return BufferDescriptorB()[buffer_index].Size();
const auto size = BufferDescriptorB()[buffer_index].Size();
if (size == 0) {
LOG_WARNING(Core, "BufferDescriptorB index {} has zero size", buffer_index);
}
return size;
} else {
ASSERT_OR_EXECUTE_MSG(
BufferDescriptorC().size() > buffer_index, { return 0; },
"BufferDescriptorC invalid buffer_index {}", buffer_index);
return BufferDescriptorC()[buffer_index].Size();
const auto size = BufferDescriptorC()[buffer_index].Size();
if (size == 0) {
LOG_WARNING(Core, "BufferDescriptorC index {} has zero size", buffer_index);
}
return size;
}
return 0;
}
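Note on the hunks above: the plain, B, and C write paths now share one guard sequence — reject a bad descriptor index, skip empty writes, warn on zero-capacity targets, and clamp oversized writes instead of failing outright. A minimal standalone sketch of that pattern, not taken from the tree; FakeDescriptor and ClampedWriteSize are illustrative names only:

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for an HIPC buffer descriptor: only the capacity matters here.
struct FakeDescriptor {
    std::size_t capacity;
};

// Returns the number of bytes that would actually be written, mirroring the
// index / empty / zero-capacity / clamp checks used by the WriteBuffer* paths above.
std::size_t ClampedWriteSize(const std::vector<FakeDescriptor>& descriptors,
                             std::size_t buffer_index, std::size_t size) {
    if (buffer_index >= descriptors.size()) {
        std::printf("invalid buffer index %zu\n", buffer_index);
        return 0;
    }
    if (size == 0) {
        return 0; // nothing to copy
    }
    const std::size_t capacity = descriptors[buffer_index].capacity;
    if (capacity == 0) {
        std::printf("target index %zu has zero capacity\n", buffer_index);
        return 0;
    }
    if (size > capacity) {
        std::printf("size %zu exceeds capacity %zu; clamping\n", size, capacity);
    }
    return std::min(size, capacity);
}

int main() {
    const std::vector<FakeDescriptor> descriptors{{0x100}, {0}};
    std::printf("%zu\n", ClampedWriteSize(descriptors, 0, 0x200)); // clamped to 0x100
    std::printf("%zu\n", ClampedWriteSize(descriptors, 1, 0x10));  // zero capacity -> 0
    std::printf("%zu\n", ClampedWriteSize(descriptors, 5, 0x10));  // bad index -> 0
}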

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -422,6 +425,7 @@ private:
u32 data_payload_offset{};
u32 handles_offset{};
u32 domain_offset{};
u32 buffer_c_offset{};
std::weak_ptr<SessionRequestManager> manager{};
bool is_deferred{false};

View File

@@ -134,4 +134,4 @@ target_include_directories(dynarmic_tests PRIVATE . ../src)
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
add_test(NAME dynarmic_tests COMMAND dynarmic_tests --durations yes)
add_test(dynarmic_tests dynarmic_tests --durations yes)

View File

@@ -324,13 +324,6 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent)
tr("Improves rendering of transparency effects in specific games."));
// Renderer (Extensions)
INSERT(Settings,
sample_shading_fraction,
tr("Sample Shading"),
tr("Allows the fragment shader to execute per sample in a multi-sampled fragment "
"instead of once per fragment. Improves graphics quality at the cost of performance.\n"
"Higher values improve quality but degrade performance."));
INSERT(Settings,
dyna_state,
tr("Extended Dynamic State"),
@@ -353,6 +346,15 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent)
tr("Improves texture & buffer handling and the Maxwell translation layer.\n"
"Some Vulkan 1.1+ and all 1.2+ devices support this extension."));
INSERT(Settings, sample_shading, QString(), QString());
INSERT(Settings,
sample_shading_fraction,
tr("Sample Shading"),
tr("Allows the fragment shader to execute per sample in a multi-sampled fragment "
"instead of once per fragment. Improves graphics quality at the cost of performance.\n"
"Higher values improve quality but degrade performance."));
// Renderer (Debug)
// System

View File

@@ -11,6 +11,7 @@
#include <vector>
#include <spirv-tools/optimizer.hpp>
#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -439,15 +440,23 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles) &&
profile.support_vote) {
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(ctx.stage);
const bool needs_warp_intrinsics = info.uses_subgroup_vote ||
info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles;
if (needs_warp_intrinsics && profile.support_vote && stage_supports_warp) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
ctx.AddCapability(spv::Capability::GroupNonUniformShuffle);
if (!profile.warp_size_potentially_larger_than_guest) {
// vote ops are only used when not taking the long path
ctx.AddCapability(spv::Capability::GroupNonUniformVote);
}
} else if (needs_warp_intrinsics && !stage_supports_warp) {
LOG_WARNING(Shader,
"Warp intrinsics requested in stage {} but the device does not report subgroup "
"support; falling back to scalar approximations",
static_cast<u32>(ctx.stage));
}
if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
ctx.AddCapability(spv::Capability::Int64Atomics);

View File

@@ -491,9 +491,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
}
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
const AttributeType output_type{ctx.runtime_info.color_output_types[index]};
Id pointer_type{ctx.output_f32};
Id store_value{value};
switch (output_type) {
case AttributeType::SignedInt:
pointer_type = ctx.output_s32;
store_value = ctx.OpBitcast(ctx.S32[1], value);
break;
case AttributeType::UnsignedInt:
pointer_type = ctx.output_u32;
store_value = ctx.OpBitcast(ctx.U32[1], value);
break;
default:
break;
}
const Id component_id{ctx.Const(component)};
const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
ctx.OpStore(pointer, value);
const Id pointer{ctx.OpAccessChain(pointer_type, ctx.frag_color.at(index), component_id)};
ctx.OpStore(pointer, store_value);
}
void EmitSetSampleMask(EmitContext& ctx, Id value) {
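The EmitSetFragColor change keeps shader values in 32-bit floats and only reinterprets them at the store, so an integer render target receives the raw bits rather than a numeric conversion. A rough host-side illustration of that bitcast-versus-convert distinction; nothing below comes from the emitter itself:

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
    const float shader_value = 1.0f;
    // What OpBitcast does for a signed/unsigned integer attachment: reuse the bit pattern.
    const std::uint32_t stored_bits = std::bit_cast<std::uint32_t>(shader_value);
    // What a numeric conversion would produce instead (not what the emitter wants here).
    const std::uint32_t converted = static_cast<std::uint32_t>(shader_value);
    std::printf("bitcast: 0x%08X  convert: %u\n", stored_bits, converted);
    // Prints bitcast: 0x3F800000  convert: 1 — only the former preserves the raw bits.
}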

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -195,6 +198,41 @@ Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR
}
}
Id TextureColorResultType(EmitContext& ctx, const TextureDefinition& def) {
switch (def.component_type) {
case SamplerComponentType::Float:
case SamplerComponentType::Depth:
return ctx.F32[4];
case SamplerComponentType::Sint:
return ctx.S32[4];
case SamplerComponentType::Stencil:
return ctx.U32[4];
case SamplerComponentType::Uint:
return ctx.U32[4];
}
throw InvalidArgument("Invalid sampler component type {}", def.component_type);
}
Id TextureSampleResultToFloat(EmitContext& ctx, const TextureDefinition& def, Id color) {
switch (def.component_type) {
case SamplerComponentType::Float:
case SamplerComponentType::Depth:
return color;
case SamplerComponentType::Sint:
return ctx.OpConvertSToF(ctx.F32[4], color);
case SamplerComponentType::Stencil:
{
const Id converted{ctx.OpConvertUToF(ctx.F32[4], color)};
const Id inv255{ctx.Const(1.0f / 255.0f)};
const Id scale{ctx.ConstantComposite(ctx.F32[4], inv255, inv255, inv255, inv255)};
return ctx.OpFMul(ctx.F32[4], converted, scale);
}
case SamplerComponentType::Uint:
return ctx.OpConvertUToF(ctx.F32[4], color);
}
throw InvalidArgument("Invalid sampler component type {}", def.component_type);
}
Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
if (!index.IsImmediate() || index.U32() != 0) {
throw NotImplementedException("Indirect image indexing");
@@ -449,31 +487,39 @@ Id EmitBoundImageWrite(EmitContext&) {
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const Id texture{Texture(ctx, info, index)};
Id color{};
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
&EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
color = Emit(&EmitContext::OpImageSparseSampleImplicitLod,
&EmitContext::OpImageSampleImplicitLod, ctx, inst, color_type, texture,
coords, operands.MaskOptional(), operands.Span());
} else {
// We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as
// if the lod was explicitly zero. This may change on Turing with implicit compute
// derivatives
const Id lod{ctx.Const(0.0f)};
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
color = Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, texture,
coords, operands.Mask(), operands.Span());
}
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const ImageOperands operands(ctx, false, true, false, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
@@ -509,13 +555,18 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const ImageOperands operands(ctx, offset, offset2);
const Id texture{Texture(ctx, info, index)};
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
}
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
operands.MaskOptional(), operands.Span());
const Id color{
Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, color_type,
texture, coords, ctx.Const(info.gather_component), operands.MaskOptional(),
operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
@@ -533,6 +584,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition* def =
info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index);
const Id result_type{def ? TextureColorResultType(ctx, *def) : ctx.F32[4]};
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
@@ -542,8 +596,13 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
lod = Id{};
}
const ImageOperands operands(lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
Id color{Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst,
result_type, TextureImage(ctx, info, index), coords, operands.MaskOptional(),
operands.Span())};
if (def) {
color = TextureSampleResultToFloat(ctx, *def, color);
}
return color;
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
@@ -588,14 +647,17 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivatives, const IR::Value& offset, Id lod_clamp) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
const Id color_type{TextureColorResultType(ctx, def)};
const auto operands = info.num_derivatives == 3
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
ctx.Def(offset), {}, lod_clamp)
: ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
info.num_derivatives, offset, lod_clamp);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
return TextureSampleResultToFloat(ctx, def, color);
}
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
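These helpers sample with the texture's native component type and then widen the result back to the F32[4] vector the surrounding IR expects, with stencil additionally normalized by 1/255. A small CPU-side sketch of that conversion step, using an illustrative enum rather than the emitter's types:

#include <array>
#include <cstdint>
#include <cstdio>

enum class ComponentType { Float, Sint, Uint, Stencil }; // illustrative, not the emitter's enum

// Mirrors the idea of TextureSampleResultToFloat: integer texels are converted to float,
// and stencil texels are additionally scaled into [0, 1].
std::array<float, 4> ToFloat4(ComponentType type, std::array<std::uint32_t, 4> texel) {
    std::array<float, 4> out{};
    for (std::size_t i = 0; i < 4; ++i) {
        float value = type == ComponentType::Sint
                          ? static_cast<float>(static_cast<std::int32_t>(texel[i]))
                          : static_cast<float>(texel[i]);
        if (type == ComponentType::Stencil) {
            value *= 1.0f / 255.0f;
        }
        out[i] = value;
    }
    return out;
}

int main() {
    const auto rgba = ToFloat4(ComponentType::Stencil, {255u, 128u, 0u, 255u});
    std::printf("%.3f %.3f %.3f %.3f\n", rgba[0], rgba[1], rgba[2], rgba[3]);
}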

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -78,9 +81,25 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
}
bool SupportsWarpIntrinsics(const EmitContext& ctx) {
return ctx.profile.SupportsWarpIntrinsics(ctx.stage);
}
void SetAlwaysInBounds(EmitContext& ctx, IR::Inst* inst) {
SetInBoundsFlag(inst, ctx.true_value);
}
Id FallbackBallotMask(EmitContext& ctx, Id pred) {
const Id full_mask{ctx.Const(0xFFFFFFFFu)};
return ctx.OpSelect(ctx.U32[1], pred, full_mask, ctx.u32_zero_value);
}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
@@ -89,6 +108,9 @@ Id EmitLaneId(EmitContext& ctx) {
}
Id EmitVoteAll(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAll(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -102,6 +124,9 @@ Id EmitVoteAll(EmitContext& ctx, Id pred) {
}
Id EmitVoteAny(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAny(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -115,6 +140,9 @@ Id EmitVoteAny(EmitContext& ctx, Id pred) {
}
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return pred;
}
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpGroupNonUniformAllEqual(ctx.U1, SubgroupScope(ctx), pred);
}
@@ -129,6 +157,9 @@ Id EmitVoteEqual(EmitContext& ctx, Id pred) {
}
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
if (!SupportsWarpIntrinsics(ctx)) {
return FallbackBallotMask(ctx, pred);
}
const Id ballot{ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
@@ -137,27 +168,46 @@ Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
}
Id EmitSubgroupEqMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_eq);
}
Id EmitSubgroupLtMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_lt);
}
Id EmitSubgroupLeMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_le);
}
Id EmitSubgroupGtMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_gt);
}
Id EmitSubgroupGeMask(EmitContext& ctx) {
if (!SupportsWarpIntrinsics(ctx)) {
return ctx.u32_zero_value;
}
return LoadMask(ctx, ctx.subgroup_mask_ge);
}
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{EmitLaneId(ctx)};
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
@@ -177,6 +227,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
@@ -192,6 +246,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
@@ -207,6 +265,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
if (!SupportsWarpIntrinsics(ctx)) {
SetAlwaysInBounds(ctx, inst);
return value;
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
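Where a stage lacks subgroup support, the emitters above fall back to single-lane semantics: votes return the caller's predicate, lane masks become zero, ballots collapse to a full or empty mask, and shuffles return the input value unchanged. A tiny host-side model of those fallbacks, assuming a subgroup of exactly one invocation:

#include <cstdint>
#include <cstdio>

// Single-lane stand-ins for the scalar fallbacks used when warp intrinsics are unavailable.
bool VoteAll(bool pred) { return pred; }
bool VoteAny(bool pred) { return pred; }
std::uint32_t SubgroupBallot(bool pred) { return pred ? 0xFFFFFFFFu : 0u; }
std::uint32_t SubgroupEqMask() { return 0u; } // all lane masks fall back to zero
std::uint32_t ShuffleIndex(std::uint32_t value, std::uint32_t /*index*/) { return value; }

int main() {
    std::printf("all=%d any=%d ballot=0x%08X eq=0x%08X shuffle=%u\n", VoteAll(true),
                VoteAny(false), SubgroupBallot(true), SubgroupEqMask(), ShuffleIndex(42u, 3u));
}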

View File

@@ -28,27 +28,41 @@ enum class Operation {
FPMax,
};
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
Id ComponentScalarType(EmitContext& ctx, SamplerComponentType component_type) {
switch (component_type) {
case SamplerComponentType::Float:
case SamplerComponentType::Depth:
return ctx.F32[1];
case SamplerComponentType::Sint:
return ctx.S32[1];
case SamplerComponentType::Stencil:
return ctx.U32[1];
case SamplerComponentType::Uint:
return ctx.U32[1];
}
throw InvalidArgument("Invalid sampler component type {}", component_type);
}
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc, Id sampled_type) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
const bool ms{desc.is_multisample};
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, false, false, 1, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, true, false, 1, format);
case TextureType::Color2D:
case TextureType::Color2DRect:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, false, ms, 1, format);
case TextureType::ColorArray2D:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, true, ms, 1, format);
case TextureType::Color3D:
return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, depth, false, false, 1, format);
case TextureType::ColorCube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, false, false, 1, format);
case TextureType::ColorArrayCube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, true, false, 1, format);
case TextureType::Buffer:
break;
}
@@ -315,6 +329,9 @@ void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
ctx.Decorate(id, spv::Decoration::Binding, binding);
ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
ctx.Name(id, fmt::format("ssbo{}", index));
if (!desc.is_written) {
ctx.Decorate(id, spv::Decoration::NonWritable);
}
if (ctx.profile.supported_spirv >= 0x00010400) {
ctx.interfaces.push_back(id);
}
@@ -546,6 +563,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
if (info.uses_int8 && profile.support_int8) {
AddCapability(spv::Capability::Int8);
@@ -1359,7 +1377,8 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
textures.reserve(info.texture_descriptors.size());
for (const TextureDescriptor& desc : info.texture_descriptors) {
const Id image_type{ImageType(*this, desc)};
const Id result_type{ComponentScalarType(*this, desc.component_type)};
const Id image_type{ImageType(*this, desc, result_type)};
const Id sampled_type{TypeSampledImage(image_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
@@ -1372,8 +1391,10 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
.sampled_type = sampled_type,
.pointer_type = pointer_type,
.image_type = image_type,
.result_type = result_type,
.count = desc.count,
.is_multisample = desc.is_multisample,
.component_type = desc.component_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1416,6 +1437,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
void EmitContext::DefineInputs(const IR::Program& program) {
const Info& info{program.info};
const VaryingState loads{info.loads.mask | info.passthrough.mask};
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(stage);
if (info.uses_workgroup_id) {
workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
@@ -1432,24 +1454,37 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
if (info.uses_sample_id) {
sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
if (stage == Stage::Fragment) {
Decorate(sample_id, spv::Decoration::Flat);
}
}
if (info.uses_is_helper_invocation) {
is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
}
if (info.uses_subgroup_mask) {
if (info.uses_subgroup_mask && stage_supports_warp) {
subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
if (stage == Stage::Fragment) {
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
Decorate(subgroup_mask_lt, spv::Decoration::Flat);
Decorate(subgroup_mask_le, spv::Decoration::Flat);
Decorate(subgroup_mask_gt, spv::Decoration::Flat);
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&
(info.uses_subgroup_vote || info.uses_subgroup_mask))) {
if (stage_supports_warp &&
(info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&
(info.uses_subgroup_vote || info.uses_subgroup_mask)))) {
AddCapability(spv::Capability::GroupNonUniform);
subgroup_local_invocation_id =
DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
if (stage == Stage::Fragment) {
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd) {
const Id f32_one{Const(1.0f)};
@@ -1461,6 +1496,9 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
if (loads[IR::Attribute::PrimitiveId]) {
primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
if (stage == Stage::Fragment) {
Decorate(primitive_id, spv::Decoration::Flat);
}
}
if (loads[IR::Attribute::Layer]) {
AddCapability(spv::Capability::Geometry);
@@ -1552,17 +1590,21 @@ void EmitContext::DefineInputs(const IR::Program& program) {
if (stage != Stage::Fragment) {
continue;
}
switch (info.interpolation[index]) {
case Interpolation::Smooth:
// Default
// Decorate(id, spv::Decoration::Smooth);
break;
case Interpolation::NoPerspective:
Decorate(id, spv::Decoration::NoPerspective);
break;
case Interpolation::Flat:
const bool is_integer = input_type == AttributeType::SignedInt ||
input_type == AttributeType::UnsignedInt;
if (is_integer) {
Decorate(id, spv::Decoration::Flat);
break;
} else {
switch (info.interpolation[index]) {
case Interpolation::Smooth:
break;
case Interpolation::NoPerspective:
Decorate(id, spv::Decoration::NoPerspective);
break;
case Interpolation::Flat:
Decorate(id, spv::Decoration::Flat);
break;
}
}
}
if (stage == Stage::TessellationEval) {
@@ -1658,7 +1700,18 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
continue;
}
frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
const AttributeType output_type{runtime_info.color_output_types[index]};
const Id vec_type = [&, output_type]() -> Id {
switch (output_type) {
case AttributeType::SignedInt:
return S32[4];
case AttributeType::UnsignedInt:
return U32[4];
default:
return F32[4];
}
}();
frag_color[index] = DefineOutput(*this, vec_type, std::nullopt);
Decorate(frag_color[index], spv::Decoration::Location, index);
Name(frag_color[index], fmt::format("frag_color{}", index));
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -36,8 +39,10 @@ struct TextureDefinition {
Id sampled_type;
Id pointer_type;
Id image_type;
Id result_type;
u32 count;
bool is_multisample;
SamplerComponentType component_type;
};
struct TextureBufferDefinition {
@@ -244,6 +249,7 @@ public:
Id output_f32{};
Id output_u32{};
Id output_s32{};
Id image_buffer_type{};
Id image_u32{};

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -22,6 +25,8 @@ public:
[[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
[[nodiscard]] virtual SamplerComponentType ReadTextureComponentType(u32 raw_handle) = 0;
[[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0;
[[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0;

View File

@@ -396,6 +396,10 @@ bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf)
return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
}
SamplerComponentType ReadTextureComponentType(Environment& env, const ConstBufferAddr& cbuf) {
return env.ReadTextureComponentType(GetTextureHandle(env, cbuf));
}
class Descriptors {
public:
explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
@@ -433,7 +437,9 @@ public:
u32 Add(const TextureDescriptor& desc) {
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
return desc.type == existing.type &&
desc.component_type == existing.component_type &&
desc.is_depth == existing.is_depth &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
@@ -666,10 +672,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
.secondary_shift_left = cbuf.secondary_shift_left,
.count = cbuf.count,
.size_shift = DESCRIPTOR_SIZE_SHIFT,
.component_type = ReadTextureComponentType(env, cbuf),
});
} else {
index = descriptors.Add(TextureDescriptor{
.type = flags.type,
.component_type = ReadTextureComponentType(env, cbuf),
.is_depth = flags.is_depth != 0,
.is_multisample = is_multisample,
.has_secondary = cbuf.has_secondary,
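Adding component_type to the deduplication predicate means two bindings that read the same constant-buffer slot but decode it as, say, float versus uint now yield two descriptors instead of sharing one. A reduced sketch of that behavior with a toy descriptor type (ToyDescriptor is invented for the example):

#include <cstdio>
#include <vector>

enum class ComponentType { Float, Uint };

struct ToyDescriptor {
    int cbuf_index;
    int cbuf_offset;
    ComponentType component_type;
    bool operator==(const ToyDescriptor&) const = default;
};

// Returns the index of an existing identical descriptor or appends a new one.
std::size_t Add(std::vector<ToyDescriptor>& descriptors, const ToyDescriptor& desc) {
    for (std::size_t i = 0; i < descriptors.size(); ++i) {
        if (descriptors[i] == desc) {
            return i;
        }
    }
    descriptors.push_back(desc);
    return descriptors.size() - 1;
}

int main() {
    std::vector<ToyDescriptor> descriptors;
    const auto a = Add(descriptors, {0, 0x20, ComponentType::Float});
    const auto b = Add(descriptors, {0, 0x20, ComponentType::Uint});
    std::printf("%zu %zu -> %zu descriptors\n", a, b, descriptors.size()); // 0 1 -> 2
}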

View File

@@ -1,9 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <limits>
#include "common/common_types.h"
#include "shader_recompiler/stage.h"
namespace Shader {
@@ -46,6 +52,8 @@ struct Profile {
bool support_multi_viewport{};
bool support_geometry_streams{};
u32 warp_stage_support_mask{std::numeric_limits<u32>::max()};
bool warp_size_potentially_larger_than_guest{};
bool lower_left_origin_mode{};
@@ -90,6 +98,11 @@ struct Profile {
u64 min_ssbo_alignment{};
u32 max_user_clip_distances{};
bool SupportsWarpIntrinsics(Stage stage) const {
const u32 bit = 1u << static_cast<u32>(stage);
return (warp_stage_support_mask & bit) != 0;
}
};
} // namespace Shader
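warp_stage_support_mask packs one bit per shader stage, so a backend can advertise subgroup support for only some stages. A short sketch of building and querying such a mask; the stage numbering below is illustrative and need not match shader_recompiler/stage.h exactly:

#include <cstdint>
#include <cstdio>

// Illustrative stage numbering; the real values come from shader_recompiler/stage.h.
enum class Stage : std::uint32_t { VertexB = 0, TessControl, TessEval, Geometry, Fragment, Compute };

constexpr std::uint32_t StageBit(Stage stage) {
    return 1u << static_cast<std::uint32_t>(stage);
}

constexpr bool SupportsWarpIntrinsics(std::uint32_t mask, Stage stage) {
    return (mask & StageBit(stage)) != 0;
}

int main() {
    // Hypothetical driver: subgroup ops only usable in fragment and compute shaders.
    const std::uint32_t mask = StageBit(Stage::Fragment) | StageBit(Stage::Compute);
    std::printf("fragment: %d  vertex: %d\n",
                SupportsWarpIntrinsics(mask, Stage::Fragment),
                SupportsWarpIntrinsics(mask, Stage::VertexB));
}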

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -80,6 +83,7 @@ struct TransformFeedbackVarying {
struct RuntimeInfo {
std::array<AttributeType, 32> generic_input_types{};
std::array<AttributeType, 8> color_output_types{};
VaryingState previous_stage_stores;
std::map<IR::Attribute, IR::Attribute> previous_stage_legacy_stores_mapping;

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -151,6 +154,14 @@ enum class ImageFormat : u32 {
R32G32B32A32_UINT,
};
enum class SamplerComponentType : u8 {
Float,
Sint,
Uint,
Depth,
Stencil,
};
enum class Interpolation {
Smooth,
Flat,
@@ -183,6 +194,7 @@ struct TextureBufferDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
SamplerComponentType component_type;
auto operator<=>(const TextureBufferDescriptor&) const = default;
};
@@ -204,6 +216,7 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip
struct TextureDescriptor {
TextureType type;
SamplerComponentType component_type;
bool is_depth;
bool is_multisample;
bool has_secondary;

View File

@@ -407,6 +407,12 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
template <class P>
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_graphics_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
}
}
channel_state->enabled_storage_buffers[stage] = 0;
channel_state->written_storage_buffers[stage] = 0;
}
@@ -414,8 +420,26 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
template <class P>
bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
u32 cbuf_offset, bool is_written) {
const bool already_enabled =
((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_graphics_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping graphics storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return false;
}
}
}
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_graphics_storage_buffers;
}
}
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
@@ -446,6 +470,12 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_compute_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
}
}
channel_state->enabled_compute_storage_buffers = 0;
channel_state->written_compute_storage_buffers = 0;
channel_state->image_compute_texture_buffers = 0;
@@ -459,8 +489,26 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
ssbo_index);
return;
}
const bool already_enabled =
((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_compute_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping compute storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return;
}
}
}
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_compute_storage_buffers;
}
}
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
@@ -793,9 +841,23 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
const bool use_fast_buffer = needs_alignment_stream ||
(has_host_buffer &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size));
if (use_fast_buffer) {
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
@@ -834,7 +896,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if (!needs_bind) {
return;
}
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
@@ -951,9 +1012,30 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
TouchBuffer(buffer, binding.buffer_id);
const u32 size =
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
if constexpr (!IS_OPENGL) {
if (needs_alignment_stream) {
const std::span<u8> span =
runtime.BindMappedUniformBuffer(0, binding_index, size);
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
return;
}
}
SynchronizeBuffer(buffer, binding.device_addr, size);
const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
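The storage-buffer paths above keep a per-channel count alongside the enable bitmask, using std::popcount on unbind and refusing new bindings once a driver-imposed maximum is hit. A compact model of that bookkeeping; the limit of 8 is purely illustrative:

#include <bit>
#include <cstdint>
#include <cstdio>

struct BindingTracker {
    std::uint32_t enabled_mask = 0; // one bit per SSBO slot
    std::uint32_t total = 0;        // bindings counted across the channel
    std::uint32_t max_bindings = 8; // hypothetical driver limit

    bool Bind(std::uint32_t slot) {
        const bool already_enabled = ((enabled_mask >> slot) & 1u) != 0;
        if (!already_enabled && total >= max_bindings) {
            std::printf("skipping slot %u: limit %u reached\n", slot, max_bindings);
            return false;
        }
        enabled_mask |= 1u << slot;
        if (!already_enabled) {
            ++total;
        }
        return true;
    }

    void UnbindAll() {
        total -= static_cast<std::uint32_t>(std::popcount(enabled_mask));
        enabled_mask = 0;
    }
};

int main() {
    BindingTracker tracker;
    for (std::uint32_t slot = 0; slot < 10; ++slot) {
        tracker.Bind(slot); // slots 8 and 9 are refused
    }
    tracker.UnbindAll();
    std::printf("total after unbind: %u\n", tracker.total); // back to 0
}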

View File

@@ -8,6 +8,7 @@
#include <algorithm>
#include <array>
#include <bit>
#include <functional>
#include <memory>
#include <mutex>
@@ -132,6 +133,9 @@ public:
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
u32 total_graphics_storage_buffers = 0;
u32 total_compute_storage_buffers = 0;
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{};

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -235,6 +238,9 @@ constexpr Table MakeViewTable() {
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
Enable(view, PixelFormat::D24_UNORM_S8_UINT, PixelFormat::S8_UINT);
Enable(view, PixelFormat::S8_UINT_D24_UNORM, PixelFormat::S8_UINT);
Enable(view, PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::S8_UINT);
return view;
}

View File

@@ -43,90 +43,66 @@ void DmaPusher::DispatchCalls() {
bool DmaPusher::Step() {
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
CommandList& command_list{dma_pushbuffer.front()};
CommandList& command_list = dma_pushbuffer.front();
ASSERT_OR_EXECUTE(
command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
return true;
});
const size_t prefetch_size = command_list.prefetch_command_list.size();
const size_t command_list_size = command_list.command_lists.size();
if (command_list.prefetch_command_list.size()) {
// Prefetched command list from nvdrv, used for things like synchronization
ProcessCommands(VideoCommon::FixSmallVectorADL(command_list.prefetch_command_list));
if (prefetch_size == 0 && command_list_size == 0) {
dma_pushbuffer.pop();
} else {
const CommandListHeader command_list_header{
command_list.command_lists[dma_pushbuffer_subindex++]};
dma_pushbuffer_subindex = 0;
return true;
}
if (signal_sync) {
std::unique_lock lk(sync_mutex);
sync_cv.wait(lk, [this]() { return synced; });
signal_sync = false;
synced = false;
}
if (prefetch_size > 0) {
ProcessCommands(command_list.prefetch_command_list);
dma_pushbuffer.pop();
return true;
}
dma_state.dma_get = command_list_header.addr;
auto& current_command = command_list.command_lists[dma_pushbuffer_subindex];
const CommandListHeader& header = current_command;
dma_state.dma_get = header.addr;
if (command_list_header.size == 0) {
return true;
}
if (signal_sync && !synced) {
std::unique_lock lk(sync_mutex);
sync_cv.wait(lk, [this]() { return synced; });
signal_sync = false;
synced = false;
}
// Push buffer non-empty, read a word
if (dma_state.method >= MacroRegistersStart) {
if (subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(
dma_state.dma_get, command_list_header.size * sizeof(u32));
}
}
if (header.size > 0 && dma_state.method >= MacroRegistersStart &&
    subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty =
    memory_manager.IsMemoryDirty(dma_state.dma_get, header.size * sizeof(u32));
}
const auto safe_process = [&] {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
Tegra::Memory::GuestMemoryFlags::SafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size,
&command_headers);
if (header.size > 0) {
if (Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh())
                                  : Settings::IsDMALevelSafe()) {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>
    headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
};
const auto unsafe_process = [&] {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
Tegra::Memory::GuestMemoryFlags::UnsafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size,
&command_headers);
ProcessCommands(headers);
};
const bool use_safe = Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe();
if (use_safe) {
safe_process();
} else {
unsafe_process();
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::UnsafeRead>
    headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
}
}
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
} else if (command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue()) {
signal_sync = true;
}
if (++dma_pushbuffer_subindex >= command_list_size) {
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
} else {
signal_sync = command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue();
}
if (signal_sync) {
rasterizer->SignalFence([this]() {
if (signal_sync) {
rasterizer->SignalFence([this]() {
std::scoped_lock lk(sync_mutex);
synced = true;
sync_cv.notify_all();
});
}
});
}
return true;
}
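The reworked Step() couples signal_sync to a condition-variable handshake: the fence callback sets synced and notifies, and the pusher blocks on that flag before consuming the next entry. A stripped-down illustration of the handshake, with a plain worker thread standing in for the rasterizer fence:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

std::mutex sync_mutex;
std::condition_variable sync_cv;
bool synced = false;

void WaitForSync() {
    std::unique_lock lk(sync_mutex);
    sync_cv.wait(lk, [] { return synced; });
    synced = false; // consume the signal, as Step() does
}

int main() {
    // Stand-in for rasterizer->SignalFence(...): the "GPU" signals once its work completes.
    std::thread gpu([] {
        std::scoped_lock lk(sync_mutex);
        synced = true;
        sync_cv.notify_all();
    });
    WaitForSync();
    std::printf("pusher resumed after fence\n");
    gpu.join();
}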

View File

@@ -91,6 +91,10 @@ public:
uncommitted_operations.clear();
}
QueueFence(new_fence);
//if (!new_fence->IsStubbed()) {
// std::scoped_lock lock{texture_cache.mutex};
// texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick());
//}
fences.push(std::move(new_fence));
if (should_flush) {
rasterizer.FlushCommands();
@@ -179,7 +183,7 @@ private:
return;
}
}
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
@@ -214,7 +218,7 @@ private:
if (!current_fence->IsStubbed()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
for (auto& operation : current_operations) {
operation();
}
@@ -237,10 +241,11 @@ private:
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
void PopAsyncFlushes(u64 completed_tick) {
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
texture_cache.CompleteGpuAccesses(completed_tick);
buffer_cache.PopAsyncFlushes();
}
query_cache.PopAsyncFlushes();
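Threading the fence's WaitTick() into PopAsyncFlushes lets the texture cache retire only the GPU accesses already covered by the signaled fence. A toy version of that tick-based completion queue; PendingAccess and CompleteGpuAccesses are invented names for the sketch:

#include <cstdint>
#include <cstdio>
#include <deque>

struct PendingAccess {
    std::uint64_t tick;
    int id;
};

std::deque<PendingAccess> pending{{10, 1}, {20, 2}, {30, 3}};

// Retire every access whose tick is at or below the tick the signaled fence guarantees.
void CompleteGpuAccesses(std::uint64_t completed_tick) {
    while (!pending.empty() && pending.front().tick <= completed_tick) {
        std::printf("completing access %d (tick %llu)\n", pending.front().id,
                    static_cast<unsigned long long>(pending.front().tick));
        pending.pop_front();
    }
}

int main() {
    CompleteGpuAccesses(20); // retires ids 1 and 2, leaves id 3 pending
    std::printf("still pending: %zu\n", pending.size());
}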

View File

@@ -211,6 +211,12 @@ void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
streamer->CloseCounter();
}
template <typename Traits>
bool QueryCacheBase<Traits>::HasStreamer(QueryType counter_type) const {
const size_t index = static_cast<size_t>(counter_type);
return impl->streamers[index] != nullptr;
}
template <typename Traits>
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
size_t index = static_cast<size_t>(counter_type);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -92,6 +95,8 @@ public:
void CounterReset(QueryType counter_type);
[[nodiscard]] bool HasStreamer(QueryType counter_type) const;
void CounterClose(QueryType counter_type);
void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,

View File

@@ -198,6 +198,10 @@ public:
return device.CanReportMemoryUsage();
}
u32 GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment());
}
u32 GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
}

View File

@@ -7,13 +7,50 @@
#include <cstring>
#include <bit>
#include <numeric>
#include <optional>
#include "common/cityhash.h"
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/surface.h"
namespace OpenGL {
namespace {
std::optional<VideoCore::Surface::PixelFormatNumeric>
NumericFromComponentType(Shader::SamplerComponentType component_type) {
using VideoCore::Surface::PixelFormatNumeric;
switch (component_type) {
case Shader::SamplerComponentType::Float:
return PixelFormatNumeric::Float;
case Shader::SamplerComponentType::Sint:
return PixelFormatNumeric::Sint;
case Shader::SamplerComponentType::Uint:
return PixelFormatNumeric::Uint;
default:
return std::nullopt;
}
}
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
const auto desired_numeric = NumericFromComponentType(component_type);
if (!desired_numeric) {
return format;
}
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
if (*desired_numeric == current_numeric) {
return format;
}
if (const auto variant =
VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
return *variant;
}
return format;
}
} // Anonymous namespace
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
@@ -174,8 +211,12 @@ void ComputePipeline::Configure() {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
auto buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++texbuf_index;
}
@@ -205,7 +246,8 @@ void ComputePipeline::Configure() {
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
textures[texture_binding] =
image_view.SampledView(desc.type, desc.component_type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << texture_binding;
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -25,6 +28,10 @@ public:
void Wait();
[[nodiscard]] u64 WaitTick() const noexcept {
return 0;
}
private:
OGLSync sync_object;
};

View File

@@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <optional>
#include <string>
#include <vector>
#include <bit>
@@ -18,6 +19,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/texture_cache.h"
#if defined(_MSC_VER) && defined(NDEBUG)
@@ -39,6 +41,38 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
std::optional<VideoCore::Surface::PixelFormatNumeric>
NumericFromComponentType(Shader::SamplerComponentType component_type) {
using VideoCore::Surface::PixelFormatNumeric;
switch (component_type) {
case Shader::SamplerComponentType::Float:
return PixelFormatNumeric::Float;
case Shader::SamplerComponentType::Sint:
return PixelFormatNumeric::Sint;
case Shader::SamplerComponentType::Uint:
return PixelFormatNumeric::Uint;
default:
return std::nullopt;
}
}
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
const auto desired_numeric = NumericFromComponentType(component_type);
if (!desired_numeric) {
return format;
}
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
if (*desired_numeric == current_numeric) {
return format;
}
if (const auto variant =
VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
return *variant;
}
return format;
}
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
@@ -397,8 +431,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
auto buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
++texture_buffer_it;
@@ -483,7 +521,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
textures[texture_binding] =
image_view.SampledView(desc.type, desc.component_type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << stage_texture_binding;
}

View File

@@ -220,6 +220,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_gl_sparse_textures = device.HasSparseTexture2(),
.support_gl_derivative_control = device.HasDerivativeControl(),
.support_geometry_streams = true,
.warp_stage_support_mask = 0xFFFFFFFFu,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),

View File

@@ -692,6 +692,15 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
bool TextureCacheRuntime::SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept {
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsPixelFormatInteger;
if (IsPixelFormatInteger(format)) {
return false;
}
return GetFormatType(format) == VideoCore::Surface::SurfaceType::ColorTexture;
}
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -1229,6 +1238,13 @@ GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFor
return view;
}
GLuint ImageView::SampledView(Shader::TextureType view_type,
Shader::SamplerComponentType /*component_type*/) {
// OpenGL swizzles already configure depth/stencil selection per TIC entry,
// so fall back to the default view handle.
return Handle(view_type);
}
void ImageView::SetupView(Shader::TextureType view_type) {
views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -13,6 +16,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -129,6 +133,8 @@ public:
return false;
}
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept;
bool HasBrokenTextureViewFormats() const noexcept {
return has_broken_texture_view_formats;
}
@@ -137,6 +143,8 @@ public:
void TickFrame() {}
void WaitForGpuTick(u64) {}
StateTracker& GetStateTracker() {
return state_tracker;
}
@@ -264,6 +272,9 @@ public:
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] GLuint SampledView(Shader::TextureType view_type,
Shader::SamplerComponentType component_type);
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}

View File

@@ -525,18 +525,24 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
blit_depth_stencil_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)
: vk::ShaderModule{}),
clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)
: vk::ShaderModule{}),
convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)),
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
? BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)
: vk::ShaderModule{}),
convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)),
convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)),
convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)),
@@ -667,6 +673,11 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
if (!device.IsExtShaderStencilExportSupported()) {
// Shader requires VK_EXT_shader_stencil_export which is not available
LOG_WARNING(Render_Vulkan, "ConvertABGR8ToD24S8 requires shader_stencil_export, skipping");
return;
}
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
convert_abgr8_to_d24s8_frag);
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
@@ -702,6 +713,11 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
if (!device.IsExtShaderStencilExportSupported()) {
// Shader requires VK_EXT_shader_stencil_export which is not available
LOG_WARNING(Render_Vulkan, "ConvertABGR8SRGBToD24S8 requires shader_stencil_export, skipping");
return;
}
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
dst_framebuffer->RenderPass(),
convert_abgr8_srgb_to_d24s8_frag);
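These guards only build or dispatch the stencil-export conversion shaders when VK_EXT_shader_stencil_export is available. A hedged sketch of how such an extension check can be made at device-selection time with the standard enumeration call (error handling omitted; this is not the renderer's actual detection code):

#include <cstring>
#include <vector>
#include <vulkan/vulkan.h>

// Returns true if the physical device advertises VK_EXT_shader_stencil_export.
bool HasStencilExport(VkPhysicalDevice physical_device) {
    uint32_t count = 0;
    vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count, nullptr);
    std::vector<VkExtensionProperties> extensions(count);
    vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count, extensions.data());
    for (const VkExtensionProperties& ext : extensions) {
        if (std::strcmp(ext.extensionName, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME) == 0) {
            return true;
        }
    }
    return false;
}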

View File

@@ -59,7 +59,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
raw1 = 0;
extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
extended_dynamic_state_2_logic_op.Assign(features.has_extended_dynamic_state_2_logic_op ? 1 : 0);
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
@@ -157,7 +157,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
return static_cast<u16>(array.stride.Value());
});
}
if (!extended_dynamic_state_2_extra) {
if (!extended_dynamic_state_2_logic_op) {
dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
}
if (!extended_dynamic_state_3_blend) {

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -20,9 +23,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct DynamicFeatures {
bool has_extended_dynamic_state;
bool has_extended_dynamic_state_2;
bool has_extended_dynamic_state_2_extra;
bool has_extended_dynamic_state_2_logic_op;
bool has_extended_dynamic_state_2_patch_control_points;
bool has_extended_dynamic_state_3_blend;
bool has_extended_dynamic_state_3_enables;
bool has_dual_source_blend;
bool has_dynamic_vertex_input;
};
@@ -186,7 +191,7 @@ struct FixedPipelineState {
u32 raw1;
BitField<0, 1, u32> extended_dynamic_state;
BitField<1, 1, u32> extended_dynamic_state_2;
BitField<2, 1, u32> extended_dynamic_state_2_extra;
BitField<2, 1, u32> extended_dynamic_state_2_logic_op;
BitField<3, 1, u32> extended_dynamic_state_3_blend;
BitField<4, 1, u32> extended_dynamic_state_3_enables;
BitField<5, 1, u32> dynamic_vertex_input;

View File

@@ -165,7 +165,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
{VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable | Storage}, // A8B8G8R8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
{VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
@@ -177,7 +177,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
{VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM
{VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
{VK_FORMAT_B8G8R8A8_SRGB, Attachable | Storage}, // B8G8R8A8_SRGB
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
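
The format table now advertises Storage usage for these sRGB color formats. Storage-image support for sRGB is far from universal, so the flag only helps if the device side validates it at runtime; a hedged sketch of that capability check using the standard format-properties query (not the emulator's code path):

// Sketch only: whether a format can actually back a storage image is driver-dependent
// and must be confirmed at runtime before the Storage flag above is honoured.
#include <vulkan/vulkan.h>

bool SupportsStorageImage(VkPhysicalDevice physical_device, VkFormat format) {
    VkFormatProperties properties{};
    vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties);
    return (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0;
}

// Example: SupportsStorageImage(gpu, VK_FORMAT_A8B8G8R8_SRGB_PACK32) is false on many
// drivers; in that case a non-sRGB view or another fallback has to stand in, which is an
// assumption about how the renderer bridges the gap rather than something shown in this hunk.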

View File

@@ -189,12 +189,16 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VideoCommon::SamplerId sampler_id{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
const VkImageView vk_image_view{
image_view.SampledView(desc.type, desc.component_type)};
const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
!image_view.SupportsAnisotropy()};
const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
: sampler.Handle()};
const bool supports_linear_filter{
texture_cache.SupportsLinearFilter(image_view.format)};
const bool supports_depth_compare_sampling{
image_view.SupportsDepthCompareSampling()};
const VkSampler vk_sampler{
sampler.SelectHandle(supports_linear_filter, image_view.SupportsAnisotropy(),
supports_depth_compare_sampling)};
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
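
Sampler selection now goes through Sampler::SelectHandle(), keyed on whether the view's format supports linear filtering, whether it can be sampled anisotropically, and whether depth-compare sampling is available. The implementation is outside this diff; the sketch below is a hypothetical reading of what such a selector does, choosing between pre-created VkSampler handles rather than mutating sampler state:

// Hypothetical sketch of a SelectHandle-style helper; the real Sampler class and its
// pre-built handles are not part of this diff, so member names are assumptions.
#include <vulkan/vulkan.h>

class SamplerSketch {
public:
    VkSampler SelectHandle(bool supports_linear_filter, bool supports_anisotropy,
                           bool supports_depth_compare) const {
        // Depth-compare sampling the view cannot service is dropped first.
        if (uses_depth_compare && !supports_depth_compare) {
            return handle_without_compare;
        }
        // Formats that only support nearest filtering get a nearest-filter handle.
        if (uses_linear_filter && !supports_linear_filter) {
            return handle_nearest;
        }
        // Views that cannot be sampled anisotropically get the default-anisotropy handle.
        if (uses_anisotropy && !supports_anisotropy) {
            return handle_default_anisotropy;
        }
        return handle_full;
    }

private:
    bool uses_depth_compare{};
    bool uses_linear_filter{};
    bool uses_anisotropy{};
    VkSampler handle_full{};
    VkSampler handle_without_compare{};
    VkSampler handle_nearest{};
    VkSampler handle_default_anisotropy{};
};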

View File

@@ -280,7 +280,6 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
buffer.Flush(); // Ensure host writes are visible before the GPU copy.
}
const VkBufferImageCopy copy{

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <limits>
#include <span>
#include <vector>
@@ -333,6 +334,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue) {
const VkDriverIdKHR driver_id = device.GetDriverID();
limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (limit_dynamic_storage_buffers) {
max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
}
if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
// TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
@@ -408,6 +416,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment());
}
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment());
}
@@ -583,7 +595,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
if (index >= device.GetMaxVertexInputBindings()) {
return;
}
if (device.IsExtExtendedDynamicStateSupported()) {
if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
ReserveNullBuffer();
buffer = *null_buffer;
offset = 0;
size = std::numeric_limits<u32>::max();
}
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
// When VIDS is active, the pipeline doesn't declare VERTEX_INPUT_BINDING_STRIDE as dynamic
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
@@ -623,7 +643,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
if (binding_count == 0) {
return;
}
if (device.IsExtExtendedDynamicStateSupported()) {
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
scheduler.Record([bindings_ = std::move(bindings),
buffer_handles_ = std::move(buffer_handles),
binding_count](vk::CommandBuffer cmdbuf) {
@@ -680,27 +701,50 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
}
void BufferCacheRuntime::ReserveNullBuffer() {
const VkBufferUsageFlags expected_usage = NullBufferUsageFlags();
if (null_buffer && null_buffer_usage_flags != expected_usage) {
RefreshNullBuffer();
}
if (!null_buffer) {
null_buffer = CreateNullBuffer();
}
}
VkBufferUsageFlags BufferCacheRuntime::NullBufferUsageFlags() const {
VkBufferUsageFlags usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
if (device.IsExtTransformFeedbackSupported()) {
usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
return usage;
}
void BufferCacheRuntime::RefreshNullBuffer() {
if (!null_buffer) {
return;
}
scheduler.Finish();
null_buffer.reset();
null_buffer = CreateNullBuffer();
}
vk::Buffer BufferCacheRuntime::CreateNullBuffer() {
VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 4,
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
.usage = NullBufferUsageFlags(),
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
null_buffer_usage_flags = create_info.usage;
if (device.HasDebuggingToolAttached()) {
ret.SetObjectNameEXT("Null buffer");
}
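
The null buffer now remembers the usage flags it was created with; ReserveNullBuffer() compares them against NullBufferUsageFlags() and routes through RefreshNullBuffer(), which drains the scheduler before destroying the stale buffer. A generic sketch of that recreate-on-usage-change pattern, with placeholder names (the real state lives inside BufferCacheRuntime):

// Generic sketch of the "recreate a shared resource when its required usage changes"
// pattern used above; types and callbacks are illustrative, not the emulator's.
#include <vulkan/vulkan.h>

struct NullBufferSlot {
    VkBuffer buffer = VK_NULL_HANDLE;
    VkBufferUsageFlags usage = 0;
};

template <typename CreateFn, typename WaitIdleFn>
void Reserve(NullBufferSlot& slot, VkBufferUsageFlags expected_usage, CreateFn&& create,
             WaitIdleFn&& wait_idle) {
    if (slot.buffer != VK_NULL_HANDLE && slot.usage != expected_usage) {
        // The old buffer may still be referenced by in-flight command buffers, so wait
        // for the GPU before dropping it (mirrors the scheduler.Finish() call above).
        wait_idle();
        slot.buffer = VK_NULL_HANDLE; // real code destroys the old handle here
    }
    if (slot.buffer == VK_NULL_HANDLE) {
        slot.buffer = create(expected_usage);
        slot.usage = expected_usage;
    }
}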

View File

@@ -6,6 +6,8 @@
#pragma once
#include <limits>
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
@@ -94,6 +96,8 @@ public:
bool CanReportMemoryUsage() const;
u32 GetUniformBufferAlignment() const;
u32 GetStorageBufferAlignment() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
@@ -127,6 +131,9 @@ public:
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
/// Forces destruction and recreation of the shared null buffer so new usage flags take effect.
void RefreshNullBuffer();
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
[[maybe_unused]] u32 /*binding_index*/,
u32 size) {
@@ -155,6 +162,14 @@ public:
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
bool ShouldLimitDynamicStorageBuffers() const {
return limit_dynamic_storage_buffers;
}
u32 GetMaxDynamicStorageBuffers() const {
return max_dynamic_storage_buffers;
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
guest_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -162,6 +177,7 @@ private:
void ReserveNullBuffer();
vk::Buffer CreateNullBuffer();
VkBufferUsageFlags NullBufferUsageFlags() const;
struct UniformRing {
static constexpr size_t NUM_FRAMES = 3;
@@ -191,9 +207,13 @@ private:
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
VkBufferUsageFlags null_buffer_usage_flags = 0;
std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
bool limit_dynamic_storage_buffers = false;
u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
};
struct BufferCacheParams {

View File

@@ -418,6 +418,9 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
u32 src_offset, bool compare_to_zero) {
if (!device.IsExtConditionalRendering()) {
return;
}
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
@@ -448,7 +451,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(1, 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
});
}
@@ -459,10 +462,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT,
VK_SHADER_STAGE_COMPUTE_BIT) &&
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT,
VK_SHADER_STAGE_COMPUTE_BIT)
? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV)
: std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV)),
scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
@@ -470,6 +477,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
VkBuffer src_buffer, size_t number_of_sums,
size_t min_accumulation_limit, size_t max_accumulation_limit) {
constexpr VkAccessFlags BASE_DST_ACCESS = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT;
const VkAccessFlags conditional_access =
device.IsExtConditionalRendering() ? VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT : 0;
size_t current_runs = number_of_sums;
size_t offset = 0;
while (current_runs != 0) {
@@ -486,22 +501,18 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit,
runs_to_do, used_offset](vk::CommandBuffer cmdbuf) {
runs_to_do, used_offset, conditional_access](vk::CommandBuffer cmdbuf) {
static constexpr VkMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
};
static constexpr VkMemoryBarrier write_barrier{
const VkMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
.dstAccessMask = BASE_DST_ACCESS | conditional_access,
};
const QueriesPrefixScanPushConstants uniforms{
.min_accumulation_base = static_cast<u32>(min_accumulation_limit),
@@ -519,8 +530,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(1, 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0,
write_barrier);
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
});
}
}
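
Both compute passes now ask the device for subgroup support per shader stage instead of globally, and the prefix-scan pass derives its conditional-rendering access mask from the extension check. For reference, the Vulkan 1.1 query that a per-stage IsSubgroupFeatureSupported(feature, stage) can be built on; treating this as the backing data is an assumption, since the Device method itself is not part of this diff:

// Sketch of the core Vulkan 1.1 subgroup query; the emulator's wrapper presumably
// consults these two fields (or driver-specific overrides) per stage.
#include <vulkan/vulkan.h>

bool SubgroupFeatureSupported(VkPhysicalDevice physical_device, VkSubgroupFeatureFlagBits feature,
                              VkShaderStageFlagBits stage) {
    VkPhysicalDeviceSubgroupProperties subgroup{};
    subgroup.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    VkPhysicalDeviceProperties2 properties{};
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties.pNext = &subgroup;
    vkGetPhysicalDeviceProperties2(physical_device, &properties);
    // Both the subgroup operation and the requesting stage must be advertised.
    return (subgroup.supportedOperations & feature) != 0 &&
           (subgroup.supportedStages & stage) != 0;
}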

View File

@@ -18,14 +18,49 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_notify.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include <optional>
namespace Vulkan {
using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatNumeric;
static std::optional<PixelFormatNumeric> NumericFromComponentType(
Shader::SamplerComponentType component_type) {
switch (component_type) {
case Shader::SamplerComponentType::Float:
return PixelFormatNumeric::Float;
case Shader::SamplerComponentType::Sint:
return PixelFormatNumeric::Sint;
case Shader::SamplerComponentType::Uint:
return PixelFormatNumeric::Uint;
default:
return std::nullopt;
}
}
static PixelFormat ResolveTexelBufferFormat(PixelFormat format,
Shader::SamplerComponentType component_type) {
const auto desired_numeric = NumericFromComponentType(component_type);
if (!desired_numeric) {
return format;
}
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
if (*desired_numeric == current_numeric) {
return format;
}
if (const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
return *variant;
}
return format;
}
ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
DescriptorPool& descriptor_pool,
@@ -182,8 +217,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(views[index].id);
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
}

View File

@@ -34,6 +34,10 @@ public:
void Wait();
[[nodiscard]] u64 WaitTick() const noexcept {
return wait_tick;
}
private:
Scheduler& scheduler;
u64 wait_tick = 0;

View File

@@ -5,6 +5,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <optional>
#include <iostream>
#include <span>
@@ -23,7 +25,9 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_device.h"
#if defined(_MSC_VER) && defined(NDEBUG)
@@ -44,10 +48,83 @@ using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::PixelFormatNumeric;
constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
constexpr size_t MAX_IMAGE_ELEMENTS = 64;
std::optional<PixelFormatNumeric> NumericFromComponentType(
Shader::SamplerComponentType component_type) {
switch (component_type) {
case Shader::SamplerComponentType::Float:
return PixelFormatNumeric::Float;
case Shader::SamplerComponentType::Sint:
return PixelFormatNumeric::Sint;
case Shader::SamplerComponentType::Uint:
return PixelFormatNumeric::Uint;
default:
return std::nullopt;
}
}
PixelFormat ResolveTexelBufferFormat(PixelFormat format,
Shader::SamplerComponentType component_type) {
const auto desired_numeric = NumericFromComponentType(component_type);
if (!desired_numeric) {
return format;
}
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
if (*desired_numeric == current_numeric) {
return format;
}
if (const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
return *variant;
}
return format;
}
bool UsesDualSourceFactor(Maxwell::Blend::Factor factor) {
switch (factor) {
case Maxwell::Blend::Factor::Source1Color_D3D:
case Maxwell::Blend::Factor::Source1Color_GL:
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
case Maxwell::Blend::Factor::Source1Alpha_D3D:
case Maxwell::Blend::Factor::Source1Alpha_GL:
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
return true;
default:
return false;
}
}
Maxwell::Blend::Factor FallbackDualSourceFactor(Maxwell::Blend::Factor factor) {
switch (factor) {
case Maxwell::Blend::Factor::Source1Color_D3D:
case Maxwell::Blend::Factor::Source1Color_GL:
return Maxwell::Blend::Factor::SourceColor_D3D;
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
return Maxwell::Blend::Factor::OneMinusSourceColor_D3D;
case Maxwell::Blend::Factor::Source1Alpha_D3D:
case Maxwell::Blend::Factor::Source1Alpha_GL:
return Maxwell::Blend::Factor::SourceAlpha_D3D;
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
return Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D;
default:
return factor;
}
}
bool AttachmentUsesDualSource(const FixedPipelineState::BlendingAttachment& blend) {
return UsesDualSourceFactor(blend.SourceRGBFactor()) ||
UsesDualSourceFactor(blend.DestRGBFactor()) ||
UsesDualSourceFactor(blend.SourceAlphaFactor()) ||
UsesDualSourceFactor(blend.DestAlphaFactor());
}
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
DescriptorLayoutBuilder builder{device};
for (size_t index = 0; index < infos.size(); ++index) {
@@ -263,6 +340,7 @@ GraphicsPipeline::GraphicsPipeline(
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors);
}
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
uses_push_descriptor = builder.CanUsePushDescriptor();
@@ -416,8 +494,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
if constexpr (!is_image) {
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
}
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
image_view.BufferSize(), buffer_format,
is_written, is_image);
++index;
++texture_buffer_it;
@@ -702,13 +784,18 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.lineWidth = 1.0f,
// TODO(alekpop): Transfer from regs
};
const bool smooth_lines_supported =
device.IsExtLineRasterizationSupported() && device.SupportsSmoothLines();
const bool stippled_lines_supported =
device.IsExtLineRasterizationSupported() && device.SupportsStippledRectangularLines();
VkPipelineRasterizationLineStateCreateInfoEXT line_state{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
.lineRasterizationMode = key.state.smooth_lines != 0
.lineRasterizationMode = key.state.smooth_lines != 0 && smooth_lines_supported
? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
: VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
.stippledLineEnable = dynamic.line_stipple_enable ? VK_TRUE : VK_FALSE,
.stippledLineEnable =
(dynamic.line_stipple_enable && stippled_lines_supported) ? VK_TRUE : VK_FALSE,
.lineStippleFactor = key.state.line_stipple_factor,
.lineStipplePattern = static_cast<uint16_t>(key.state.line_stipple_pattern),
};
@@ -739,17 +826,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
}
const VkPipelineMultisampleStateCreateInfo multisample_ci{
const bool supports_alpha_output = fragment_has_color0_output;
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
const auto msaa_mode = key.state.msaa_mode.Value();
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.sampleShadingEnable = Settings::values.sample_shading_fraction.GetValue() > 0 ? VK_TRUE : VK_FALSE,
.minSampleShading = float(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
.rasterizationSamples = vk_samples,
.sampleShadingEnable = Settings::values.sample_shading.GetValue() ? VK_TRUE : VK_FALSE,
.minSampleShading = static_cast<float>(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToCoverageEnable =
supports_alpha_output && key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = supports_alpha_output && alpha_to_one_supported &&
key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
};
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -771,6 +866,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
}
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const size_t num_attachments{NumAttachments(key.state)};
const bool supports_dual_source_blend = device.SupportsDualSourceBlend();
const u32 max_dual_source_attachments = supports_dual_source_blend
? device.MaxFragmentDualSrcAttachments()
: 0;
u32 granted_dual_source_attachments = 0;
bool logged_dual_source_warning = false;
for (size_t index = 0; index < num_attachments; ++index) {
static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
@@ -784,13 +885,30 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
for (size_t i = 0; i < mask_table.size(); ++i) {
write_mask |= mask[i] ? mask_table[i] : 0;
}
const bool attachment_uses_dual_source = AttachmentUsesDualSource(blend);
const bool allow_dual_source = attachment_uses_dual_source && supports_dual_source_blend &&
granted_dual_source_attachments < max_dual_source_attachments;
if (allow_dual_source) {
++granted_dual_source_attachments;
} else if (attachment_uses_dual_source && !logged_dual_source_warning) {
LOG_WARNING(Render_Vulkan,
"Dual-source blend factors exceed device limit (maxFragmentDualSrcAttachments={}), falling back to single-source factors",
max_dual_source_attachments);
logged_dual_source_warning = true;
}
const auto sanitize_factor = [&](Maxwell::Blend::Factor factor) {
if (allow_dual_source || !UsesDualSourceFactor(factor)) {
return factor;
}
return FallbackDualSourceFactor(factor);
};
cb_attachments.push_back({
.blendEnable = blend.enable != 0,
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
.srcColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceRGBFactor())),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestRGBFactor())),
.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceAlphaFactor())),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestAlphaFactor())),
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
.colorWriteMask = write_mask,
});
@@ -806,14 +924,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.blendConstants = {}
};
static_vector<VkDynamicState, 34> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_LINE_WIDTH,
};
if (device.UsesAdvancedCoreDynamicState()) {
static constexpr std::array core_dynamic_states{
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
};
dynamic_states.insert(dynamic_states.end(), core_dynamic_states.begin(),
core_dynamic_states.end());
}
if (key.state.extended_dynamic_state) {
std::vector<VkDynamicState> extended{
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
@@ -823,51 +952,68 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
if (!device.IsExtVertexInputDynamicStateSupported()) {
extended.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
}
if (key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
// VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT is part of EDS1
// Only use it if VIDS is not active (VIDS replaces it with full vertex input control)
if (!key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
}
if (key.state.extended_dynamic_state_2_extra) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
// VK_DYNAMIC_STATE_VERTEX_INPUT_EXT (VIDS) - Independent from EDS
// Provides full dynamic vertex input control, replaces VERTEX_INPUT_BINDING_STRIDE
if (key.state.dynamic_vertex_input) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
// EDS2 - Core (3 states)
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
}
// EDS2 - LogicOp (granular)
if (key.state.extended_dynamic_state_2_logic_op) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
// EDS3 - Blending (composite: 3 states)
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
}
// EDS3 - Enables (composite: per-feature)
if (key.state.extended_dynamic_state_3_enables) {
if (device.SupportsDynamicState3DepthClampEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT);
}
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
// VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
if (device.SupportsDynamicState3LogicOpEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT);
}
if (key.state.extended_dynamic_state_3_enables) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
// additional state3 extensions
VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT,
VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT,
VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT,
VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT,
VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT,
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
if (device.SupportsDynamicState3LineRasterizationMode()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT);
}
if (device.SupportsDynamicState3ConservativeRasterizationMode()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT);
}
if (device.SupportsDynamicState3LineStippleEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT);
}
if (device.SupportsDynamicState3AlphaToCoverageEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT);
}
if (device.SupportsDynamicState3AlphaToOneEnable()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT);
}
}

View File

@@ -82,6 +82,17 @@ public:
const std::array<const Shader::Info*, NUM_STAGES>& infos);
bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; }
bool SupportsAlphaToCoverage() const noexcept {
return fragment_has_color0_output;
}
bool SupportsAlphaToOne() const noexcept {
return fragment_has_color0_output;
}
bool UsesExtendedDynamicState() const noexcept {
return key.state.extended_dynamic_state != 0;
}
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -149,6 +160,7 @@ private:
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_textures{};
bool fragment_has_color0_output{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;

View File

@@ -36,6 +36,7 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/surface.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
@@ -105,6 +106,41 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
return {};
}
Shader::AttributeType RenderTargetAttributeType(Tegra::RenderTargetFormat format) {
if (format == Tegra::RenderTargetFormat::NONE) {
return Shader::AttributeType::Float;
}
const auto pixel_format{
VideoCore::Surface::PixelFormatFromRenderTargetFormat(format)};
if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
return Shader::AttributeType::Float;
}
if (VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)) {
return Shader::AttributeType::SignedInt;
}
return Shader::AttributeType::UnsignedInt;
}
VkShaderStageFlagBits StageToVkStage(Shader::Stage stage) {
switch (stage) {
case Shader::Stage::VertexA:
case Shader::Stage::VertexB:
return VK_SHADER_STAGE_VERTEX_BIT;
case Shader::Stage::TessellationControl:
return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
case Shader::Stage::TessellationEval:
return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
case Shader::Stage::Geometry:
return VK_SHADER_STAGE_GEOMETRY_BIT;
case Shader::Stage::Fragment:
return VK_SHADER_STAGE_FRAGMENT_BIT;
case Shader::Stage::Compute:
return VK_SHADER_STAGE_COMPUTE_BIT;
default:
return VK_SHADER_STAGE_VERTEX_BIT;
}
}
Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
if (attr.enabled == 0) {
return Shader::AttributeType::Disabled;
@@ -229,6 +265,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.alpha_test_func = MaxwellToCompareFunction(
key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
info.alpha_test_reference = std::bit_cast<float>(key.state.alpha_test_ref);
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
const auto format = static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[index]);
info.color_output_types[index] = RenderTargetAttributeType(format);
}
break;
default:
break;
@@ -269,8 +309,8 @@ size_t GetTotalPipelineWorkers() {
const size_t max_core_threads =
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
// Leave at least a few cores free in android
constexpr size_t free_cores = 3ULL;
// Leave at least one core free on Android to reduce thermal pressure.
constexpr size_t free_cores = 1ULL;
if (max_core_threads <= free_cores) {
return 1ULL;
}
@@ -317,6 +357,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
"VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const bool float_controls_supported{device.IsKhrShaderFloatControlsSupported()};
const VkDriverId driver_id{device.GetDriverID()};
profile = Shader::Profile{
.supported_spirv = device.SupportedSpirvVersion(),
@@ -326,20 +367,24 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
.support_float_controls = device.IsKhrShaderFloatControlsSupported(),
.support_separate_denorm_behavior =
.support_float_controls = float_controls_supported,
.support_separate_denorm_behavior = float_controls_supported &&
float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_separate_rounding_mode =
.support_separate_rounding_mode = float_controls_supported &&
float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve =
.support_fp16_denorm_preserve = float_controls_supported &&
float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_controls_supported &&
float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_controls_supported &&
float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_controls_supported &&
float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
.support_fp32_signed_zero_nan_preserve =
.support_fp32_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
.support_fp64_signed_zero_nan_preserve = float_controls_supported &&
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
@@ -395,6 +440,27 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_conditional_barrier = device.SupportsConditionalBarriers(),
};
profile.warp_stage_support_mask = 0;
static constexpr std::array kAllStages{
Shader::Stage::VertexA, Shader::Stage::VertexB,
Shader::Stage::TessellationControl, Shader::Stage::TessellationEval,
Shader::Stage::Geometry, Shader::Stage::Fragment,
Shader::Stage::Compute,
};
for (const auto stage : kAllStages) {
const auto vk_stage = StageToVkStage(stage);
if (device.SupportsWarpIntrinsics(vk_stage)) {
profile.warp_stage_support_mask |= 1u << static_cast<u32>(stage);
}
}
profile.support_vote = profile.warp_stage_support_mask != 0;
if (!profile.SupportsWarpIntrinsics(Shader::Stage::Fragment)) {
LOG_WARNING(Render_Vulkan,
"Fragment shaders lack subgroup support on this driver; warp intrinsics will be "
"approximated and visual artifacts may remain");
}
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
device.GetMaxVertexInputAttributes(), Maxwell::NumVertexAttributes);
@@ -404,14 +470,40 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
}
dynamic_features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(),
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(),
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(),
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(),
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(),
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
};
LOG_INFO(Render_Vulkan, "DynamicState setting value: {}", Settings::values.dyna_state.GetValue());
dynamic_features = {};
// User granularity enforced in vulkan_device.cpp switch statement:
// Level 0: Core Dynamic States only
// Level 1: Core + EDS1
// Level 2: Core + EDS1 + EDS2 (accumulative)
// Level 3: Core + EDS1 + EDS2 + EDS3 (accumulative)
// Here we only verify if extensions were successfully loaded by the device
dynamic_features.has_extended_dynamic_state =
device.IsExtExtendedDynamicStateSupported();
dynamic_features.has_extended_dynamic_state_2 =
device.IsExtExtendedDynamicState2Supported();
dynamic_features.has_extended_dynamic_state_2_logic_op =
device.IsExtExtendedDynamicState2ExtrasSupported();
dynamic_features.has_extended_dynamic_state_2_patch_control_points = false;
dynamic_features.has_extended_dynamic_state_3_blend =
device.IsExtExtendedDynamicState3BlendingSupported();
dynamic_features.has_dual_source_blend = device.SupportsDualSourceBlend();
if (!dynamic_features.has_dual_source_blend) {
LOG_WARNING(Render_Vulkan, "Dual-source blending unsupported, disabling dynamic blend");
dynamic_features.has_extended_dynamic_state_3_blend = false;
}
dynamic_features.has_extended_dynamic_state_3_enables =
device.IsExtExtendedDynamicState3EnablesSupported();
// VIDS: Independent toggle (not affected by dyna_state levels)
dynamic_features.has_dynamic_vertex_input =
device.IsExtVertexInputDynamicStateSupported() &&
Settings::values.vertex_input_dynamic_state.GetValue();
}
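
The comment block above describes an accumulative dyna_state setting whose enforcement lives in vulkan_device.cpp and is not part of this diff. A hypothetical sketch of that gating, with stand-in booleans for the extension toggles it controls:

// Hypothetical sketch of the accumulative dyna_state levels; the real switch is in
// vulkan_device.cpp and this code only illustrates the described policy.
struct DynamicStateToggles {
    bool eds1 = false; // VK_EXT_extended_dynamic_state
    bool eds2 = false; // VK_EXT_extended_dynamic_state2 (+ logic-op extra)
    bool eds3 = false; // VK_EXT_extended_dynamic_state3 (blend + enables)
};

DynamicStateToggles ApplyDynaStateLevel(int level) {
    DynamicStateToggles toggles{};
    switch (level) {
    case 3:
        toggles.eds3 = true;
        [[fallthrough]];
    case 2:
        toggles.eds2 = true;
        [[fallthrough]];
    case 1:
        toggles.eds1 = true;
        [[fallthrough]];
    case 0:
    default:
        break; // Level 0: core dynamic states only.
    }
    return toggles;
}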
PipelineCache::~PipelineCache() {
@@ -421,6 +513,13 @@ PipelineCache::~PipelineCache() {
}
}
void PipelineCache::DrainPendingBuilds() {
if (!device.HasBrokenParallelShaderCompiling()) {
return;
}
workers.WaitForRequests();
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
if (!RefreshStages(graphics_key.unique_hashes)) {
@@ -451,12 +550,17 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
.shared_memory_size = qmd.shared_alloc,
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
};
const auto [pair, is_new]{compute_cache.try_emplace(key)};
const auto [pair, inserted]{compute_cache.try_emplace(key)};
auto& pipeline{pair->second};
if (!is_new) {
return pipeline.get();
if (!pipeline) {
auto [slot, should_build] = AcquireComputeBuildSlot(key);
if (!should_build) {
WaitForBuildCompletion(slot);
} else {
pipeline = CreateComputePipeline(key, shader);
ReleaseComputeBuildSlot(key, slot);
}
}
pipeline = CreateComputePipeline(key, shader);
return pipeline.get();
}
@@ -516,8 +620,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
dynamic_features.has_extended_dynamic_state ||
(key.state.extended_dynamic_state_2 != 0) !=
dynamic_features.has_extended_dynamic_state_2 ||
(key.state.extended_dynamic_state_2_extra != 0) !=
dynamic_features.has_extended_dynamic_state_2_extra ||
(key.state.extended_dynamic_state_2_logic_op != 0) !=
dynamic_features.has_extended_dynamic_state_2_logic_op ||
(key.state.extended_dynamic_state_3_blend != 0) !=
dynamic_features.has_extended_dynamic_state_3_blend ||
(key.state.extended_dynamic_state_3_enables != 0) !=
@@ -572,13 +676,20 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
const auto [pair, inserted]{graphics_cache.try_emplace(graphics_key)};
auto& pipeline{pair->second};
if (is_new) {
pipeline = CreateGraphicsPipeline();
}
if (!pipeline) {
return nullptr;
const auto key = pair->first;
auto [slot, should_build] = AcquireGraphicsBuildSlot(key);
if (!should_build) {
WaitForBuildCompletion(slot);
} else {
pipeline = CreateGraphicsPipeline();
ReleaseGraphicsBuildSlot(key, slot);
}
if (!pipeline) {
return nullptr;
}
}
if (current_pipeline) {
current_pipeline->AddTransition(pipeline.get());
@@ -601,6 +712,7 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
return pipeline;
}
scheduler.KeepAliveTick();
return nullptr;
}
@@ -704,6 +816,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to create graphics pipeline 0x{:016x}: {}", key.Hash(),
exception.what());
return nullptr;
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
@@ -767,6 +883,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const VkDriverIdKHR driver_id = device.GetDriverID();
const bool needs_shared_mem_clamp =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
LOG_WARNING(Render_Vulkan,
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
key.unique_hash,
program.shared_memory_size / 1024,
max_shared_memory / 1024);
program.shared_memory_size = max_shared_memory;
}
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};
@@ -782,6 +911,10 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to create compute pipeline 0x{:016x}: {}", key.Hash(),
exception.what());
return nullptr;
}
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
@@ -879,4 +1012,68 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::
}
}
auto PipelineCache::AcquireGraphicsBuildSlot(const GraphicsPipelineCacheKey& key)
-> std::pair<InFlightPipelinePtr, bool> {
std::scoped_lock lock(graphics_inflight_mutex);
auto [it, inserted] = graphics_inflight_builds.try_emplace(key);
if (inserted || !it->second) {
it->second = std::make_shared<InFlightPipelineBuild>();
return {it->second, true};
}
return {it->second, false};
}
auto PipelineCache::AcquireComputeBuildSlot(const ComputePipelineCacheKey& key)
-> std::pair<InFlightPipelinePtr, bool> {
std::scoped_lock lock(compute_inflight_mutex);
auto [it, inserted] = compute_inflight_builds.try_emplace(key);
if (inserted || !it->second) {
it->second = std::make_shared<InFlightPipelineBuild>();
return {it->second, true};
}
return {it->second, false};
}
void PipelineCache::ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key,
const InFlightPipelinePtr& slot) {
if (!slot) {
return;
}
{
std::scoped_lock slot_lock(slot->mutex);
slot->building = false;
}
slot->cv.notify_all();
std::scoped_lock map_lock(graphics_inflight_mutex);
auto it = graphics_inflight_builds.find(key);
if (it != graphics_inflight_builds.end() && it->second == slot) {
graphics_inflight_builds.erase(it);
}
}
void PipelineCache::ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key,
const InFlightPipelinePtr& slot) {
if (!slot) {
return;
}
{
std::scoped_lock slot_lock(slot->mutex);
slot->building = false;
}
slot->cv.notify_all();
std::scoped_lock map_lock(compute_inflight_mutex);
auto it = compute_inflight_builds.find(key);
if (it != compute_inflight_builds.end() && it->second == slot) {
compute_inflight_builds.erase(it);
}
}
void PipelineCache::WaitForBuildCompletion(const InFlightPipelinePtr& slot) const {
if (!slot) {
return;
}
std::unique_lock lock(slot->mutex);
slot->cv.wait(lock, [&] { return !slot->building; });
}
} // namespace Vulkan
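
The Acquire/Release/Wait trio added above deduplicates concurrent builds of the same pipeline key: the first caller constructs the pipeline while any other thread racing on the same key blocks until the slot is marked finished and erased. A condensed, self-contained version of the same synchronization shape, with a string key and a callback standing in for the cache key and CreateGraphicsPipeline/CreateComputePipeline:

// Condensed sketch of the in-flight build deduplication; key and build types are placeholders.
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

struct InFlightBuild {
    std::mutex mutex;
    std::condition_variable cv;
    bool building = true;
};

class BuildDeduplicator {
public:
    // First caller for a key runs build(); concurrent callers for the same key wait
    // until the first one finishes instead of compiling the same pipeline twice.
    void BuildOrWait(const std::string& key, const std::function<void()>& build) {
        std::shared_ptr<InFlightBuild> slot;
        bool should_build = false;
        {
            std::scoped_lock lock{map_mutex};
            auto [it, inserted] = builds.try_emplace(key);
            if (inserted || !it->second) {
                it->second = std::make_shared<InFlightBuild>();
                should_build = true;
            }
            slot = it->second;
        }
        if (!should_build) {
            std::unique_lock lock{slot->mutex};
            slot->cv.wait(lock, [&] { return !slot->building; });
            return;
        }
        build();
        {
            std::scoped_lock lock{slot->mutex};
            slot->building = false;
        }
        slot->cv.notify_all();
        std::scoped_lock lock{map_mutex};
        if (auto it = builds.find(key); it != builds.end() && it->second == slot) {
            builds.erase(it);
        }
    }

private:
    std::mutex map_mutex;
    std::unordered_map<std::string, std::shared_ptr<InFlightBuild>> builds;
};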

View File

@@ -5,8 +5,10 @@
#include <array>
#include <cstddef>
#include <condition_variable>
#include <filesystem>
#include <memory>
#include <mutex>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -113,7 +115,17 @@ public:
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
void DrainPendingBuilds();
private:
struct InFlightPipelineBuild {
std::mutex mutex;
std::condition_variable cv;
bool building{true};
};
using InFlightPipelinePtr = std::shared_ptr<InFlightPipelineBuild>;
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
@@ -140,6 +152,14 @@ private:
vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
u32 expected_cache_version);
std::pair<InFlightPipelinePtr, bool> AcquireGraphicsBuildSlot(
const GraphicsPipelineCacheKey& key);
std::pair<InFlightPipelinePtr, bool> AcquireComputeBuildSlot(
const ComputePipelineCacheKey& key);
void ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key, const InFlightPipelinePtr& slot);
void ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key, const InFlightPipelinePtr& slot);
void WaitForBuildCompletion(const InFlightPipelinePtr& slot) const;
const Device& device;
Scheduler& scheduler;
DescriptorPool& descriptor_pool;
@@ -158,6 +178,11 @@ private:
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::mutex graphics_inflight_mutex;
std::unordered_map<GraphicsPipelineCacheKey, InFlightPipelinePtr> graphics_inflight_builds;
std::mutex compute_inflight_mutex;
std::unordered_map<ComputePipelineCacheKey, InFlightPipelinePtr> compute_inflight_builds;
ShaderPools main_pools;
Shader::Profile profile;

View File

@@ -42,7 +42,8 @@ public:
static constexpr size_t BANK_SIZE = 256;
static constexpr size_t QUERY_SIZE = 8;
explicit SamplesQueryBank(const Device& device_, size_t index_)
: BankBase(BANK_SIZE), device{device_}, index{index_} {
: BankBase(BANK_SIZE), device{device_}, index{index_},
supports_host_query_reset{device_.SupportsHostQueryReset()} {
const auto& dev = device.GetLogical();
query_pool = dev.CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
@@ -60,8 +61,10 @@ public:
void Reset() override {
ASSERT(references == 0);
VideoCommon::BankBase::Reset();
const auto& dev = device.GetLogical();
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
if (supports_host_query_reset) {
const auto& dev = device.GetLogical();
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
}
host_results.fill(0ULL);
next_bank = 0;
}
@@ -99,6 +102,7 @@ public:
private:
const Device& device;
const size_t index;
const bool supports_host_query_reset;
vk::QueryPool query_pool;
std::array<u64, BANK_SIZE> host_results;
};
@@ -1200,21 +1204,24 @@ struct QueryCacheRuntimeImpl {
hcr_setup.pNext = nullptr;
hcr_setup.flags = 0;
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
if (device.IsExtConditionalRendering()) {
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = sizeof(u32),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = sizeof(u32),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
hcr_resolve_buffer =
memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
}
VideoCore::RasterizerInterface* rasterizer;

View File

@@ -197,6 +197,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
// Log multi-draw support
if (device.IsExtMultiDrawSupported()) {
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw is enabled for optimized draw calls");
}
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -210,6 +215,10 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
if (device.HasBrokenParallelShaderCompiling()) {
pipeline_cache.DrainPendingBuilds();
}
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -234,16 +243,44 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
// Use VK_EXT_multi_draw if available (single draw becomes multi-draw with count=1)
if (device.IsExtMultiDrawSupported()) {
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
// Use multi-draw indexed with single draw
const VkMultiDrawIndexedInfoEXT multi_draw_info{
.firstIndex = draw_params.first_index,
.indexCount = draw_params.num_vertices,
};
const int32_t vertex_offset = static_cast<int32_t>(draw_params.base_vertex);
cmdbuf.DrawMultiIndexedEXT(1, &multi_draw_info, draw_params.num_instances,
draw_params.base_instance,
sizeof(VkMultiDrawIndexedInfoEXT), &vertex_offset);
} else {
// Use multi-draw with single draw
const VkMultiDrawInfoEXT multi_draw_info{
.firstVertex = draw_params.base_vertex,
.vertexCount = draw_params.num_vertices,
};
cmdbuf.DrawMultiEXT(1, &multi_draw_info, draw_params.num_instances,
draw_params.base_instance,
sizeof(VkMultiDrawInfoEXT));
}
});
} else {
// Fallback to standard draw calls
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
}
});
}
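
Here the single-draw path is funneled through VK_EXT_multi_draw with a count of one, which by itself is mostly a no-op; the extension pays off when several draws are packed into one command. A sketch of that batched use against the raw Vulkan entry point (this bypasses the emulator's vk::CommandBuffer wrapper, and the geometry ranges are made up for illustration):

// Sketch of batching non-indexed draws with VK_EXT_multi_draw; the extension function
// pointer must be loaded via vkGetDeviceProcAddr and is passed in here for clarity.
#include <array>
#include <vulkan/vulkan.h>

void RecordBatchedDraws(VkCommandBuffer cmdbuf, PFN_vkCmdDrawMultiEXT draw_multi,
                        uint32_t instance_count, uint32_t first_instance) {
    const std::array<VkMultiDrawInfoEXT, 3> draws{{
        {.firstVertex = 0, .vertexCount = 6},
        {.firstVertex = 6, .vertexCount = 6},
        {.firstVertex = 12, .vertexCount = 3},
    }};
    // One command replaces three vkCmdDraw calls; stride tells the driver how to walk the array.
    draw_multi(cmdbuf, static_cast<uint32_t>(draws.size()), draws.data(), instance_count,
               first_instance, sizeof(VkMultiDrawInfoEXT));
}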
@@ -386,13 +423,48 @@ void RasterizerVulkan::Clear(u32 layer_count) {
.baseArrayLayer = regs.clear_surface.layer,
.layerCount = layer_count,
};
if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
const auto clamp_rect_to_render_area = [render_area](VkRect2D& rect) -> bool {
const auto clamp_axis = [](s32& offset, u32& extent, u32 limit) {
auto clamp_offset = [&offset, limit]() {
if (limit == 0) {
offset = 0;
return;
}
offset = std::clamp(offset, 0, static_cast<s32>(limit));
};
if (extent == 0) {
clamp_offset();
return;
}
if (offset < 0) {
const u32 shrink = (std::min)(extent, static_cast<u32>(-offset));
extent -= shrink;
offset = 0;
}
if (limit == 0) {
extent = 0;
offset = 0;
return;
}
if (offset >= static_cast<s32>(limit)) {
offset = static_cast<s32>(limit);
extent = 0;
return;
}
const u64 end_coord = static_cast<u64>(offset) + extent;
if (end_coord > limit) {
extent = limit - static_cast<u32>(offset);
}
};
clamp_axis(rect.offset.x, rect.extent.width, render_area.width);
clamp_axis(rect.offset.y, rect.extent.height, render_area.height);
return rect.extent.width != 0 && rect.extent.height != 0;
};
if (!clamp_rect_to_render_area(clear_rect.rect)) {
return;
}
clear_rect.rect.extent = VkExtent2D{
.width = (std::min)(clear_rect.rect.extent.width, render_area.width),
.height = (std::min)(clear_rect.rect.extent.height, render_area.height),
};
const u32 color_attachment = regs.clear_surface.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
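The clamping above is easiest to follow one axis at a time. A self-contained sketch of the same idea (hypothetical ClampAxis helper, simplified by dropping the limit == 0 special cases): a negative offset eats into the extent, anything still hanging past the render area is trimmed off, and the caller skips the clear entirely when either extent ends up zero.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Clamp one axis of a rectangle (offset, extent) into [0, limit).
static void ClampAxis(int32_t& offset, uint32_t& extent, uint32_t limit) {
    if (offset < 0) {
        const uint32_t shrink = std::min(extent, static_cast<uint32_t>(-offset));
        extent -= shrink;
        offset = 0;
    }
    if (static_cast<uint64_t>(offset) >= limit) {
        offset = static_cast<int32_t>(limit);
        extent = 0;
        return;
    }
    const uint64_t end = static_cast<uint64_t>(offset) + extent;
    if (end > limit) {
        extent = limit - static_cast<uint32_t>(offset);
    }
}

int main() {
    int32_t x = -8;
    uint32_t width = 32;
    ClampAxis(x, width, 16);                        // render area 16 pixels wide
    std::printf("x=%d width=%u\n", x, width);       // x=0 width=16
    return 0;
}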
@@ -839,23 +911,21 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerVulkan::FlushWork() {
#ifdef ANDROID
static constexpr u32 DRAWS_TO_DISPATCH = 1024;
static constexpr u32 DRAWS_TO_DISPATCH = 512;
static constexpr u32 CHECK_MASK = 3;
#else
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
static constexpr u32 CHECK_MASK = 7;
#endif // ANDROID
// Only check multiples of 8 draws
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
if ((++draw_counter & 7) != 7) {
static_assert(DRAWS_TO_DISPATCH % (CHECK_MASK + 1) == 0);
if ((++draw_counter & CHECK_MASK) != CHECK_MASK) {
return;
}
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
return;
}
// Otherwise (every certain number of draws) flush execution.
// This submits commands to the Vulkan driver.
scheduler.Flush();
draw_counter = 0;
}
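The counter logic above is a two-stage throttle: a cheap mask check on every draw, DispatchWork on every (CHECK_MASK + 1)-th draw, and a full Flush once DRAWS_TO_DISPATCH draws have accumulated. A standalone sketch, assuming the Android constants from the diff:

#include <cstdint>
#include <cstdio>

int main() {
    constexpr uint32_t DRAWS_TO_DISPATCH = 512;
    constexpr uint32_t CHECK_MASK = 3;              // check every 4th draw
    static_assert(DRAWS_TO_DISPATCH % (CHECK_MASK + 1) == 0);

    uint32_t draw_counter = 0;
    uint32_t dispatches = 0, flushes = 0;
    for (int i = 0; i < 2048; ++i) {
        if ((++draw_counter & CHECK_MASK) != CHECK_MASK) {
            continue;                               // cheap early-out on most draws
        }
        if (draw_counter < DRAWS_TO_DISPATCH) {
            ++dispatches;                           // scheduler.DispatchWork();
            continue;
        }
        ++flushes;                                  // scheduler.Flush();
        draw_counter = 0;
    }
    std::printf("dispatches=%u flushes=%u\n", dispatches, flushes);
    return 0;
}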
@@ -921,6 +991,8 @@ bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = maxwell3d->regs;
// Core Dynamic States (Vulkan 1.0) - Always active regardless of dyna_state setting
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -928,6 +1000,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
// EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthCompareOp(regs);
@@ -938,40 +1011,52 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthTestEnable(regs);
UpdateDepthWriteEnable(regs);
UpdateStencilTestEnable(regs);
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
using namespace Tegra::Engines;
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
const auto has_float = std::any_of(
regs.vertex_attrib_format.begin(),
regs.vertex_attrib_format.end(),
[](const auto& attrib) {
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
}
);
if (regs.logic_op.enable) {
regs.logic_op.enable = static_cast<u32>(!has_float);
}
}
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
}
}
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
UpdateBlending(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
UpdateLineStippleEnable(regs);
UpdateConservativeRasterizationMode(regs);
}
}
// EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
// EDS2 Extras: LogicOp operation selection
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
// EDS3 Enables: LogicOpEnable, DepthClamp, LineStipple, ConservativeRaster
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
using namespace Tegra::Engines;
// AMD Workaround: LogicOp incompatible with float render targets
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
const auto has_float = std::any_of(
regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(),
[](const auto& attrib) {
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
}
);
if (regs.logic_op.enable) {
regs.logic_op.enable = static_cast<u32>(!has_float);
}
}
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
UpdateLineRasterizationMode(regs);
UpdateLineStippleEnable(regs);
UpdateConservativeRasterizationMode(regs);
UpdateAlphaToCoverageEnable(regs);
UpdateAlphaToOneEnable(regs);
}
// EDS3 Blending: ColorBlendEnable, ColorBlendEquation, ColorWriteMask
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
UpdateBlending(regs);
}
// Vertex Input Dynamic State: Independent from EDS levels
if (device.IsExtVertexInputDynamicStateSupported()) {
if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput()) {
UpdateVertexInput(regs);
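For the AMD path above, the predicate is simply "does any bound vertex attribute use a Float type"; when it does, the emulated logic_op enable is forced off before the dynamic state is recorded. A small illustration of that check with std::any_of (plain enum standing in for the Maxwell attribute type):

#include <algorithm>
#include <array>
#include <cstdio>

enum class AttributeType { SignedInt, UnsignedInt, Float };

int main() {
    const std::array attribs{AttributeType::UnsignedInt, AttributeType::Float};
    const bool has_float = std::any_of(attribs.begin(), attribs.end(), [](AttributeType type) {
        return type == AttributeType::Float;
    });
    bool logic_op_enable = true;
    if (logic_op_enable) {
        logic_op_enable = !has_float;   // forced off when a float attribute is present
    }
    std::printf("logic op enabled: %d\n", logic_op_enable);   // 0
    return 0;
}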
@@ -1144,109 +1229,141 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg
if (!state_tracker.TouchBlendConstants()) {
return;
}
const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
regs.blend_color.a};
scheduler.Record(
[blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
if (!device.UsesAdvancedCoreDynamicState()) {
return;
}
const std::array<float, 4> blend_constants{
regs.blend_color.r,
regs.blend_color.g,
regs.blend_color.b,
regs.blend_color.a,
};
scheduler.Record([blend_constants](vk::CommandBuffer cmdbuf) {
cmdbuf.SetBlendConstants(blend_constants.data());
});
}
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBounds()) {
return;
}
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
if (!device.IsDepthBoundsSupported()) {
return;
}
if (!device.UsesAdvancedCoreDynamicState()) {
return;
}
const bool unrestricted = device.IsExtDepthRangeUnrestrictedSupported();
const float min_depth = unrestricted ? regs.depth_bounds[0]
: std::clamp(regs.depth_bounds[0], 0.0f, 1.0f);
const float max_depth = unrestricted ? regs.depth_bounds[1]
: std::clamp(regs.depth_bounds[1], 0.0f, 1.0f);
scheduler.Record([min_depth, max_depth](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBounds(min_depth, max_depth);
});
}
void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilProperties()) {
const bool two_sided = regs.stencil_two_side_enable != 0;
const bool update_properties = state_tracker.TouchStencilProperties();
const bool update_side = state_tracker.TouchStencilSide(two_sided);
const bool refs_dirty = state_tracker.TouchStencilReference();
const bool write_dirty = state_tracker.TouchStencilWriteMask();
const bool compare_dirty = state_tracker.TouchStencilCompare();
const bool update_references = update_properties || update_side || refs_dirty;
const bool update_write_masks = update_properties || update_side || write_dirty;
const bool update_compare_masks = update_properties || update_side || compare_dirty;
if (!update_references && !update_write_masks && !update_compare_masks) {
state_tracker.ClearStencilReset();
return;
}
bool update_references = state_tracker.TouchStencilReference();
bool update_write_mask = state_tracker.TouchStencilWriteMask();
bool update_compare_masks = state_tracker.TouchStencilCompare();
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
update_references = true;
update_write_mask = true;
update_compare_masks = true;
if (!device.UsesAdvancedCoreDynamicState()) {
state_tracker.ClearStencilReset();
return;
}
if (update_references) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
!state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
return;
}
} else {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
return;
}
if (two_sided) {
const bool front_dirty =
state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref);
const bool back_dirty =
state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref);
if (front_dirty || back_dirty) {
scheduler.Record([front_ref = regs.stencil_front_ref,
back_ref = regs.stencil_back_ref,
is_two_sided = two_sided](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_ref != back_ref;
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
});
}
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_ref != back_ref;
// Front face
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
} else if (state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
const u32 reference = regs.stencil_front_ref;
scheduler.Record([reference](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, reference);
});
}();
}
}
if (update_write_mask) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
!state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
return;
}
if (update_write_masks) {
if (two_sided) {
if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) ||
state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
scheduler.Record([
front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
is_two_sided = regs.stencil_two_side_enable
](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_write_mask != back_write_mask;
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
});
}
scheduler.Record([front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_write_mask != back_write_mask;
// Front face
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
} else if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
const u32 front_write_mask = regs.stencil_front_mask;
scheduler.Record([front_write_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_write_mask);
});
}();
}
}
if (update_compare_masks) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
!state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
return;
}
if (two_sided) {
if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) ||
state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
scheduler.Record([
front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
is_two_sided = regs.stencil_two_side_enable
](vk::CommandBuffer cmdbuf) {
const bool set_back = is_two_sided && front_test_mask != back_test_mask;
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
});
}
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_test_mask != back_test_mask;
// Front face
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
} else if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
const u32 front_test_mask = regs.stencil_front_func_mask;
scheduler.Record([front_test_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_test_mask);
});
}();
}
}
state_tracker.ClearStencilReset();
}
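All three stencil updates above (reference, write mask, compare mask) share the same face-selection rule: if two-sided stencil is off, or both faces want the same value, one FRONT_AND_BACK call is enough; only a genuine front/back mismatch costs a second call. A hedged standalone sketch of that rule (plain enum, hypothetical callback, no Vulkan):

#include <cstdint>
#include <cstdio>

enum Face { FrontAndBack, Front, Back };

// Record the minimal number of stencil-reference sets for the given state.
static void SetStencilReference(bool two_sided, uint32_t front_ref, uint32_t back_ref,
                                void (*set)(Face, uint32_t)) {
    const bool set_back = two_sided && front_ref != back_ref;
    set(set_back ? Front : FrontAndBack, front_ref);
    if (set_back) {
        set(Back, back_ref);
    }
}

int main() {
    auto print = [](Face face, uint32_t value) {
        const char* names[] = {"FRONT_AND_BACK", "FRONT", "BACK"};
        std::printf("%s = %u\n", names[face], value);
    };
    SetStencilReference(true, 0xFF, 0x0F, print);   // mismatch: two calls
    SetStencilReference(true, 0xFF, 0xFF, print);   // equal: one FRONT_AND_BACK call
    return 0;
}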
@@ -1269,61 +1386,15 @@ void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
});
}
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBoundsTestEnable()) {
return;
}
bool enabled = regs.depth_bounds_enable;
if (enabled && !device.IsDepthBoundsSupported()) {
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
enabled = false;
}
scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthTestEnable()) {
return;
}
scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthWriteEnable()) {
return;
}
scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthWriteEnableEXT(enable);
});
}
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchPrimitiveRestartEnable()) {
return;
}
scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveRestartEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchRasterizerDiscardEnable()) {
return;
}
scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0);
});
}
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
void RasterizerVulkan::UpdateConservativeRasterizationMode(
Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchConservativeRasterizationMode()) {
return;
}
if (!device.SupportsDynamicState3ConservativeRasterizationMode() ||
!device.IsExtConservativeRasterizationSupported()) {
return;
}
scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetConservativeRasterizationModeEXT(
enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
@@ -1336,23 +1407,69 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
return;
}
if (!device.SupportsDynamicState3LineStippleEnable()) {
return;
}
scheduler.Record([enable = regs.line_stipple_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineStippleEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
// if (!state_tracker.TouchLi()) {
// return;
// }
if (!device.IsExtLineRasterizationSupported()) {
return;
}
if (!state_tracker.TouchLineRasterizationMode()) {
return;
}
// TODO: The maxwell emulator does not capture line rasters
if (!device.SupportsDynamicState3LineRasterizationMode()) {
static std::once_flag warn_missing_rect;
std::call_once(warn_missing_rect, [] {
LOG_WARNING(Render_Vulkan,
"Driver lacks rectangular line rasterization support; skipping dynamic "
"line state updates");
});
return;
}
// scheduler.Record([enable = regs.line](vk::CommandBuffer cmdbuf) {
// cmdbuf.SetConservativeRasterizationModeEXT(
// enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT
// : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
// });
const bool wants_smooth = regs.line_anti_alias_enable != 0;
VkLineRasterizationModeEXT mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
if (wants_smooth) {
if (device.SupportsSmoothLines()) {
mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
} else {
static std::once_flag warn_missing_smooth;
std::call_once(warn_missing_smooth, [] {
LOG_WARNING(Render_Vulkan,
"Line anti-aliasing requested but smoothLines feature unavailable; "
"using rectangular rasterization");
});
}
}
scheduler.Record([mode](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineRasterizationModeEXT(mode);
});
}
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchPrimitiveRestartEnable()) {
return;
}
scheduler.Record([enable = regs.primitive_restart.enabled != 0](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveRestartEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchRasterizerDiscardEnable()) {
return;
}
const bool discard = regs.rasterize_enable == 0;
scheduler.Record([discard](vk::CommandBuffer cmdbuf) {
cmdbuf.SetRasterizerDiscardEnableEXT(discard);
});
}
void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1394,6 +1511,9 @@ void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchLogicOpEnable()) {
return;
}
if (!device.SupportsDynamicState3LogicOpEnable()) {
return;
}
scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLogicOpEnableEXT(enable != 0);
});
@@ -1403,6 +1523,9 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
if (!state_tracker.TouchDepthClampEnable()) {
return;
}
if (!device.SupportsDynamicState3DepthClampEnable()) {
return;
}
bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
@@ -1413,6 +1536,41 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
[is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
}
void RasterizerVulkan::UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchAlphaToCoverageEnable()) {
return;
}
if (!device.SupportsDynamicState3AlphaToCoverageEnable()) {
return;
}
GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToCoverage() &&
regs.anti_alias_alpha_control.alpha_to_coverage != 0;
scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetAlphaToCoverageEnableEXT(enable ? VK_TRUE : VK_FALSE);
});
}
void RasterizerVulkan::UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchAlphaToOneEnable()) {
return;
}
if (!device.SupportsDynamicState3AlphaToOneEnable()) {
static std::once_flag warn_alpha_to_one;
std::call_once(warn_alpha_to_one, [] {
LOG_WARNING(Render_Vulkan,
"Alpha-to-one is not supported on this device; forcing it disabled");
});
return;
}
GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToOne() &&
regs.anti_alias_alpha_control.alpha_to_one != 0;
scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetAlphaToOneEnableEXT(enable ? VK_TRUE : VK_FALSE);
});
}
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthCompareOp()) {
return;
@@ -1422,6 +1580,36 @@ void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& reg
});
}
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBoundsTestEnable()) {
return;
}
if (!device.IsDepthBoundsSupported()) {
return;
}
scheduler.Record([enable = regs.depth_bounds_enable != 0](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthTestEnable()) {
return;
}
scheduler.Record([enable = regs.depth_test_enable != 0](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthWriteEnable()) {
return;
}
scheduler.Record([enable = regs.depth_write_enabled != 0](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthWriteEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchFrontFace()) {
return;

View File

@@ -25,6 +25,7 @@
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -168,7 +169,6 @@ private:
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -183,6 +183,8 @@ private:
void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);

View File

@@ -4,6 +4,7 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <chrono>
#include <memory>
#include <mutex>
#include <thread>
@@ -13,6 +14,7 @@
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -77,6 +79,14 @@ void Scheduler::WaitWorker() {
std::scoped_lock el{execution_mutex};
}
void Scheduler::KeepAliveTick() {
const auto now = Clock::now();
if (now - last_submission_time < KEEPALIVE_INTERVAL) {
return;
}
Flush();
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
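KeepAliveTick above is purely time-based: when nothing has been submitted for KEEPALIVE_INTERVAL, the scheduler forces a Flush so queued work cannot sit idle. A minimal sketch of that pattern with std::chrono::steady_clock (illustrative class, not the scheduler itself; the real code refreshes the timestamp inside SubmitExecution):

#include <chrono>
#include <cstdio>
#include <thread>

class KeepAlive {
public:
    using Clock = std::chrono::steady_clock;

    // Returns true when enough time has passed since the last submission
    // that the caller should force a flush of pending GPU work.
    bool Tick() {
        const auto now = Clock::now();
        if (now - last_submission < INTERVAL) {
            return false;
        }
        last_submission = now;
        return true;
    }

    void OnSubmit() { last_submission = Clock::now(); }

private:
    static constexpr std::chrono::milliseconds INTERVAL{4};
    Clock::time_point last_submission = Clock::now();
};

int main() {
    KeepAlive keepalive;
    std::printf("right after construction: %d\n", keepalive.Tick());   // 0, too soon
    std::this_thread::sleep_for(std::chrono::milliseconds(5));
    std::printf("after 5 ms of idling:     %d\n", keepalive.Tick());   // 1, force a flush
    return 0;
}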
@@ -130,9 +140,27 @@ void Scheduler::RequestOutsideRenderPassOperationContext() {
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
if (state.graphics_pipeline == pipeline) {
if (pipeline && pipeline->UsesExtendedDynamicState() &&
state.needs_state_enable_refresh) {
state_tracker.InvalidateStateEnableFlag();
state.needs_state_enable_refresh = false;
}
return false;
}
state.graphics_pipeline = pipeline;
if (!pipeline) {
return true;
}
if (!pipeline->UsesExtendedDynamicState()) {
state.needs_state_enable_refresh = true;
} else if (state.needs_state_enable_refresh) {
state_tracker.InvalidateStateEnableFlag();
state.needs_state_enable_refresh = false;
}
return true;
}
@@ -252,6 +280,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
});
chunk->MarkSubmit();
DispatchWork();
last_submission_time = Clock::now();
return signal_value;
}
@@ -276,8 +305,13 @@ void Scheduler::EndRenderPass()
return;
}
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
query_cache->NotifySegment(false);
if (query_cache) {
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
if (query_cache->HasStreamer(VideoCommon::QueryType::StreamingByteCount)) {
query_cache->CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
query_cache->NotifySegment(false);
}
Record([num_images = num_renderpass_images,
images = renderpass_images,

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <functional>
@@ -49,6 +53,9 @@ public:
/// safe to touch worker resources.
void WaitWorker();
/// Submits a tiny chunk of work if recent GPU submissions are stale.
void KeepAliveTick();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
@@ -125,6 +132,8 @@ public:
std::mutex submit_mutex;
private:
using Clock = std::chrono::steady_clock;
class Command {
public:
virtual ~Command() = default;
@@ -214,6 +223,7 @@ private:
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;
bool needs_state_enable_refresh = false;
};
void WorkerThread(std::stop_token stop_token);
@@ -246,6 +256,9 @@ private:
State state;
Clock::time_point last_submission_time{Clock::time_point::min()};
static constexpr std::chrono::milliseconds KEEPALIVE_INTERVAL{4};
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};

View File

@@ -5,6 +5,7 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <optional>
#include <utility>
#include <vector>
@@ -49,6 +50,7 @@ size_t GetStreamBufferSize(const Device& device) {
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
@@ -74,13 +76,16 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
}
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
return GetStreamBuffer(size);
if (!deferred && usage == MemoryUsage::Upload) {
if (size <= region_size) {
return GetStreamBuffer(size);
}
}
return GetStagingBuffer(size, usage, deferred);
}
@@ -142,6 +147,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
}
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
scheduler.GetMasterSemaphore().Refresh();
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
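AreRegionsActive above, with the added Refresh, answers one question against the newest known GPU tick: does any region in [begin, end) still carry a sync tick the GPU has not reached? A runnable toy version of the same check:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

static bool AreRegionsActive(const std::vector<uint64_t>& sync_ticks, size_t begin, size_t end,
                             uint64_t known_gpu_tick) {
    return std::any_of(sync_ticks.begin() + begin, sync_ticks.begin() + end,
                       [known_gpu_tick](uint64_t sync_tick) { return known_gpu_tick < sync_tick; });
}

int main() {
    const std::vector<uint64_t> sync_ticks{3, 5, 9, 12};
    std::printf("%d\n", AreRegionsActive(sync_ticks, 1, 3, 8));   // 1: tick 9 still pending
    std::printf("%d\n", AreRegionsActive(sync_ticks, 0, 2, 8));   // 0: ticks 3 and 5 are done
    return 0;
}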

View File

@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <climits>
#include <optional>
#include <vector>
#include "common/common_types.h"

View File

@@ -48,6 +48,7 @@ Flags MakeInvalidationFlags() {
FrontFace,
StencilOp,
StencilTestEnable,
RasterizerDiscardEnable,
VertexBuffers,
VertexInput,
StateEnable,
@@ -55,6 +56,9 @@ Flags MakeInvalidationFlags() {
DepthBiasEnable,
LogicOpEnable,
DepthClampEnable,
AlphaToCoverageEnable,
AlphaToOneEnable,
LineRasterizationMode,
LogicOp,
Blending,
ColorMask,
@@ -148,6 +152,8 @@ void SetupDirtyStateEnable(Tables& tables) {
setup(OFF(logic_op.enable), LogicOpEnable);
setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable);
setup(OFF(line_stipple_enable), LineStippleEnable);
setup(OFF(anti_alias_alpha_control.alpha_to_coverage), AlphaToCoverageEnable);
setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable);
}
void SetupDirtyDepthCompareOp(Tables& tables) {
@@ -226,6 +232,7 @@ void SetupRasterModes(Tables &tables) {
table[OFF(line_stipple_params)] = LineStippleParams;
table[OFF(conservative_raster_enable)] = ConservativeRasterizationMode;
table[OFF(line_anti_alias_enable)] = LineRasterizationMode;
}
} // Anonymous namespace

View File

@@ -54,6 +54,7 @@ enum : u8 {
PrimitiveRestartEnable,
RasterizerDiscardEnable,
ConservativeRasterizationMode,
LineRasterizationMode,
LineStippleEnable,
LineStippleParams,
DepthBiasEnable,
@@ -61,6 +62,8 @@ enum : u8 {
LogicOp,
LogicOpEnable,
DepthClampEnable,
AlphaToCoverageEnable,
AlphaToOneEnable,
Blending,
BlendEnable,
@@ -94,6 +97,10 @@ public:
(*flags)[Dirty::Scissors] = true;
}
void InvalidateStateEnableFlag() {
(*flags)[Dirty::StateEnable] = true;
}
bool TouchViewports() {
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
@@ -225,6 +232,14 @@ public:
return Exchange(Dirty::DepthClampEnable, false);
}
bool TouchAlphaToCoverageEnable() {
return Exchange(Dirty::AlphaToCoverageEnable, false);
}
bool TouchAlphaToOneEnable() {
return Exchange(Dirty::AlphaToOneEnable, false);
}
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
@@ -261,6 +276,10 @@ public:
return Exchange(Dirty::LogicOp, false);
}
bool TouchLineRasterizationMode() {
return Exchange(Dirty::LineRasterizationMode, false);
}
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;

View File

@@ -306,7 +306,17 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) {
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
// According to Vulkan spec, when using VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR,
// the base format (imageFormat) MUST be included in pViewFormats
const std::array view_formats{
swapchain_ci.imageFormat, // Base format MUST be first
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
#ifdef ANDROID
VK_FORMAT_R8G8B8A8_UNORM, // Android may use RGBA
VK_FORMAT_R8G8B8A8_SRGB,
#endif
};
VkImageFormatListCreateInfo format_list{
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
.pNext = nullptr,
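The fix above follows from the mutable-format rule called out in the comment: the swapchain's own imageFormat has to appear in the view-format list. A hedged standalone sketch of assembling such a list with the base format guaranteed first and duplicates dropped (plain ints stand in for VkFormat values; nothing here is the actual swapchain code):

#include <algorithm>
#include <cstdio>
#include <vector>

// Build a view-format list that always contains the base format exactly once,
// followed by any extra formats the swapchain images may be viewed as.
static std::vector<int> MakeViewFormatList(int base_format, std::vector<int> extra_formats) {
    std::vector<int> formats{base_format};
    for (const int format : extra_formats) {
        if (std::find(formats.begin(), formats.end(), format) == formats.end()) {
            formats.push_back(format);
        }
    }
    return formats;
}

int main() {
    // Base format plus two candidate view formats, one of them a duplicate of the base.
    const auto formats = MakeViewFormatList(50, {44, 50});
    for (const int f : formats) {
        std::printf("%d ", f);   // 50 44
    }
    std::printf("\n");
    return 0;
}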

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -101,7 +104,7 @@ public:
}
VkSemaphore CurrentRenderSemaphore() const {
return *render_semaphores[frame_index];
return *render_semaphores[image_index];
}
u32 GetWidth() const {

File diff suppressed because it is too large

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <optional>
#include <span>
#include "video_core/texture_cache/texture_cache_base.h"
@@ -56,12 +60,16 @@ public:
void TickFrame();
void WaitForGpuTick(u64 tick);
u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const;
std::optional<size_t> GetSamplerHeapBudget() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
@@ -108,10 +116,15 @@ public:
return view_formats[static_cast<std::size_t>(format)];
}
bool RequiresBlockCompatibleViewFormats(PixelFormat format) const noexcept {
return requires_block_view_formats[static_cast<std::size_t>(format)];
}
void BarrierFeedbackLoop();
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const;
VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;
@@ -125,6 +138,7 @@ public:
std::unique_ptr<MSAACopyPass> msaa_copy_pass;
const Settings::ResolutionScalingInfo& resolution;
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
std::array<bool, VideoCore::Surface::MaxPixelFormat> requires_block_view_formats{};
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
@@ -171,6 +185,30 @@ public:
return (this->*current_image).UsageFlags();
}
void TrackGpuReadTick(u64 tick) noexcept {
TrackPendingReadTick(tick);
}
void TrackGpuWriteTick(u64 tick) noexcept {
TrackPendingWriteTick(tick);
}
void CompleteGpuReadTick(u64 completed_tick) noexcept {
ClearPendingReadTick(completed_tick);
}
void CompleteGpuWriteTick(u64 completed_tick) noexcept {
ClearPendingWriteTick(completed_tick);
}
[[nodiscard]] std::optional<u64> PendingGpuReadTick() const noexcept {
return PendingReadTick();
}
[[nodiscard]] std::optional<u64> PendingGpuWriteTick() const noexcept {
return PendingWriteTick();
}
/// Returns true when the image is already initialized and mark it as initialized
[[nodiscard]] bool ExchangeInitialization() noexcept {
return std::exchange(initialized, true);
@@ -233,15 +271,23 @@ public:
[[nodiscard]] VkImageView ColorView();
[[nodiscard]] VkImageView SampledView(Shader::TextureType texture_type,
Shader::SamplerComponentType component_type);
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] bool IsRescaled() const noexcept;
[[nodiscard]] bool SupportsDepthCompareSampling() const noexcept;
[[nodiscard]] bool Is3DImage() const noexcept {
return is_3d_image;
}
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
return *image_views[static_cast<size_t>(texture_type)];
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
return image_handle;
}
@@ -266,23 +312,36 @@ private:
struct StorageViews {
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> typeless;
};
static constexpr size_t NUMERIC_VIEW_TYPES = 3;
[[nodiscard]] Shader::TextureType BaseTextureType() const noexcept;
[[nodiscard]] std::optional<u32> LayerCountOverride(Shader::TextureType texture_type) const noexcept;
[[nodiscard]] VkImageView DepthView(Shader::TextureType texture_type);
[[nodiscard]] VkImageView StencilView(Shader::TextureType texture_type);
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask,
Shader::TextureType texture_type);
const Device* device = nullptr;
const SlotVector<Image>* slot_images = nullptr;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> depth_views;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> stencil_views;
std::array<std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES>, NUMERIC_VIEW_TYPES>
sampled_component_views;
vk::ImageView color_view;
vk::Image null_image;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 buffer_size = 0;
bool is_3d_image = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
@@ -303,9 +362,19 @@ public:
return static_cast<bool>(sampler_default_anisotropy);
}
[[nodiscard]] VkSampler SelectHandle(bool supports_linear_filter,
bool supports_anisotropy,
bool allow_depth_compare) const noexcept;
private:
vk::Sampler sampler;
vk::Sampler sampler_default_anisotropy;
vk::Sampler sampler_force_point;
vk::Sampler sampler_compare_disabled;
vk::Sampler sampler_default_anisotropy_compare_disabled;
vk::Sampler sampler_force_point_compare_disabled;
bool uses_linear_filter = false;
bool depth_compare_enabled = false;
};
class Framebuffer {

View File

@@ -70,6 +70,102 @@ static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture
entry.a_type, entry.srgb_conversion));
}
namespace {
[[nodiscard]] bool UsesSwizzleSource(const Tegra::Texture::TICEntry& entry,
Tegra::Texture::SwizzleSource source) {
const std::array swizzles{entry.x_source.Value(), entry.y_source.Value(),
entry.z_source.Value(), entry.w_source.Value()};
return std::ranges::any_of(swizzles, [source](Tegra::Texture::SwizzleSource current) {
return current == source;
});
}
[[nodiscard]] std::optional<Shader::SamplerComponentType> DepthStencilComponentFromSwizzle(
const Tegra::Texture::TICEntry& entry, VideoCore::Surface::PixelFormat pixel_format) {
using Tegra::Texture::SwizzleSource;
const bool uses_r = UsesSwizzleSource(entry, SwizzleSource::R);
const bool uses_g = UsesSwizzleSource(entry, SwizzleSource::G);
switch (pixel_format) {
case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
if (uses_r != uses_g) {
return uses_r ? Shader::SamplerComponentType::Depth
: Shader::SamplerComponentType::Stencil;
}
break;
case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
if (uses_r != uses_g) {
return uses_r ? Shader::SamplerComponentType::Stencil
: Shader::SamplerComponentType::Depth;
}
break;
default:
break;
}
return std::nullopt;
}
} // Anonymous namespace
static Shader::SamplerComponentType ConvertSamplerComponentType(
const Tegra::Texture::TICEntry& entry) {
const auto pixel_format = PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type,
entry.b_type, entry.a_type,
entry.srgb_conversion);
const auto surface_type = VideoCore::Surface::GetFormatType(pixel_format);
if (entry.depth_texture != 0 || surface_type == VideoCore::Surface::SurfaceType::Depth) {
return Shader::SamplerComponentType::Depth;
}
if (surface_type == VideoCore::Surface::SurfaceType::Stencil) {
return Shader::SamplerComponentType::Stencil;
}
if (surface_type == VideoCore::Surface::SurfaceType::DepthStencil) {
if (const auto inferred = DepthStencilComponentFromSwizzle(entry, pixel_format)) {
return *inferred;
}
return entry.depth_texture != 0 ? Shader::SamplerComponentType::Depth
: Shader::SamplerComponentType::Stencil;
}
const auto accumulate = [](const Tegra::Texture::ComponentType component,
bool& has_signed, bool& has_unsigned) {
switch (component) {
case Tegra::Texture::ComponentType::SINT:
has_signed = true;
break;
case Tegra::Texture::ComponentType::UINT:
has_unsigned = true;
break;
default:
break;
}
};
bool has_signed{};
bool has_unsigned{};
accumulate(entry.r_type, has_signed, has_unsigned);
accumulate(entry.g_type, has_signed, has_unsigned);
accumulate(entry.b_type, has_signed, has_unsigned);
accumulate(entry.a_type, has_signed, has_unsigned);
if (has_signed && !has_unsigned) {
return Shader::SamplerComponentType::Sint;
}
if (has_unsigned && !has_signed) {
return Shader::SamplerComponentType::Uint;
}
if (has_signed) {
return Shader::SamplerComponentType::Sint;
}
if (has_unsigned) {
return Shader::SamplerComponentType::Uint;
}
return Shader::SamplerComponentType::Float;
}
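The signed/unsigned accumulation at the end of the routine above reduces to a small rule: any SINT component selects Sint (a mixed SINT/UINT descriptor also prefers Sint), otherwise any UINT component selects Uint, and everything else samples as Float; depth and stencil cases are resolved earlier in the function. A compact standalone sketch (hypothetical enums mirroring that logic):

#include <array>
#include <cstdio>

enum class Component { Unorm, Snorm, Sint, Uint, Float };
enum class SamplerComponentType { Float, Sint, Uint };

static SamplerComponentType Classify(const std::array<Component, 4>& rgba) {
    bool has_signed = false;
    bool has_unsigned = false;
    for (const Component c : rgba) {
        has_signed |= c == Component::Sint;
        has_unsigned |= c == Component::Uint;
    }
    if (has_signed) {
        return SamplerComponentType::Sint;   // signed wins, even in a mix
    }
    if (has_unsigned) {
        return SamplerComponentType::Uint;
    }
    return SamplerComponentType::Float;      // UNORM/SNORM/FLOAT all sample as float
}

int main() {
    const auto type = Classify({Component::Uint, Component::Uint, Component::Uint, Component::Unorm});
    std::printf("classified as %d\n", static_cast<int>(type));   // 2 == Uint
    return 0;
}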
static std::string_view StageToPrefix(Shader::Stage stage) {
switch (stage) {
case Shader::Stage::VertexB:
@@ -200,6 +296,7 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSizeBytes())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
const u64 num_texture_component_types{static_cast<u64>(texture_component_types.size())};
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
@@ -207,6 +304,8 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.write(reinterpret_cast<const char*>(&num_texture_component_types),
sizeof(num_texture_component_types))
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
sizeof(num_cbuf_replacement_values))
@@ -223,6 +322,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
for (const auto& [key, component] : texture_component_types) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&component), sizeof(component));
}
for (const auto& [key, format] : texture_pixel_formats) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&format), sizeof(format));
@@ -277,7 +380,17 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
ASSERT(handle.first <= tic_limit);
if (handle.first > tic_limit) {
static std::atomic<size_t> oob_count{0};
const size_t n = ++oob_count;
if (n <= 4 || (n & 63) == 0) {
LOG_WARNING(Shader,
"TIC handle {} exceeds limit {} (via_header_index={}) — returning empty",
handle.first, tic_limit, via_header_index);
}
return {};
}
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
@@ -374,6 +487,21 @@ Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
const Shader::TextureType result{ConvertTextureType(entry)};
texture_types.emplace(handle, result);
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
return result;
}
Shader::SamplerComponentType GraphicsEnvironment::ReadTextureComponentType(u32 handle) {
const auto it{texture_component_types.find(handle)};
if (it != texture_component_types.end()) {
return it->second;
}
const auto& regs{maxwell3d->regs};
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
auto entry =
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
const Shader::SamplerComponentType result{ConvertSamplerComponentType(entry)};
texture_component_types.emplace(handle, result);
return result;
}
@@ -430,6 +558,20 @@ Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
const Shader::TextureType result{ConvertTextureType(entry)};
texture_types.emplace(handle, result);
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
return result;
}
Shader::SamplerComponentType ComputeEnvironment::ReadTextureComponentType(u32 handle) {
const auto it{texture_component_types.find(handle)};
if (it != texture_component_types.end()) {
return it->second;
}
const auto& regs{kepler_compute->regs};
const auto& qmd{kepler_compute->launch_description};
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
const Shader::SamplerComponentType result{ConvertSamplerComponentType(entry)};
texture_component_types.emplace(handle, result);
return result;
}
@@ -455,12 +597,15 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
u64 code_size{};
u64 num_texture_types{};
u64 num_texture_pixel_formats{};
u64 num_texture_component_types{};
u64 num_cbuf_values{};
u64 num_cbuf_replacement_values{};
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.read(reinterpret_cast<char*>(&num_texture_component_types),
sizeof(num_texture_component_types))
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
sizeof(num_cbuf_replacement_values))
@@ -480,6 +625,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
.read(reinterpret_cast<char*>(&type), sizeof(type));
texture_types.emplace(key, type);
}
for (size_t i = 0; i < num_texture_component_types; ++i) {
u32 key;
Shader::SamplerComponentType component;
file.read(reinterpret_cast<char*>(&key), sizeof(key))
.read(reinterpret_cast<char*>(&component), sizeof(component));
texture_component_types.emplace(key, component);
}
for (size_t i = 0; i < num_texture_pixel_formats; ++i) {
u32 key;
Shader::TexturePixelFormat format;
@@ -534,6 +686,15 @@ u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
return it->second;
}
Shader::SamplerComponentType FileEnvironment::ReadTextureComponentType(u32 handle) {
const auto it{texture_component_types.find(handle)};
if (it == texture_component_types.end()) {
LOG_WARNING(Render_Vulkan, "Texture component descriptor {:08x} not found", handle);
return Shader::SamplerComponentType::Float;
}
return it->second;
}
Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
const auto it{texture_types.find(handle)};
if (it == texture_types.end()) {

View File

@@ -80,6 +80,7 @@ protected:
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
@@ -116,6 +117,8 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -142,6 +145,8 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -176,6 +181,8 @@ public:
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
[[nodiscard]] Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
[[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override;
@@ -202,6 +209,7 @@ public:
private:
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;

View File

@@ -4,6 +4,8 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <optional>
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/settings.h"
@@ -408,6 +410,126 @@ bool IsPixelFormatSignedInteger(PixelFormat format) {
}
}
namespace {
struct NumericVariantSet {
PixelFormat float_format = PixelFormat::Invalid;
PixelFormat uint_format = PixelFormat::Invalid;
PixelFormat sint_format = PixelFormat::Invalid;
[[nodiscard]] std::optional<PixelFormat> Select(PixelFormatNumeric numeric) const {
PixelFormat candidate = PixelFormat::Invalid;
switch (numeric) {
case PixelFormatNumeric::Float:
candidate = float_format;
break;
case PixelFormatNumeric::Uint:
candidate = uint_format;
break;
case PixelFormatNumeric::Sint:
candidate = sint_format;
break;
}
if (candidate == PixelFormat::Invalid) {
return std::nullopt;
}
return candidate;
}
};
constexpr NumericVariantSet MakeVariant(PixelFormat float_format, PixelFormat uint_format,
PixelFormat sint_format) {
return NumericVariantSet{
.float_format = float_format,
.uint_format = uint_format,
.sint_format = sint_format,
};
}
std::optional<NumericVariantSet> LookupNumericVariantSet(PixelFormat format) {
switch (format) {
case PixelFormat::R8_UNORM:
case PixelFormat::R8_SNORM:
case PixelFormat::R8_UINT:
case PixelFormat::R8_SINT:
return MakeVariant(PixelFormat::R8_UNORM, PixelFormat::R8_UINT, PixelFormat::R8_SINT);
case PixelFormat::R16_FLOAT:
case PixelFormat::R16_UNORM:
case PixelFormat::R16_SNORM:
case PixelFormat::R16_UINT:
case PixelFormat::R16_SINT:
return MakeVariant(PixelFormat::R16_FLOAT, PixelFormat::R16_UINT, PixelFormat::R16_SINT);
case PixelFormat::R32_FLOAT:
case PixelFormat::R32_UINT:
case PixelFormat::R32_SINT:
return MakeVariant(PixelFormat::R32_FLOAT, PixelFormat::R32_UINT, PixelFormat::R32_SINT);
case PixelFormat::R8G8_UNORM:
case PixelFormat::R8G8_SNORM:
case PixelFormat::R8G8_UINT:
case PixelFormat::R8G8_SINT:
return MakeVariant(PixelFormat::R8G8_UNORM, PixelFormat::R8G8_UINT, PixelFormat::R8G8_SINT);
case PixelFormat::R16G16_FLOAT:
case PixelFormat::R16G16_UNORM:
case PixelFormat::R16G16_SNORM:
case PixelFormat::R16G16_UINT:
case PixelFormat::R16G16_SINT:
return MakeVariant(PixelFormat::R16G16_FLOAT, PixelFormat::R16G16_UINT,
PixelFormat::R16G16_SINT);
case PixelFormat::R32G32_FLOAT:
case PixelFormat::R32G32_UINT:
case PixelFormat::R32G32_SINT:
return MakeVariant(PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
PixelFormat::R32G32_SINT);
case PixelFormat::R16G16B16A16_FLOAT:
case PixelFormat::R16G16B16A16_UNORM:
case PixelFormat::R16G16B16A16_SNORM:
case PixelFormat::R16G16B16A16_UINT:
case PixelFormat::R16G16B16A16_SINT:
return MakeVariant(PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
PixelFormat::R16G16B16A16_SINT);
case PixelFormat::R32G32B32A32_FLOAT:
case PixelFormat::R32G32B32A32_UINT:
case PixelFormat::R32G32B32A32_SINT:
return MakeVariant(PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT,
PixelFormat::R32G32B32A32_SINT);
case PixelFormat::A8B8G8R8_UNORM:
case PixelFormat::A8B8G8R8_SNORM:
case PixelFormat::A8B8G8R8_SRGB:
case PixelFormat::A8B8G8R8_UINT:
case PixelFormat::A8B8G8R8_SINT:
return MakeVariant(PixelFormat::A8B8G8R8_UNORM, PixelFormat::A8B8G8R8_UINT,
PixelFormat::A8B8G8R8_SINT);
case PixelFormat::A2B10G10R10_UNORM:
case PixelFormat::A2B10G10R10_UINT:
return MakeVariant(PixelFormat::A2B10G10R10_UNORM, PixelFormat::A2B10G10R10_UINT,
PixelFormat::Invalid);
default:
return std::nullopt;
}
}
} // Anonymous namespace
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format) {
if (IsPixelFormatInteger(format)) {
return IsPixelFormatSignedInteger(format) ? PixelFormatNumeric::Sint
: PixelFormatNumeric::Uint;
}
return PixelFormatNumeric::Float;
}
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
PixelFormatNumeric target_numeric) {
const auto variants = LookupNumericVariantSet(format);
if (!variants) {
return std::nullopt;
}
if (const auto candidate = variants->Select(target_numeric)) {
return candidate;
}
return std::nullopt;
}
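A usage sketch of the variant selection added above: given a format family's float/uint/sint members, Select returns the member matching the requested numeric type, or nothing when that slot is Invalid. The enum values here are illustrative stand-ins for the real PixelFormat entries:

#include <cstdio>
#include <optional>

enum class Format { Invalid, R8_UNORM, R8_UINT, R8_SINT };
enum class Numeric { Float, Uint, Sint };

struct VariantSet {
    Format float_format = Format::Invalid;
    Format uint_format = Format::Invalid;
    Format sint_format = Format::Invalid;

    std::optional<Format> Select(Numeric numeric) const {
        Format candidate = Format::Invalid;
        switch (numeric) {
        case Numeric::Float: candidate = float_format; break;
        case Numeric::Uint:  candidate = uint_format;  break;
        case Numeric::Sint:  candidate = sint_format;  break;
        }
        if (candidate == Format::Invalid) {
            return std::nullopt;
        }
        return candidate;
    }
};

int main() {
    // All R8 variants share one set; asking for a Uint view of R8_UNORM yields R8_UINT.
    const VariantSet r8{Format::R8_UNORM, Format::R8_UINT, Format::R8_SINT};
    if (const auto variant = r8.Select(Numeric::Uint)) {
        std::printf("picked variant %d\n", static_cast<int>(*variant));   // 2 == R8_UINT
    }
    return 0;
}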
size_t PixelComponentSizeBitsInteger(PixelFormat format) {
switch (format) {
case PixelFormat::A8B8G8R8_SINT:

View File

@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <climits>
#include <optional>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
@@ -517,6 +521,16 @@ bool IsPixelFormatSignedInteger(PixelFormat format);
size_t PixelComponentSizeBitsInteger(PixelFormat format);
enum class PixelFormatNumeric {
Float,
Uint,
Sint,
};
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format);
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
PixelFormatNumeric target_numeric);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
u64 TranscodedAstcSize(u64 base_size, PixelFormat format);

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -21,6 +24,27 @@ constexpr auto FLOAT = ComponentType::FLOAT;
constexpr bool LINEAR = false;
constexpr bool SRGB = true;
constexpr TextureFormat SanitizeFormat(TextureFormat format) {
if (format == static_cast<TextureFormat>(0)) {
return TextureFormat::A8B8G8R8;
}
return format;
}
constexpr ComponentType SanitizeComponent(ComponentType component) {
if (component == static_cast<ComponentType>(0)) {
return ComponentType::UNORM;
}
switch (component) {
case ComponentType::SNORM_FORCE_FP16:
return ComponentType::SNORM;
case ComponentType::UNORM_FORCE_FP16:
return ComponentType::UNORM;
default:
return component;
}
}
constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
u32 hash = is_srgb ? 1 : 0;
@@ -41,6 +65,11 @@ constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb =
PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
ComponentType blue, ComponentType alpha,
bool is_srgb) noexcept {
format = SanitizeFormat(format);
red = SanitizeComponent(red);
green = SanitizeComponent(green);
blue = SanitizeComponent(blue);
alpha = SanitizeComponent(alpha);
switch (Hash(format, red, green, blue, alpha, is_srgb)) {
case Hash(TextureFormat::A8B8G8R8, UNORM):
return PixelFormat::A8B8G8R8_UNORM;

View File

@@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
#include <optional>
#include <vector>
@@ -58,6 +62,50 @@ struct ImageBase {
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
explicit ImageBase(const NullImageParams&);
void TrackPendingReadTick(u64 tick) noexcept {
if (pending_read_tick) {
*pending_read_tick = std::max(*pending_read_tick, tick);
} else {
pending_read_tick = tick;
}
}
void TrackPendingWriteTick(u64 tick) noexcept {
if (pending_write_tick) {
*pending_write_tick = std::max(*pending_write_tick, tick);
} else {
pending_write_tick = tick;
}
}
void ClearPendingReadTick(u64 completed_tick) noexcept {
if (pending_read_tick && completed_tick >= *pending_read_tick) {
pending_read_tick.reset();
}
}
void ClearPendingWriteTick(u64 completed_tick) noexcept {
if (pending_write_tick && completed_tick >= *pending_write_tick) {
pending_write_tick.reset();
}
}
[[nodiscard]] bool HasPendingReadTick() const noexcept {
return pending_read_tick.has_value();
}
[[nodiscard]] bool HasPendingWriteTick() const noexcept {
return pending_write_tick.has_value();
}
[[nodiscard]] std::optional<u64> PendingReadTick() const noexcept {
return pending_read_tick;
}
[[nodiscard]] std::optional<u64> PendingWriteTick() const noexcept {
return pending_write_tick;
}
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
[[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
@@ -115,6 +163,9 @@ struct ImageBase {
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
ImageMapId map_view_id{};
std::optional<u64> pending_read_tick;
std::optional<u64> pending_write_tick;
};
struct ImageMapView {
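The pending read/write ticks added to ImageBase above behave as per-direction high-water marks: tracking keeps the largest outstanding tick, and clearing only succeeds once the completed GPU tick has caught up to it. A standalone sketch assuming the same semantics:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <optional>

struct PendingTick {
    std::optional<uint64_t> tick;

    void Track(uint64_t new_tick) {
        // Keep the highest tick that still has outstanding GPU work.
        tick = tick ? std::max(*tick, new_tick) : new_tick;
    }

    void Clear(uint64_t completed_tick) {
        // Only forget the dependency once the GPU has reached it.
        if (tick && completed_tick >= *tick) {
            tick.reset();
        }
    }
};

int main() {
    PendingTick pending_write;
    pending_write.Track(10);
    pending_write.Track(7);       // an older tick does not lower the mark
    pending_write.Clear(9);       // not yet reached
    std::printf("still pending: %d\n", pending_write.tick.has_value());   // 1
    pending_write.Clear(10);
    std::printf("still pending: %d\n", pending_write.tick.has_value());   // 0
    return 0;
}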

View File

@@ -33,6 +33,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
dma_downloaded = forced_flushed;
format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
config.a_type, config.srgb_conversion);
num_samples = NumSamples(config.msaa_mode);
resources.levels = config.max_mip_level + 1;
if (config.IsPitchLinear()) {

Some files were not shown because too many files have changed in this diff