Compare commits
209 Commits
3096/qcom/
...
test-donot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
18704cc65f | ||
|
|
b40f91974b | ||
|
|
5f6ee64dc3 | ||
|
|
722a8e4ede | ||
|
|
9caa1d5b1f | ||
|
|
c0ea48e440 | ||
|
|
03e872f69c | ||
|
|
61ff781e0a | ||
|
|
ee2f1a62de | ||
|
|
aee49e4877 | ||
|
|
c5f30776b6 | ||
|
|
ae1c974c2e | ||
|
|
1aec0c0c50 | ||
|
|
c15cd06711 | ||
|
|
1067a69daf | ||
|
|
d6a8c0b013 | ||
|
|
59a2fdba90 | ||
|
|
7e875e9072 | ||
|
|
fc6081d3b9 | ||
|
|
ff36a072e3 | ||
|
|
7fccd7573a | ||
|
|
3e5ba87a4a | ||
|
|
731965ac8f | ||
|
|
094b2a3274 | ||
|
|
2ce3e09d62 | ||
|
|
cc47aa473a | ||
|
|
b0cd2e8cbe | ||
|
|
2ccf9811b7 | ||
|
|
583a9fd320 | ||
|
|
6df9d06e65 | ||
|
|
d7a120fe74 | ||
|
|
6f6ce56528 | ||
|
|
dc8c6ce6d5 | ||
|
|
22e8b24361 | ||
|
|
f63226ff10 | ||
|
|
9f3803c986 | ||
|
|
5899816f7c | ||
|
|
280d05452a | ||
|
|
ccc31edfbb | ||
|
|
7ce570f420 | ||
|
|
fbf023fa88 | ||
|
|
98b4bfb461 | ||
|
|
4e6c978574 | ||
|
|
09b897dbf2 | ||
|
|
3fc9f0efe3 | ||
|
|
b17b70e09d | ||
|
|
65083b10d8 | ||
|
|
f2227af36c | ||
|
|
f1a5f9137b | ||
|
|
f933828749 | ||
|
|
05e8e1d494 | ||
|
|
5937d19750 | ||
|
|
313e885f7a | ||
|
|
6b0b72e034 | ||
|
|
c6d85b7589 | ||
|
|
005b1dd3be | ||
|
|
2c4748cc3d | ||
|
|
a2dbff0bc3 | ||
|
|
1574e7e804 | ||
|
|
ef0061f72b | ||
|
|
b76b74dcb7 | ||
|
|
f6ecb812ca | ||
|
|
54d6a2015d | ||
|
|
9a2e2f922f | ||
|
|
1f74ed2272 | ||
|
|
1307614ad0 | ||
|
|
989a6b7870 | ||
|
|
1cf65cba2a | ||
|
|
3fd7821fc8 | ||
|
|
59521d7e96 | ||
|
|
1fccea8db1 | ||
|
|
94ac08fcd7 | ||
|
|
992ea70ce7 | ||
|
|
9de6c125a7 | ||
|
|
5447ed0d60 | ||
|
|
8b9792855f | ||
|
|
419be467ee | ||
|
|
6df35d859a | ||
|
|
bd28d4269b | ||
|
|
0baf482214 | ||
|
|
690dfc66f5 | ||
|
|
ef7e43bc30 | ||
|
|
d4d704cc26 | ||
|
|
1c9f603947 | ||
|
|
3873b3fe6c | ||
|
|
fb7d5086b7 | ||
|
|
c5cbd67dbc | ||
|
|
7bd0d42bab | ||
|
|
08dbacdf53 | ||
|
|
da8809c240 | ||
|
|
eb7159a859 | ||
|
|
65bef7ec08 | ||
|
|
78a44a4ef6 | ||
|
|
b07356fcbf | ||
|
|
500802cb72 | ||
|
|
e31411170a | ||
|
|
8c350dfd37 | ||
|
|
de3ac7ad4e | ||
|
|
66903b4b98 | ||
|
|
df088c6442 | ||
|
|
3da798ba05 | ||
|
|
58c76ece13 | ||
|
|
e1108010a6 | ||
|
|
3553c372dd | ||
|
|
261dcc30c1 | ||
|
|
095f8e92b1 | ||
|
|
0e6a271e40 | ||
|
|
f22407de1e | ||
|
|
8fa7b068b8 | ||
|
|
29c629737b | ||
|
|
f03b4d77c2 | ||
|
|
41148132f0 | ||
|
|
bf1efc6a4c | ||
|
|
7d535a06c3 | ||
|
|
3a7091d193 | ||
|
|
bdc7acce27 | ||
|
|
68593f9ddc | ||
|
|
979d203a77 | ||
|
|
e086078e41 | ||
|
|
535b33bc6b | ||
|
|
e6b3f98b13 | ||
|
|
b03eb6bd78 | ||
|
|
e4e71a36de | ||
|
|
a83157f0a9 | ||
|
|
8c1e47bbda | ||
|
|
c18662d039 | ||
|
|
4fb7aea7b4 | ||
|
|
a8af150df4 | ||
|
|
5b41abc1b1 | ||
|
|
91f3a4e4bb | ||
|
|
aff095523d | ||
|
|
49ba71a594 | ||
|
|
97cfd9786f | ||
|
|
bc1873c944 | ||
|
|
e3b10eba10 | ||
|
|
ce26d82d09 | ||
|
|
a353f07b8f | ||
|
|
1b160c6091 | ||
|
|
95cb6b6b3d | ||
|
|
7b4b0bb6ca | ||
|
|
6e93db52a5 | ||
|
|
e33c814879 | ||
|
|
5f3ad571cb | ||
|
|
20b171ded3 | ||
|
|
68edde944a | ||
|
|
510c795bd6 | ||
|
|
eb7b62fefc | ||
|
|
b974a8f7a8 | ||
|
|
40fd7d5558 | ||
|
|
a99c35d0ff | ||
|
|
83323d7993 | ||
|
|
6b8d5c1963 | ||
|
|
7b94b7ebc1 | ||
|
|
5d645ba905 | ||
|
|
5f70c3fc57 | ||
|
|
b87d0215a0 | ||
|
|
e6e6785583 | ||
|
|
60bffd9992 | ||
|
|
130632b8ef | ||
|
|
e2264170f6 | ||
|
|
737a409088 | ||
|
|
e7130a28d6 | ||
|
|
d9db45913c | ||
|
|
6d2c8e6dbb | ||
|
|
19600b7e0e | ||
|
|
dd74bb459b | ||
|
|
648bf1e813 | ||
|
|
cbef1f3451 | ||
|
|
b4472764eb | ||
|
|
5fe7a51522 | ||
|
|
2e0374c458 | ||
|
|
61a0f38a2a | ||
|
|
72d61c38be | ||
|
|
d45094c246 | ||
|
|
1607f62149 | ||
|
|
17ddbb2240 | ||
|
|
2eb425d7f8 | ||
|
|
114c946971 | ||
|
|
0e4382d09b | ||
|
|
e3a9e85099 | ||
|
|
eb120ac1ac | ||
|
|
956c28b41a | ||
|
|
79f20ece71 | ||
|
|
1373ee90b0 | ||
|
|
74a4816530 | ||
|
|
30706cde41 | ||
|
|
e436030383 | ||
|
|
1bcc998040 | ||
|
|
fbd357d82c | ||
|
|
3fc3b5fdad | ||
|
|
b5f8ff7035 | ||
|
|
002078af36 | ||
|
|
e4b4a75cab | ||
|
|
7103abccdb | ||
|
|
319dfeaad8 | ||
|
|
49e4f3b29a | ||
|
|
bafc6f34fb | ||
|
|
f9d612329a | ||
|
|
6ca3d3a1b1 | ||
|
|
7b40a8df80 | ||
|
|
53f10ab19e | ||
|
|
33c72ef7e1 | ||
|
|
acae4b089d | ||
|
|
8055a64b5f | ||
|
|
e4ae8a72dd | ||
|
|
d68d0bd65e | ||
|
|
a35c9d5576 | ||
|
|
eade2c19c8 | ||
|
|
1ff788bed5 |
@@ -18,7 +18,6 @@ plugins {
|
||||
id("androidx.navigation.safeargs.kotlin")
|
||||
id("org.jlleitschuh.gradle.ktlint") version "11.4.0"
|
||||
id("com.github.triplet.play") version "3.8.6"
|
||||
id("idea")
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -28,8 +27,6 @@ plugins {
|
||||
*/
|
||||
val autoVersion = (((System.currentTimeMillis() / 1000) - 1451606400) / 10).toInt()
|
||||
|
||||
val edenDir = project(":Eden").projectDir
|
||||
|
||||
@Suppress("UnstableApiUsage")
|
||||
android {
|
||||
namespace = "org.yuzu.yuzu_emu"
|
||||
@@ -244,17 +241,11 @@ android {
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
version = "3.22.1"
|
||||
path = file("${edenDir}/CMakeLists.txt")
|
||||
path = file("../../../CMakeLists.txt")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
idea {
|
||||
module {
|
||||
// Inclusion to exclude build/ dir from non-Android
|
||||
excludeDirs.add(file("${edenDir}/build"))
|
||||
}
|
||||
}
|
||||
|
||||
tasks.register<Delete>("ktlintReset", fun Delete.() {
|
||||
delete(File(layout.buildDirectory.toString() + File.separator + "intermediates/ktLint"))
|
||||
@@ -355,7 +346,7 @@ fun getGitVersion(): String {
|
||||
}
|
||||
|
||||
afterEvaluate {
|
||||
val artifactsDir = layout.projectDirectory.dir("${edenDir}/artifacts")
|
||||
val artifactsDir = layout.projectDirectory.dir("../../../artifacts")
|
||||
val outputsDir = layout.buildDirectory.dir("outputs").get()
|
||||
|
||||
android.applicationVariants.forEach { variant ->
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
@@ -22,6 +19,3 @@ dependencyResolutionManagement {
|
||||
}
|
||||
|
||||
include(":app")
|
||||
|
||||
include("Eden")
|
||||
project(":Eden").projectDir = file("../..")
|
||||
|
||||
@@ -578,7 +578,10 @@ public:
|
||||
#endif
|
||||
int flags = (fd > 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED;
|
||||
void* ret = mmap(virtual_base + virtual_offset, length, prot_flags, flags, fd, host_offset);
|
||||
ASSERT_MSG(ret != MAP_FAILED, "mmap: {}", strerror(errno));
|
||||
ASSERT_MSG(ret != MAP_FAILED,
|
||||
"mmap(virt_off=0x{:X}, host_off=0x{:X}, len=0x{:X}, virt_size=0x{:X}, backing_size=0x{:X}, perms=0x{:X}) failed: {}",
|
||||
virtual_offset, host_offset, length, virtual_size, backing_size,
|
||||
static_cast<u32>(perms), strerror(errno));
|
||||
}
|
||||
|
||||
void Unmap(size_t virtual_offset, size_t length) {
|
||||
|
||||
@@ -466,7 +466,7 @@ struct Values {
|
||||
true};
|
||||
SwitchableSetting<bool> async_presentation{linkage,
|
||||
#ifdef ANDROID
|
||||
true,
|
||||
false,
|
||||
#else
|
||||
false,
|
||||
#endif
|
||||
|
||||
@@ -130,6 +130,17 @@ public:
|
||||
ResetStorageBit(id.index);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool Contains(SlotId id) const noexcept {
|
||||
if (!id) {
|
||||
return false;
|
||||
}
|
||||
const size_t word = id.index / 64;
|
||||
if (word >= stored_bitset.size()) {
|
||||
return false;
|
||||
}
|
||||
return ((stored_bitset[word] >> (id.index % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] Iterator begin() noexcept {
|
||||
const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
|
||||
if (it == stored_bitset.end()) {
|
||||
|
||||
@@ -391,15 +391,28 @@ const std::size_t CACHE_PAGE_SIZE = 4096;
|
||||
|
||||
void ArmNce::ClearInstructionCache() {
|
||||
#ifdef __aarch64__
|
||||
// Ensure all previous memory operations complete
|
||||
// Use IC IALLU to actually invalidate L1 instruction cache
|
||||
asm volatile("dsb ish\n"
|
||||
"ic iallu\n"
|
||||
"dsb ish\n"
|
||||
"isb" ::: "memory");
|
||||
#endif
|
||||
}
|
||||
|
||||
void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
|
||||
this->ClearInstructionCache();
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
// Invalidate instruction cache for specific range instead of full flush
|
||||
constexpr u64 cache_line_size = 64;
|
||||
const u64 aligned_addr = addr & ~(cache_line_size - 1);
|
||||
const u64 end_addr = (addr + size + cache_line_size - 1) & ~(cache_line_size - 1);
|
||||
|
||||
asm volatile("dsb ish" ::: "memory");
|
||||
for (u64 i = aligned_addr; i < end_addr; i += cache_line_size) {
|
||||
asm volatile("ic ivau, %0" :: "r"(i) : "memory");
|
||||
}
|
||||
asm volatile("dsb ish\n"
|
||||
"isb" ::: "memory");
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/range/algorithm_ext/erase.hpp>
|
||||
@@ -187,7 +191,7 @@ void HLERequestContext::ParseCommandBuffer(u32_le* src_cmdbuf, bool incoming) {
|
||||
buffer_w_descriptors.push_back(rp.PopRaw<IPC::BufferDescriptorABW>());
|
||||
}
|
||||
|
||||
const auto buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
|
||||
buffer_c_offset = rp.GetCurrentOffset() + command_header->data_size;
|
||||
|
||||
if (!command_header->IsTipc()) {
|
||||
// Padding to align to 16 bytes
|
||||
@@ -294,7 +298,15 @@ Result HLERequestContext::WriteToOutgoingCommandBuffer() {
|
||||
}
|
||||
|
||||
// Write the domain objects to the command buffer, these go after the raw untranslated data.
|
||||
// TODO(Subv): This completely ignores C buffers.
|
||||
|
||||
if (buffer_c_offset != 0 && !buffer_c_descriptors.empty()) {
|
||||
constexpr u32 WORDS_PER_DESCRIPTOR = sizeof(IPC::BufferDescriptorC) / sizeof(u32);
|
||||
u32 descriptor_offset = buffer_c_offset;
|
||||
for (const auto& descriptor : buffer_c_descriptors) {
|
||||
std::memcpy(&cmd_buf[descriptor_offset], &descriptor, sizeof(descriptor));
|
||||
descriptor_offset += WORDS_PER_DESCRIPTOR;
|
||||
}
|
||||
}
|
||||
|
||||
if (GetManager()->IsDomain()) {
|
||||
current_offset = domain_offset - static_cast<u32>(outgoing_domain_objects.size());
|
||||
@@ -393,10 +405,14 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
|
||||
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
|
||||
BufferDescriptorB()[buffer_index].Size()};
|
||||
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
|
||||
if (buffer_size == 0) {
|
||||
LOG_WARNING(Core, "WriteBuffer target index {} has zero capacity", buffer_index);
|
||||
return 0;
|
||||
}
|
||||
if (size > buffer_size) {
|
||||
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
|
||||
buffer_size);
|
||||
size = buffer_size; // TODO(bunnei): This needs to be HW tested
|
||||
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
|
||||
size, buffer_size);
|
||||
size = buffer_size;
|
||||
}
|
||||
|
||||
if (is_buffer_b) {
|
||||
@@ -418,15 +434,25 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
|
||||
|
||||
std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size,
|
||||
std::size_t buffer_index) const {
|
||||
if (buffer_index >= BufferDescriptorB().size() || size == 0) {
|
||||
if (buffer_index >= BufferDescriptorB().size()) {
|
||||
LOG_WARNING(Core, "WriteBufferB invalid buffer index {}", buffer_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
LOG_WARNING(Core, "skip empty buffer write (B)");
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto buffer_size{BufferDescriptorB()[buffer_index].Size()};
|
||||
if (buffer_size == 0) {
|
||||
LOG_WARNING(Core, "WriteBufferB target index {} has zero capacity", buffer_index);
|
||||
return 0;
|
||||
}
|
||||
if (size > buffer_size) {
|
||||
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
|
||||
buffer_size);
|
||||
size = buffer_size; // TODO(bunnei): This needs to be HW tested
|
||||
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
|
||||
size, buffer_size);
|
||||
size = buffer_size;
|
||||
}
|
||||
|
||||
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
|
||||
@@ -435,15 +461,25 @@ std::size_t HLERequestContext::WriteBufferB(const void* buffer, std::size_t size
|
||||
|
||||
std::size_t HLERequestContext::WriteBufferC(const void* buffer, std::size_t size,
|
||||
std::size_t buffer_index) const {
|
||||
if (buffer_index >= BufferDescriptorC().size() || size == 0) {
|
||||
if (buffer_index >= BufferDescriptorC().size()) {
|
||||
LOG_WARNING(Core, "WriteBufferC invalid buffer index {}", buffer_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
LOG_WARNING(Core, "skip empty buffer write (C)");
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto buffer_size{BufferDescriptorC()[buffer_index].Size()};
|
||||
if (buffer_size == 0) {
|
||||
LOG_WARNING(Core, "WriteBufferC target index {} has zero capacity", buffer_index);
|
||||
return 0;
|
||||
}
|
||||
if (size > buffer_size) {
|
||||
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
|
||||
buffer_size);
|
||||
size = buffer_size; // TODO(bunnei): This needs to be HW tested
|
||||
LOG_WARNING(Core, "size ({:016X}) is greater than buffer_size ({:016X}); clamping",
|
||||
size, buffer_size);
|
||||
size = buffer_size;
|
||||
}
|
||||
|
||||
memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
|
||||
@@ -473,12 +509,20 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
|
||||
ASSERT_OR_EXECUTE_MSG(
|
||||
BufferDescriptorB().size() > buffer_index, { return 0; },
|
||||
"BufferDescriptorB invalid buffer_index {}", buffer_index);
|
||||
return BufferDescriptorB()[buffer_index].Size();
|
||||
const auto size = BufferDescriptorB()[buffer_index].Size();
|
||||
if (size == 0) {
|
||||
LOG_WARNING(Core, "BufferDescriptorB index {} has zero size", buffer_index);
|
||||
}
|
||||
return size;
|
||||
} else {
|
||||
ASSERT_OR_EXECUTE_MSG(
|
||||
BufferDescriptorC().size() > buffer_index, { return 0; },
|
||||
"BufferDescriptorC invalid buffer_index {}", buffer_index);
|
||||
return BufferDescriptorC()[buffer_index].Size();
|
||||
const auto size = BufferDescriptorC()[buffer_index].Size();
|
||||
if (size == 0) {
|
||||
LOG_WARNING(Core, "BufferDescriptorC index {} has zero size", buffer_index);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -422,6 +425,7 @@ private:
|
||||
u32 data_payload_offset{};
|
||||
u32 handles_offset{};
|
||||
u32 domain_offset{};
|
||||
u32 buffer_c_offset{};
|
||||
|
||||
std::weak_ptr<SessionRequestManager> manager{};
|
||||
bool is_deferred{false};
|
||||
|
||||
@@ -134,4 +134,4 @@ target_include_directories(dynarmic_tests PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
add_test(NAME dynarmic_tests COMMAND dynarmic_tests --durations yes)
|
||||
add_test(dynarmic_tests dynarmic_tests --durations yes)
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <vector>
|
||||
#include <spirv-tools/optimizer.hpp>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
@@ -439,15 +440,23 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
|
||||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||
}
|
||||
if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
|
||||
info.uses_subgroup_shuffles) &&
|
||||
profile.support_vote) {
|
||||
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(ctx.stage);
|
||||
const bool needs_warp_intrinsics = info.uses_subgroup_vote ||
|
||||
info.uses_subgroup_invocation_id ||
|
||||
info.uses_subgroup_shuffles;
|
||||
|
||||
if (needs_warp_intrinsics && profile.support_vote && stage_supports_warp) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformShuffle);
|
||||
if (!profile.warp_size_potentially_larger_than_guest) {
|
||||
// vote ops are only used when not taking the long path
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformVote);
|
||||
}
|
||||
} else if (needs_warp_intrinsics && !stage_supports_warp) {
|
||||
LOG_WARNING(Shader,
|
||||
"Warp intrinsics requested in stage {} but the device does not report subgroup "
|
||||
"support; falling back to scalar approximations",
|
||||
static_cast<u32>(ctx.stage));
|
||||
}
|
||||
if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
|
||||
ctx.AddCapability(spv::Capability::Int64Atomics);
|
||||
|
||||
@@ -491,9 +491,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
|
||||
}
|
||||
|
||||
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
|
||||
const AttributeType output_type{ctx.runtime_info.color_output_types[index]};
|
||||
Id pointer_type{ctx.output_f32};
|
||||
Id store_value{value};
|
||||
switch (output_type) {
|
||||
case AttributeType::SignedInt:
|
||||
pointer_type = ctx.output_s32;
|
||||
store_value = ctx.OpBitcast(ctx.S32[1], value);
|
||||
break;
|
||||
case AttributeType::UnsignedInt:
|
||||
pointer_type = ctx.output_u32;
|
||||
store_value = ctx.OpBitcast(ctx.U32[1], value);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
const Id component_id{ctx.Const(component)};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
|
||||
ctx.OpStore(pointer, value);
|
||||
const Id pointer{ctx.OpAccessChain(pointer_type, ctx.frag_color.at(index), component_id)};
|
||||
ctx.OpStore(pointer, store_value);
|
||||
}
|
||||
|
||||
void EmitSetSampleMask(EmitContext& ctx, Id value) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -195,6 +198,41 @@ Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR
|
||||
}
|
||||
}
|
||||
|
||||
Id TextureColorResultType(EmitContext& ctx, const TextureDefinition& def) {
|
||||
switch (def.component_type) {
|
||||
case SamplerComponentType::Float:
|
||||
case SamplerComponentType::Depth:
|
||||
return ctx.F32[4];
|
||||
case SamplerComponentType::Sint:
|
||||
return ctx.S32[4];
|
||||
case SamplerComponentType::Stencil:
|
||||
return ctx.U32[4];
|
||||
case SamplerComponentType::Uint:
|
||||
return ctx.U32[4];
|
||||
}
|
||||
throw InvalidArgument("Invalid sampler component type {}", def.component_type);
|
||||
}
|
||||
|
||||
Id TextureSampleResultToFloat(EmitContext& ctx, const TextureDefinition& def, Id color) {
|
||||
switch (def.component_type) {
|
||||
case SamplerComponentType::Float:
|
||||
case SamplerComponentType::Depth:
|
||||
return color;
|
||||
case SamplerComponentType::Sint:
|
||||
return ctx.OpConvertSToF(ctx.F32[4], color);
|
||||
case SamplerComponentType::Stencil:
|
||||
{
|
||||
const Id converted{ctx.OpConvertUToF(ctx.F32[4], color)};
|
||||
const Id inv255{ctx.Const(1.0f / 255.0f)};
|
||||
const Id scale{ctx.ConstantComposite(ctx.F32[4], inv255, inv255, inv255, inv255)};
|
||||
return ctx.OpFMul(ctx.F32[4], converted, scale);
|
||||
}
|
||||
case SamplerComponentType::Uint:
|
||||
return ctx.OpConvertUToF(ctx.F32[4], color);
|
||||
}
|
||||
throw InvalidArgument("Invalid sampler component type {}", def.component_type);
|
||||
}
|
||||
|
||||
Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
|
||||
if (!index.IsImmediate() || index.U32() != 0) {
|
||||
throw NotImplementedException("Indirect image indexing");
|
||||
@@ -449,31 +487,39 @@ Id EmitBoundImageWrite(EmitContext&) {
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id bias_lc, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
|
||||
const Id color_type{TextureColorResultType(ctx, def)};
|
||||
const Id texture{Texture(ctx, info, index)};
|
||||
Id color{};
|
||||
if (ctx.stage == Stage::Fragment) {
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
|
||||
bias_lc, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
|
||||
&EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
|
||||
Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
|
||||
color = Emit(&EmitContext::OpImageSparseSampleImplicitLod,
|
||||
&EmitContext::OpImageSampleImplicitLod, ctx, inst, color_type, texture,
|
||||
coords, operands.MaskOptional(), operands.Span());
|
||||
} else {
|
||||
// We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as
|
||||
// if the lod was explicitly zero. This may change on Turing with implicit compute
|
||||
// derivatives
|
||||
const Id lod{ctx.Const(0.0f)};
|
||||
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
|
||||
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
|
||||
color = Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, texture,
|
||||
coords, operands.Mask(), operands.Span());
|
||||
}
|
||||
return TextureSampleResultToFloat(ctx, def, color);
|
||||
}
|
||||
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id lod, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
|
||||
const Id color_type{TextureColorResultType(ctx, def)};
|
||||
const ImageOperands operands(ctx, false, true, false, lod, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
|
||||
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
|
||||
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
|
||||
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
|
||||
return TextureSampleResultToFloat(ctx, def, color);
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
@@ -509,13 +555,18 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
|
||||
const Id color_type{TextureColorResultType(ctx, def)};
|
||||
const ImageOperands operands(ctx, offset, offset2);
|
||||
const Id texture{Texture(ctx, info, index)};
|
||||
if (ctx.profile.need_gather_subpixel_offset) {
|
||||
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
|
||||
}
|
||||
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
|
||||
ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
|
||||
operands.MaskOptional(), operands.Span());
|
||||
const Id color{
|
||||
Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, color_type,
|
||||
texture, coords, ctx.Const(info.gather_component), operands.MaskOptional(),
|
||||
operands.Span())};
|
||||
return TextureSampleResultToFloat(ctx, def, color);
|
||||
}
|
||||
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
@@ -533,6 +584,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
||||
Id lod, Id ms) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const TextureDefinition* def =
|
||||
info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index);
|
||||
const Id result_type{def ? TextureColorResultType(ctx, *def) : ctx.F32[4]};
|
||||
AddOffsetToCoordinates(ctx, info, coords, offset);
|
||||
if (info.type == TextureType::Buffer) {
|
||||
lod = Id{};
|
||||
@@ -542,8 +596,13 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
|
||||
lod = Id{};
|
||||
}
|
||||
const ImageOperands operands(lod, ms);
|
||||
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
|
||||
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
|
||||
Id color{Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst,
|
||||
result_type, TextureImage(ctx, info, index), coords, operands.MaskOptional(),
|
||||
operands.Span())};
|
||||
if (def) {
|
||||
color = TextureSampleResultToFloat(ctx, *def, color);
|
||||
}
|
||||
return color;
|
||||
}
|
||||
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
|
||||
@@ -588,14 +647,17 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
|
||||
const Id color_type{TextureColorResultType(ctx, def)};
|
||||
const auto operands = info.num_derivatives == 3
|
||||
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
|
||||
ctx.Def(offset), {}, lod_clamp)
|
||||
: ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
|
||||
info.num_derivatives, offset, lod_clamp);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
|
||||
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
|
||||
const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type,
|
||||
Texture(ctx, info, index), coords, operands.Mask(), operands.Span())};
|
||||
return TextureSampleResultToFloat(ctx, def, color);
|
||||
}
|
||||
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -78,9 +81,25 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
||||
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
||||
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
||||
}
|
||||
|
||||
bool SupportsWarpIntrinsics(const EmitContext& ctx) {
|
||||
return ctx.profile.SupportsWarpIntrinsics(ctx.stage);
|
||||
}
|
||||
|
||||
void SetAlwaysInBounds(EmitContext& ctx, IR::Inst* inst) {
|
||||
SetInBoundsFlag(inst, ctx.true_value);
|
||||
}
|
||||
|
||||
Id FallbackBallotMask(EmitContext& ctx, Id pred) {
|
||||
const Id full_mask{ctx.Const(0xFFFFFFFFu)};
|
||||
return ctx.OpSelect(ctx.U32[1], pred, full_mask, ctx.u32_zero_value);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
const Id id{GetThreadId(ctx)};
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return id;
|
||||
@@ -89,6 +108,9 @@ Id EmitLaneId(EmitContext& ctx) {
|
||||
}
|
||||
|
||||
Id EmitVoteAll(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return pred;
|
||||
}
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpGroupNonUniformAll(ctx.U1, SubgroupScope(ctx), pred);
|
||||
}
|
||||
@@ -102,6 +124,9 @@ Id EmitVoteAll(EmitContext& ctx, Id pred) {
|
||||
}
|
||||
|
||||
Id EmitVoteAny(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return pred;
|
||||
}
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpGroupNonUniformAny(ctx.U1, SubgroupScope(ctx), pred);
|
||||
}
|
||||
@@ -115,6 +140,9 @@ Id EmitVoteAny(EmitContext& ctx, Id pred) {
|
||||
}
|
||||
|
||||
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return pred;
|
||||
}
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpGroupNonUniformAllEqual(ctx.U1, SubgroupScope(ctx), pred);
|
||||
}
|
||||
@@ -129,6 +157,9 @@ Id EmitVoteEqual(EmitContext& ctx, Id pred) {
|
||||
}
|
||||
|
||||
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return FallbackBallotMask(ctx, pred);
|
||||
}
|
||||
const Id ballot{ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred)};
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
|
||||
@@ -137,27 +168,46 @@ Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
|
||||
}
|
||||
|
||||
Id EmitSubgroupEqMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_eq);
|
||||
}
|
||||
|
||||
Id EmitSubgroupLtMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_lt);
|
||||
}
|
||||
|
||||
Id EmitSubgroupLeMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_le);
|
||||
}
|
||||
|
||||
Id EmitSubgroupGtMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_gt);
|
||||
}
|
||||
|
||||
Id EmitSubgroupGeMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_ge);
|
||||
}
|
||||
|
||||
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
|
||||
@@ -177,6 +227,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
|
||||
|
||||
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
@@ -192,6 +246,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
|
||||
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
@@ -207,6 +265,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
|
||||
|
||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
|
||||
@@ -28,27 +28,41 @@ enum class Operation {
|
||||
FPMax,
|
||||
};
|
||||
|
||||
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
|
||||
Id ComponentScalarType(EmitContext& ctx, SamplerComponentType component_type) {
|
||||
switch (component_type) {
|
||||
case SamplerComponentType::Float:
|
||||
case SamplerComponentType::Depth:
|
||||
return ctx.F32[1];
|
||||
case SamplerComponentType::Sint:
|
||||
return ctx.S32[1];
|
||||
case SamplerComponentType::Stencil:
|
||||
return ctx.U32[1];
|
||||
case SamplerComponentType::Uint:
|
||||
return ctx.U32[1];
|
||||
}
|
||||
throw InvalidArgument("Invalid sampler component type {}", component_type);
|
||||
}
|
||||
|
||||
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc, Id sampled_type) {
|
||||
const spv::ImageFormat format{spv::ImageFormat::Unknown};
|
||||
const Id type{ctx.F32[1]};
|
||||
const bool depth{desc.is_depth};
|
||||
const bool ms{desc.is_multisample};
|
||||
switch (desc.type) {
|
||||
case TextureType::Color1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, false, false, 1, format);
|
||||
case TextureType::ColorArray1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, true, false, 1, format);
|
||||
case TextureType::Color2D:
|
||||
case TextureType::Color2DRect:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, false, ms, 1, format);
|
||||
case TextureType::ColorArray2D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, depth, true, ms, 1, format);
|
||||
case TextureType::Color3D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, depth, false, false, 1, format);
|
||||
case TextureType::ColorCube:
|
||||
return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, false, false, 1, format);
|
||||
case TextureType::ColorArrayCube:
|
||||
return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, depth, true, false, 1, format);
|
||||
case TextureType::Buffer:
|
||||
break;
|
||||
}
|
||||
@@ -315,6 +329,9 @@ void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
|
||||
ctx.Decorate(id, spv::Decoration::Binding, binding);
|
||||
ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
ctx.Name(id, fmt::format("ssbo{}", index));
|
||||
if (!desc.is_written) {
|
||||
ctx.Decorate(id, spv::Decoration::NonWritable);
|
||||
}
|
||||
if (ctx.profile.supported_spirv >= 0x00010400) {
|
||||
ctx.interfaces.push_back(id);
|
||||
}
|
||||
@@ -546,6 +563,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
||||
|
||||
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
|
||||
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
|
||||
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
|
||||
|
||||
if (info.uses_int8 && profile.support_int8) {
|
||||
AddCapability(spv::Capability::Int8);
|
||||
@@ -1359,7 +1377,8 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
|
||||
void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
|
||||
textures.reserve(info.texture_descriptors.size());
|
||||
for (const TextureDescriptor& desc : info.texture_descriptors) {
|
||||
const Id image_type{ImageType(*this, desc)};
|
||||
const Id result_type{ComponentScalarType(*this, desc.component_type)};
|
||||
const Id image_type{ImageType(*this, desc, result_type)};
|
||||
const Id sampled_type{TypeSampledImage(image_type)};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
|
||||
const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
|
||||
@@ -1372,8 +1391,10 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
|
||||
.sampled_type = sampled_type,
|
||||
.pointer_type = pointer_type,
|
||||
.image_type = image_type,
|
||||
.result_type = result_type,
|
||||
.count = desc.count,
|
||||
.is_multisample = desc.is_multisample,
|
||||
.component_type = desc.component_type,
|
||||
});
|
||||
if (profile.supported_spirv >= 0x00010400) {
|
||||
interfaces.push_back(id);
|
||||
@@ -1416,6 +1437,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
|
||||
void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
const Info& info{program.info};
|
||||
const VaryingState loads{info.loads.mask | info.passthrough.mask};
|
||||
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(stage);
|
||||
|
||||
if (info.uses_workgroup_id) {
|
||||
workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
|
||||
@@ -1432,24 +1454,37 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
}
|
||||
if (info.uses_sample_id) {
|
||||
sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
|
||||
if (stage == Stage::Fragment) {
|
||||
Decorate(sample_id, spv::Decoration::Flat);
|
||||
}
|
||||
}
|
||||
if (info.uses_is_helper_invocation) {
|
||||
is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
|
||||
}
|
||||
if (info.uses_subgroup_mask) {
|
||||
if (info.uses_subgroup_mask && stage_supports_warp) {
|
||||
subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
|
||||
subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
|
||||
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
|
||||
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
|
||||
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
|
||||
if (stage == Stage::Fragment) {
|
||||
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_lt, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_le, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_gt, spv::Decoration::Flat);
|
||||
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
|
||||
}
|
||||
}
|
||||
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
|
||||
(profile.warp_size_potentially_larger_than_guest &&
|
||||
(info.uses_subgroup_vote || info.uses_subgroup_mask))) {
|
||||
if (stage_supports_warp &&
|
||||
(info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
|
||||
(profile.warp_size_potentially_larger_than_guest &&
|
||||
(info.uses_subgroup_vote || info.uses_subgroup_mask)))) {
|
||||
AddCapability(spv::Capability::GroupNonUniform);
|
||||
subgroup_local_invocation_id =
|
||||
DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
if (stage == Stage::Fragment) {
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
}
|
||||
if (info.uses_fswzadd) {
|
||||
const Id f32_one{Const(1.0f)};
|
||||
@@ -1461,6 +1496,9 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
}
|
||||
if (loads[IR::Attribute::PrimitiveId]) {
|
||||
primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
|
||||
if (stage == Stage::Fragment) {
|
||||
Decorate(primitive_id, spv::Decoration::Flat);
|
||||
}
|
||||
}
|
||||
if (loads[IR::Attribute::Layer]) {
|
||||
AddCapability(spv::Capability::Geometry);
|
||||
@@ -1552,17 +1590,21 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
if (stage != Stage::Fragment) {
|
||||
continue;
|
||||
}
|
||||
switch (info.interpolation[index]) {
|
||||
case Interpolation::Smooth:
|
||||
// Default
|
||||
// Decorate(id, spv::Decoration::Smooth);
|
||||
break;
|
||||
case Interpolation::NoPerspective:
|
||||
Decorate(id, spv::Decoration::NoPerspective);
|
||||
break;
|
||||
case Interpolation::Flat:
|
||||
const bool is_integer = input_type == AttributeType::SignedInt ||
|
||||
input_type == AttributeType::UnsignedInt;
|
||||
if (is_integer) {
|
||||
Decorate(id, spv::Decoration::Flat);
|
||||
break;
|
||||
} else {
|
||||
switch (info.interpolation[index]) {
|
||||
case Interpolation::Smooth:
|
||||
break;
|
||||
case Interpolation::NoPerspective:
|
||||
Decorate(id, spv::Decoration::NoPerspective);
|
||||
break;
|
||||
case Interpolation::Flat:
|
||||
Decorate(id, spv::Decoration::Flat);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (stage == Stage::TessellationEval) {
|
||||
@@ -1658,7 +1700,18 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
|
||||
if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
|
||||
continue;
|
||||
}
|
||||
frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
|
||||
const AttributeType output_type{runtime_info.color_output_types[index]};
|
||||
const Id vec_type = [&, output_type]() -> Id {
|
||||
switch (output_type) {
|
||||
case AttributeType::SignedInt:
|
||||
return S32[4];
|
||||
case AttributeType::UnsignedInt:
|
||||
return U32[4];
|
||||
default:
|
||||
return F32[4];
|
||||
}
|
||||
}();
|
||||
frag_color[index] = DefineOutput(*this, vec_type, std::nullopt);
|
||||
Decorate(frag_color[index], spv::Decoration::Location, index);
|
||||
Name(frag_color[index], fmt::format("frag_color{}", index));
|
||||
}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -36,8 +39,10 @@ struct TextureDefinition {
|
||||
Id sampled_type;
|
||||
Id pointer_type;
|
||||
Id image_type;
|
||||
Id result_type;
|
||||
u32 count;
|
||||
bool is_multisample;
|
||||
SamplerComponentType component_type;
|
||||
};
|
||||
|
||||
struct TextureBufferDefinition {
|
||||
@@ -244,6 +249,7 @@ public:
|
||||
|
||||
Id output_f32{};
|
||||
Id output_u32{};
|
||||
Id output_s32{};
|
||||
|
||||
Id image_buffer_type{};
|
||||
Id image_u32{};
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -22,6 +25,8 @@ public:
|
||||
|
||||
[[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
|
||||
|
||||
[[nodiscard]] virtual SamplerComponentType ReadTextureComponentType(u32 raw_handle) = 0;
|
||||
|
||||
[[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0;
|
||||
|
||||
[[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0;
|
||||
|
||||
@@ -396,6 +396,10 @@ bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf)
|
||||
return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
|
||||
}
|
||||
|
||||
SamplerComponentType ReadTextureComponentType(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
return env.ReadTextureComponentType(GetTextureHandle(env, cbuf));
|
||||
}
|
||||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
|
||||
@@ -433,7 +437,9 @@ public:
|
||||
|
||||
u32 Add(const TextureDescriptor& desc) {
|
||||
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
|
||||
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
|
||||
return desc.type == existing.type &&
|
||||
desc.component_type == existing.component_type &&
|
||||
desc.is_depth == existing.is_depth &&
|
||||
desc.has_secondary == existing.has_secondary &&
|
||||
desc.cbuf_index == existing.cbuf_index &&
|
||||
desc.cbuf_offset == existing.cbuf_offset &&
|
||||
@@ -666,10 +672,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
||||
.secondary_shift_left = cbuf.secondary_shift_left,
|
||||
.count = cbuf.count,
|
||||
.size_shift = DESCRIPTOR_SIZE_SHIFT,
|
||||
.component_type = ReadTextureComponentType(env, cbuf),
|
||||
});
|
||||
} else {
|
||||
index = descriptors.Add(TextureDescriptor{
|
||||
.type = flags.type,
|
||||
.component_type = ReadTextureComponentType(env, cbuf),
|
||||
.is_depth = flags.is_depth != 0,
|
||||
.is_multisample = is_multisample,
|
||||
.has_secondary = cbuf.has_secondary,
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
@@ -46,6 +52,8 @@ struct Profile {
|
||||
bool support_multi_viewport{};
|
||||
bool support_geometry_streams{};
|
||||
|
||||
u32 warp_stage_support_mask{std::numeric_limits<u32>::max()};
|
||||
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
|
||||
bool lower_left_origin_mode{};
|
||||
@@ -90,6 +98,11 @@ struct Profile {
|
||||
u64 min_ssbo_alignment{};
|
||||
|
||||
u32 max_user_clip_distances{};
|
||||
|
||||
bool SupportsWarpIntrinsics(Stage stage) const {
|
||||
const u32 bit = 1u << static_cast<u32>(stage);
|
||||
return (warp_stage_support_mask & bit) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -80,6 +83,7 @@ struct TransformFeedbackVarying {
|
||||
|
||||
struct RuntimeInfo {
|
||||
std::array<AttributeType, 32> generic_input_types{};
|
||||
std::array<AttributeType, 8> color_output_types{};
|
||||
VaryingState previous_stage_stores;
|
||||
std::map<IR::Attribute, IR::Attribute> previous_stage_legacy_stores_mapping;
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -151,6 +154,14 @@ enum class ImageFormat : u32 {
|
||||
R32G32B32A32_UINT,
|
||||
};
|
||||
|
||||
enum class SamplerComponentType : u8 {
|
||||
Float,
|
||||
Sint,
|
||||
Uint,
|
||||
Depth,
|
||||
Stencil,
|
||||
};
|
||||
|
||||
enum class Interpolation {
|
||||
Smooth,
|
||||
Flat,
|
||||
@@ -183,6 +194,7 @@ struct TextureBufferDescriptor {
|
||||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
SamplerComponentType component_type;
|
||||
|
||||
auto operator<=>(const TextureBufferDescriptor&) const = default;
|
||||
};
|
||||
@@ -204,6 +216,7 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip
|
||||
|
||||
struct TextureDescriptor {
|
||||
TextureType type;
|
||||
SamplerComponentType component_type;
|
||||
bool is_depth;
|
||||
bool is_multisample;
|
||||
bool has_secondary;
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
|
||||
@@ -16,8 +15,6 @@
|
||||
#include "video_core/guest_memory.h"
|
||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
#include "video_core/renderer_vulkan/line_loop_utils.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
@@ -356,37 +353,14 @@ void BufferCache<P>::UpdateComputeBuffers() {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (is_indexed) {
|
||||
BindHostIndexBuffer();
|
||||
} else {
|
||||
if constexpr (!P::IS_OPENGL) {
|
||||
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
|
||||
if (draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
|
||||
polygon_mode == Maxwell::PolygonMode::Line && draw_state.vertex_buffer.count > 1) {
|
||||
const u32 vertex_count = draw_state.vertex_buffer.count;
|
||||
const u32 generated_count = vertex_count + 1;
|
||||
const bool use_u16 = vertex_count <= 0x10000;
|
||||
const u32 element_size = use_u16 ? sizeof(u16) : sizeof(u32);
|
||||
auto staging = runtime.UploadStagingBuffer(
|
||||
static_cast<size_t>(generated_count) * element_size);
|
||||
std::span<u8> dst_span{staging.mapped_span.data(),
|
||||
generated_count * static_cast<size_t>(element_size)};
|
||||
Vulkan::LineLoop::GenerateSequentialWithClosureRaw(dst_span, element_size);
|
||||
const auto synthetic_format = use_u16 ? Maxwell::IndexFormat::UnsignedShort
|
||||
: Maxwell::IndexFormat::UnsignedInt;
|
||||
runtime.BindIndexBuffer(draw_state.topology, synthetic_format,
|
||||
draw_state.vertex_buffer.first, generated_count,
|
||||
staging.buffer, static_cast<u32>(staging.offset),
|
||||
generated_count * element_size);
|
||||
}
|
||||
}
|
||||
if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
|
||||
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
|
||||
draw_state.vertex_buffer.count);
|
||||
}
|
||||
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
|
||||
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
|
||||
draw_state.vertex_buffer.count);
|
||||
}
|
||||
}
|
||||
BindHostVertexBuffers();
|
||||
@@ -763,44 +737,6 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
||||
const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
|
||||
const u32 size = channel_state->index_buffer.size;
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if constexpr (!P::IS_OPENGL) {
|
||||
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
|
||||
const bool polygon_line =
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
|
||||
polygon_mode == Maxwell::PolygonMode::Line;
|
||||
if (polygon_line && draw_state.index_buffer.count > 1) {
|
||||
const u32 element_size = draw_state.index_buffer.FormatSizeInBytes();
|
||||
const size_t src_bytes = static_cast<size_t>(draw_state.index_buffer.count) * element_size;
|
||||
const size_t total_bytes = src_bytes + element_size;
|
||||
auto staging = runtime.UploadStagingBuffer(total_bytes);
|
||||
std::span<u8> dst_span{staging.mapped_span.data(), total_bytes};
|
||||
std::span<const u8> src_span;
|
||||
if (!draw_state.inline_index_draw_indexes.empty()) {
|
||||
const u8* const src =
|
||||
draw_state.inline_index_draw_indexes.data() +
|
||||
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
|
||||
src_span = {src, src_bytes};
|
||||
} else if (const u8* const cpu_base =
|
||||
device_memory.GetPointer<u8>(channel_state->index_buffer.device_addr)) {
|
||||
const u8* const src = cpu_base +
|
||||
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
|
||||
src_span = {src, src_bytes};
|
||||
} else {
|
||||
const DAddr src_addr =
|
||||
channel_state->index_buffer.device_addr +
|
||||
static_cast<DAddr>(draw_state.index_buffer.first) * element_size;
|
||||
device_memory.ReadBlockUnsafe(src_addr, dst_span.data(), src_bytes);
|
||||
src_span = {dst_span.data(), src_bytes};
|
||||
}
|
||||
Vulkan::LineLoop::CopyWithClosureRaw(dst_span, src_span, element_size);
|
||||
buffer.MarkUsage(offset, size);
|
||||
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
|
||||
draw_state.index_buffer.first, draw_state.index_buffer.count + 1,
|
||||
staging.buffer, static_cast<u32>(staging.offset),
|
||||
static_cast<u32>(total_bytes));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
|
||||
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
auto upload_staging = runtime.UploadStagingBuffer(size);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -235,6 +238,9 @@ constexpr Table MakeViewTable() {
|
||||
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
|
||||
Enable(view, PixelFormat::D24_UNORM_S8_UINT, PixelFormat::S8_UINT);
|
||||
Enable(view, PixelFormat::S8_UINT_D24_UNORM, PixelFormat::S8_UINT);
|
||||
Enable(view, PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::S8_UINT);
|
||||
return view;
|
||||
}
|
||||
|
||||
|
||||
@@ -43,90 +43,66 @@ void DmaPusher::DispatchCalls() {
|
||||
|
||||
bool DmaPusher::Step() {
|
||||
if (!ib_enable || dma_pushbuffer.empty()) {
|
||||
// pushbuffer empty and IB empty or nonexistent - nothing to do
|
||||
return false;
|
||||
}
|
||||
|
||||
CommandList& command_list{dma_pushbuffer.front()};
|
||||
CommandList& command_list = dma_pushbuffer.front();
|
||||
|
||||
ASSERT_OR_EXECUTE(
|
||||
command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
|
||||
// Somehow the command_list is empty, in order to avoid a crash
|
||||
// We ignore it and assume its size is 0.
|
||||
dma_pushbuffer.pop();
|
||||
dma_pushbuffer_subindex = 0;
|
||||
return true;
|
||||
});
|
||||
const size_t prefetch_size = command_list.prefetch_command_list.size();
|
||||
const size_t command_list_size = command_list.command_lists.size();
|
||||
|
||||
if (command_list.prefetch_command_list.size()) {
|
||||
// Prefetched command list from nvdrv, used for things like synchronization
|
||||
ProcessCommands(VideoCommon::FixSmallVectorADL(command_list.prefetch_command_list));
|
||||
if (prefetch_size == 0 && command_list_size == 0) {
|
||||
dma_pushbuffer.pop();
|
||||
} else {
|
||||
const CommandListHeader command_list_header{
|
||||
command_list.command_lists[dma_pushbuffer_subindex++]};
|
||||
dma_pushbuffer_subindex = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (signal_sync) {
|
||||
std::unique_lock lk(sync_mutex);
|
||||
sync_cv.wait(lk, [this]() { return synced; });
|
||||
signal_sync = false;
|
||||
synced = false;
|
||||
}
|
||||
if (prefetch_size > 0) {
|
||||
ProcessCommands(command_list.prefetch_command_list);
|
||||
dma_pushbuffer.pop();
|
||||
return true;
|
||||
}
|
||||
|
||||
dma_state.dma_get = command_list_header.addr;
|
||||
auto& current_command = command_list.command_lists[dma_pushbuffer_subindex];
|
||||
const CommandListHeader& header = current_command;
|
||||
dma_state.dma_get = header.addr;
|
||||
|
||||
if (command_list_header.size == 0) {
|
||||
return true;
|
||||
}
|
||||
if (signal_sync && !synced) {
|
||||
std::unique_lock lk(sync_mutex);
|
||||
sync_cv.wait(lk, [this]() { return synced; });
|
||||
signal_sync = false;
|
||||
synced = false;
|
||||
}
|
||||
|
||||
// Push buffer non-empty, read a word
|
||||
if (dma_state.method >= MacroRegistersStart) {
|
||||
if (subchannels[dma_state.subchannel]) {
|
||||
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(
|
||||
dma_state.dma_get, command_list_header.size * sizeof(u32));
|
||||
}
|
||||
}
|
||||
if (header.size > 0 && dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) {
|
||||
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(dma_state.dma_get, header.size * sizeof(u32));
|
||||
}
|
||||
|
||||
const auto safe_process = [&] {
|
||||
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
|
||||
Tegra::Memory::GuestMemoryFlags::SafeRead>
|
||||
headers(memory_manager, dma_state.dma_get, command_list_header.size,
|
||||
&command_headers);
|
||||
if (header.size > 0) {
|
||||
if (Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe()) {
|
||||
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
|
||||
ProcessCommands(headers);
|
||||
};
|
||||
|
||||
const auto unsafe_process = [&] {
|
||||
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
|
||||
Tegra::Memory::GuestMemoryFlags::UnsafeRead>
|
||||
headers(memory_manager, dma_state.dma_get, command_list_header.size,
|
||||
&command_headers);
|
||||
ProcessCommands(headers);
|
||||
};
|
||||
|
||||
const bool use_safe = Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe();
|
||||
|
||||
if (use_safe) {
|
||||
safe_process();
|
||||
} else {
|
||||
unsafe_process();
|
||||
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::UnsafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
|
||||
ProcessCommands(headers);
|
||||
}
|
||||
}
|
||||
|
||||
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
dma_pushbuffer.pop();
|
||||
dma_pushbuffer_subindex = 0;
|
||||
} else if (command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue()) {
|
||||
signal_sync = true;
|
||||
}
|
||||
if (++dma_pushbuffer_subindex >= command_list_size) {
|
||||
dma_pushbuffer.pop();
|
||||
dma_pushbuffer_subindex = 0;
|
||||
} else {
|
||||
signal_sync = command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue();
|
||||
}
|
||||
|
||||
if (signal_sync) {
|
||||
rasterizer->SignalFence([this]() {
|
||||
if (signal_sync) {
|
||||
rasterizer->SignalFence([this]() {
|
||||
std::scoped_lock lk(sync_mutex);
|
||||
synced = true;
|
||||
sync_cv.notify_all();
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -91,6 +91,10 @@ public:
|
||||
uncommitted_operations.clear();
|
||||
}
|
||||
QueueFence(new_fence);
|
||||
//if (!new_fence->IsStubbed()) {
|
||||
// std::scoped_lock lock{texture_cache.mutex};
|
||||
// texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick());
|
||||
//}
|
||||
fences.push(std::move(new_fence));
|
||||
if (should_flush) {
|
||||
rasterizer.FlushCommands();
|
||||
@@ -179,7 +183,7 @@ private:
|
||||
return;
|
||||
}
|
||||
}
|
||||
PopAsyncFlushes();
|
||||
PopAsyncFlushes(current_fence->WaitTick());
|
||||
auto operations = std::move(pending_operations.front());
|
||||
pending_operations.pop_front();
|
||||
for (auto& operation : operations) {
|
||||
@@ -214,7 +218,7 @@ private:
|
||||
if (!current_fence->IsStubbed()) {
|
||||
WaitFence(current_fence);
|
||||
}
|
||||
PopAsyncFlushes();
|
||||
PopAsyncFlushes(current_fence->WaitTick());
|
||||
for (auto& operation : current_operations) {
|
||||
operation();
|
||||
}
|
||||
@@ -237,10 +241,11 @@ private:
|
||||
query_cache.HasUncommittedFlushes();
|
||||
}
|
||||
|
||||
void PopAsyncFlushes() {
|
||||
void PopAsyncFlushes(u64 completed_tick) {
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.PopAsyncFlushes();
|
||||
texture_cache.CompleteGpuAccesses(completed_tick);
|
||||
buffer_cache.PopAsyncFlushes();
|
||||
}
|
||||
query_cache.PopAsyncFlushes();
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
inline Tegra::Engines::Maxwell3D::Regs::PolygonMode EffectivePolygonMode(
|
||||
const Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
const bool cull_enabled = regs.gl_cull_test_enabled != 0;
|
||||
const auto cull_face = regs.gl_cull_face;
|
||||
const bool cull_front = cull_enabled && (cull_face == Maxwell::CullFace::Front ||
|
||||
cull_face == Maxwell::CullFace::FrontAndBack);
|
||||
const bool cull_back = cull_enabled && (cull_face == Maxwell::CullFace::Back ||
|
||||
cull_face == Maxwell::CullFace::FrontAndBack);
|
||||
|
||||
const bool render_front = !cull_front;
|
||||
const bool render_back = !cull_back;
|
||||
|
||||
const auto front_mode = regs.polygon_mode_front;
|
||||
const auto back_mode = regs.polygon_mode_back;
|
||||
|
||||
if (render_front && render_back && front_mode != back_mode) {
|
||||
if (front_mode == Maxwell::PolygonMode::Line || back_mode == Maxwell::PolygonMode::Line) {
|
||||
return Maxwell::PolygonMode::Line;
|
||||
}
|
||||
if (front_mode == Maxwell::PolygonMode::Point || back_mode == Maxwell::PolygonMode::Point) {
|
||||
return Maxwell::PolygonMode::Point;
|
||||
}
|
||||
}
|
||||
|
||||
if (render_front) {
|
||||
return front_mode;
|
||||
}
|
||||
if (render_back) {
|
||||
return back_mode;
|
||||
}
|
||||
return front_mode;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
@@ -211,6 +211,12 @@ void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
|
||||
streamer->CloseCounter();
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
bool QueryCacheBase<Traits>::HasStreamer(QueryType counter_type) const {
|
||||
const size_t index = static_cast<size_t>(counter_type);
|
||||
return impl->streamers[index] != nullptr;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
|
||||
size_t index = static_cast<size_t>(counter_type);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
@@ -92,6 +95,8 @@ public:
|
||||
|
||||
void CounterReset(QueryType counter_type);
|
||||
|
||||
[[nodiscard]] bool HasStreamer(QueryType counter_type) const;
|
||||
|
||||
void CounterClose(QueryType counter_type);
|
||||
|
||||
void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,
|
||||
|
||||
@@ -7,13 +7,50 @@
|
||||
#include <cstring>
|
||||
#include <bit>
|
||||
#include <numeric>
|
||||
#include <optional>
|
||||
#include "common/cityhash.h"
|
||||
#include "common/settings.h" // for enum class Settings::ShaderBackend
|
||||
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
|
||||
std::optional<VideoCore::Surface::PixelFormatNumeric>
|
||||
NumericFromComponentType(Shader::SamplerComponentType component_type) {
|
||||
using VideoCore::Surface::PixelFormatNumeric;
|
||||
switch (component_type) {
|
||||
case Shader::SamplerComponentType::Float:
|
||||
return PixelFormatNumeric::Float;
|
||||
case Shader::SamplerComponentType::Sint:
|
||||
return PixelFormatNumeric::Sint;
|
||||
case Shader::SamplerComponentType::Uint:
|
||||
return PixelFormatNumeric::Uint;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
|
||||
VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
|
||||
const auto desired_numeric = NumericFromComponentType(component_type);
|
||||
if (!desired_numeric) {
|
||||
return format;
|
||||
}
|
||||
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
|
||||
if (*desired_numeric == current_numeric) {
|
||||
return format;
|
||||
}
|
||||
if (const auto variant =
|
||||
VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
|
||||
return *variant;
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
@@ -174,8 +211,12 @@ void ComputePipeline::Configure() {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
|
||||
auto buffer_format = image_view.format;
|
||||
if constexpr (!is_image) {
|
||||
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
|
||||
}
|
||||
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
image_view.BufferSize(), buffer_format,
|
||||
is_written, is_image);
|
||||
++texbuf_index;
|
||||
}
|
||||
@@ -205,7 +246,8 @@ void ComputePipeline::Configure() {
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
|
||||
textures[texture_binding] = image_view.Handle(desc.type);
|
||||
textures[texture_binding] =
|
||||
image_view.SampledView(desc.type, desc.component_type);
|
||||
if (texture_cache.IsRescaling(image_view)) {
|
||||
texture_scaling_mask |= 1u << texture_binding;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -25,6 +28,10 @@ public:
|
||||
|
||||
void Wait();
|
||||
|
||||
[[nodiscard]] u64 WaitTick() const noexcept {
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLSync sync_object;
|
||||
};
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <bit>
|
||||
@@ -18,6 +19,7 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
#if defined(_MSC_VER) && defined(NDEBUG)
|
||||
@@ -39,6 +41,38 @@ using VideoCommon::ImageId;
|
||||
constexpr u32 MAX_TEXTURES = 64;
|
||||
constexpr u32 MAX_IMAGES = 8;
|
||||
|
||||
std::optional<VideoCore::Surface::PixelFormatNumeric>
|
||||
NumericFromComponentType(Shader::SamplerComponentType component_type) {
|
||||
using VideoCore::Surface::PixelFormatNumeric;
|
||||
switch (component_type) {
|
||||
case Shader::SamplerComponentType::Float:
|
||||
return PixelFormatNumeric::Float;
|
||||
case Shader::SamplerComponentType::Sint:
|
||||
return PixelFormatNumeric::Sint;
|
||||
case Shader::SamplerComponentType::Uint:
|
||||
return PixelFormatNumeric::Uint;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
VideoCore::Surface::PixelFormat ResolveTexelBufferFormat(
|
||||
VideoCore::Surface::PixelFormat format, Shader::SamplerComponentType component_type) {
|
||||
const auto desired_numeric = NumericFromComponentType(component_type);
|
||||
if (!desired_numeric) {
|
||||
return format;
|
||||
}
|
||||
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
|
||||
if (*desired_numeric == current_numeric) {
|
||||
return format;
|
||||
}
|
||||
if (const auto variant =
|
||||
VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
|
||||
return *variant;
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
GLenum Stage(size_t stage_index) {
|
||||
switch (stage_index) {
|
||||
case 0:
|
||||
@@ -397,8 +431,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
|
||||
auto buffer_format = image_view.format;
|
||||
if constexpr (!is_image) {
|
||||
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
|
||||
}
|
||||
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
image_view.BufferSize(), buffer_format,
|
||||
is_written, is_image);
|
||||
++index;
|
||||
++texture_buffer_it;
|
||||
@@ -483,7 +521,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
|
||||
textures[texture_binding] = image_view.Handle(desc.type);
|
||||
textures[texture_binding] =
|
||||
image_view.SampledView(desc.type, desc.component_type);
|
||||
if (texture_cache.IsRescaling(image_view)) {
|
||||
texture_scaling_mask |= 1u << stage_texture_binding;
|
||||
}
|
||||
|
||||
@@ -220,6 +220,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_gl_sparse_textures = device.HasSparseTexture2(),
|
||||
.support_gl_derivative_control = device.HasDerivativeControl(),
|
||||
.support_geometry_streams = true,
|
||||
.warp_stage_support_mask = 0xFFFFFFFFu,
|
||||
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
|
||||
|
||||
|
||||
@@ -692,6 +692,15 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
|
||||
return device.HasASTC();
|
||||
}
|
||||
|
||||
bool TextureCacheRuntime::SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept {
|
||||
using VideoCore::Surface::GetFormatType;
|
||||
using VideoCore::Surface::IsPixelFormatInteger;
|
||||
if (IsPixelFormatInteger(format)) {
|
||||
return false;
|
||||
}
|
||||
return GetFormatType(format) == VideoCore::Surface::SurfaceType::ColorTexture;
|
||||
}
|
||||
|
||||
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||
VAddr cpu_addr_)
|
||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
|
||||
@@ -1229,6 +1238,13 @@ GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFor
|
||||
return view;
|
||||
}
|
||||
|
||||
GLuint ImageView::SampledView(Shader::TextureType view_type,
|
||||
Shader::SamplerComponentType /*component_type*/) {
|
||||
// OpenGL swizzles already configure depth/stencil selection per TIC entry,
|
||||
// so fall back to the default view handle.
|
||||
return Handle(view_type);
|
||||
}
|
||||
|
||||
void ImageView::SetupView(Shader::TextureType view_type) {
|
||||
views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -13,6 +16,7 @@
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/image_view_base.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
|
||||
@@ -129,6 +133,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const noexcept;
|
||||
|
||||
bool HasBrokenTextureViewFormats() const noexcept {
|
||||
return has_broken_texture_view_formats;
|
||||
}
|
||||
@@ -137,6 +143,8 @@ public:
|
||||
|
||||
void TickFrame() {}
|
||||
|
||||
void WaitForGpuTick(u64) {}
|
||||
|
||||
StateTracker& GetStateTracker() {
|
||||
return state_tracker;
|
||||
}
|
||||
@@ -264,6 +272,9 @@ public:
|
||||
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format);
|
||||
|
||||
[[nodiscard]] GLuint SampledView(Shader::TextureType view_type,
|
||||
Shader::SamplerComponentType component_type);
|
||||
|
||||
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
|
||||
return views[static_cast<size_t>(handle_type)];
|
||||
}
|
||||
|
||||
@@ -525,18 +525,24 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
|
||||
nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))),
|
||||
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
|
||||
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
|
||||
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
|
||||
blit_depth_stencil_frag(device.IsExtShaderStencilExportSupported()
|
||||
? BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)
|
||||
: vk::ShaderModule{}),
|
||||
clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
|
||||
clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
|
||||
clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)),
|
||||
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
|
||||
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
|
||||
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
|
||||
convert_abgr8_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
|
||||
? BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)
|
||||
: vk::ShaderModule{}),
|
||||
convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)),
|
||||
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
|
||||
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
|
||||
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
|
||||
convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)),
|
||||
convert_abgr8_srgb_to_d24s8_frag(device.IsExtShaderStencilExportSupported()
|
||||
? BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)
|
||||
: vk::ShaderModule{}),
|
||||
convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)),
|
||||
convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)),
|
||||
convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)),
|
||||
@@ -667,6 +673,11 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
|
||||
|
||||
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
if (!device.IsExtShaderStencilExportSupported()) {
|
||||
// Shader requires VK_EXT_shader_stencil_export which is not available
|
||||
LOG_WARNING(Render_Vulkan, "ConvertABGR8ToD24S8 requires shader_stencil_export, skipping");
|
||||
return;
|
||||
}
|
||||
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
|
||||
convert_abgr8_to_d24s8_frag);
|
||||
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
|
||||
@@ -702,6 +713,11 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
|
||||
|
||||
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
if (!device.IsExtShaderStencilExportSupported()) {
|
||||
// Shader requires VK_EXT_shader_stencil_export which is not available
|
||||
LOG_WARNING(Render_Vulkan, "ConvertABGR8SRGBToD24S8 requires shader_stencil_export, skipping");
|
||||
return;
|
||||
}
|
||||
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
convert_abgr8_srgb_to_d24s8_frag);
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "video_core/engines/draw_manager.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
@@ -60,13 +59,13 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
|
||||
raw1 = 0;
|
||||
extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
|
||||
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
|
||||
extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
|
||||
extended_dynamic_state_2_logic_op.Assign(features.has_extended_dynamic_state_2_logic_op ? 1 : 0);
|
||||
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
|
||||
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
|
||||
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
|
||||
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
|
||||
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
|
||||
polygon_mode.Assign(PackPolygonMode(VideoCore::EffectivePolygonMode(regs)));
|
||||
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
|
||||
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
|
||||
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
|
||||
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
|
||||
@@ -158,7 +157,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
|
||||
return static_cast<u16>(array.stride.Value());
|
||||
});
|
||||
}
|
||||
if (!extended_dynamic_state_2_extra) {
|
||||
if (!extended_dynamic_state_2_logic_op) {
|
||||
dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
|
||||
}
|
||||
if (!extended_dynamic_state_3_blend) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -20,9 +23,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
struct DynamicFeatures {
|
||||
bool has_extended_dynamic_state;
|
||||
bool has_extended_dynamic_state_2;
|
||||
bool has_extended_dynamic_state_2_extra;
|
||||
bool has_extended_dynamic_state_2_logic_op;
|
||||
bool has_extended_dynamic_state_2_patch_control_points;
|
||||
bool has_extended_dynamic_state_3_blend;
|
||||
bool has_extended_dynamic_state_3_enables;
|
||||
bool has_dual_source_blend;
|
||||
bool has_dynamic_vertex_input;
|
||||
};
|
||||
|
||||
@@ -186,7 +191,7 @@ struct FixedPipelineState {
|
||||
u32 raw1;
|
||||
BitField<0, 1, u32> extended_dynamic_state;
|
||||
BitField<1, 1, u32> extended_dynamic_state_2;
|
||||
BitField<2, 1, u32> extended_dynamic_state_2_extra;
|
||||
BitField<2, 1, u32> extended_dynamic_state_2_logic_op;
|
||||
BitField<3, 1, u32> extended_dynamic_state_3_blend;
|
||||
BitField<4, 1, u32> extended_dynamic_state_3_enables;
|
||||
BitField<5, 1, u32> dynamic_vertex_input;
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Vulkan::LineLoop {
|
||||
|
||||
inline void CopyWithClosureRaw(std::span<u8> dst, std::span<const u8> src, size_t element_size) {
|
||||
ASSERT_MSG(dst.size() == src.size() + element_size, "Invalid line loop copy sizes");
|
||||
if (src.empty()) {
|
||||
if (!dst.empty()) {
|
||||
std::fill(dst.begin(), dst.end(), u8{0});
|
||||
}
|
||||
return;
|
||||
}
|
||||
std::memcpy(dst.data(), src.data(), src.size());
|
||||
std::memcpy(dst.data() + src.size(), src.data(), element_size);
|
||||
}
|
||||
|
||||
inline void GenerateSequentialWithClosureRaw(std::span<u8> dst, size_t element_size,
|
||||
u64 start_value = 0) {
|
||||
if (dst.empty()) {
|
||||
return;
|
||||
}
|
||||
const size_t last = dst.size() - element_size;
|
||||
size_t offset = 0;
|
||||
u64 value = start_value;
|
||||
while (offset < last) {
|
||||
std::memcpy(dst.data() + offset, &value, element_size);
|
||||
offset += element_size;
|
||||
++value;
|
||||
}
|
||||
std::memcpy(dst.data() + offset, &start_value, element_size);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CopyWithClosure(std::span<T> dst, std::span<const T> src) {
|
||||
ASSERT_MSG(dst.size() == src.size() + 1, "Invalid destination size for line loop copy");
|
||||
if (src.empty()) {
|
||||
if (!dst.empty()) {
|
||||
dst.front() = {};
|
||||
}
|
||||
return;
|
||||
}
|
||||
std::copy(src.begin(), src.end(), dst.begin());
|
||||
dst.back() = src.front();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void GenerateSequentialWithClosure(std::span<T> dst, T start_value = {}) {
|
||||
if (dst.empty()) {
|
||||
return;
|
||||
}
|
||||
const size_t last = dst.size() - 1;
|
||||
for (size_t i = 0; i < last; ++i) {
|
||||
dst[i] = static_cast<T>(start_value + static_cast<T>(i));
|
||||
}
|
||||
dst.back() = start_value;
|
||||
}
|
||||
|
||||
} // namespace Vulkan::LineLoop
|
||||
@@ -1,6 +1,3 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -168,7 +165,7 @@ struct FormatTuple {
|
||||
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
|
||||
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
|
||||
{VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT
|
||||
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB
|
||||
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable | Storage}, // A8B8G8R8_SRGB
|
||||
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
|
||||
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
|
||||
{VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
|
||||
@@ -180,7 +177,7 @@ struct FormatTuple {
|
||||
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
|
||||
{VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM
|
||||
{VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM
|
||||
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
|
||||
{VK_FORMAT_B8G8R8A8_SRGB, Attachable | Storage}, // B8G8R8A8_SRGB
|
||||
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
|
||||
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
|
||||
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
|
||||
@@ -326,9 +323,44 @@ VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
|
||||
}
|
||||
|
||||
VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
|
||||
Maxwell::PrimitiveTopology topology,
|
||||
Maxwell::PolygonMode polygon_mode) {
|
||||
return detail::PrimitiveTopologyNoDevice(topology, polygon_mode);
|
||||
Maxwell::PrimitiveTopology topology) {
|
||||
switch (topology) {
|
||||
case Maxwell::PrimitiveTopology::Points:
|
||||
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
||||
case Maxwell::PrimitiveTopology::Lines:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineLoop:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::TriangleFan:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
case Maxwell::PrimitiveTopology::LinesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::LineStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::Quads:
|
||||
case Maxwell::PrimitiveTopology::QuadStrip:
|
||||
// TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
|
||||
// whenever it releases
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::Patches:
|
||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
case Maxwell::PrimitiveTopology::Polygon:
|
||||
LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a "
|
||||
"single body and not a bunch of triangles.");
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
|
||||
return {};
|
||||
}
|
||||
|
||||
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -18,52 +15,6 @@ namespace Vulkan::MaxwellToVK {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
|
||||
namespace detail {
|
||||
constexpr VkPrimitiveTopology PrimitiveTopologyNoDevice(Maxwell::PrimitiveTopology topology,
|
||||
Maxwell::PolygonMode polygon_mode) {
|
||||
switch (topology) {
|
||||
case Maxwell::PrimitiveTopology::Points:
|
||||
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
||||
case Maxwell::PrimitiveTopology::Lines:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineLoop:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::TriangleFan:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
case Maxwell::PrimitiveTopology::LinesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::LineStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::Quads:
|
||||
case Maxwell::PrimitiveTopology::QuadStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::Patches:
|
||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
case Maxwell::PrimitiveTopology::Polygon:
|
||||
switch (polygon_mode) {
|
||||
case Maxwell::PolygonMode::Fill:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
case Maxwell::PolygonMode::Line:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PolygonMode::Point:
|
||||
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
VkFilter Filter(Tegra::Texture::TextureFilter filter);
|
||||
@@ -95,8 +46,7 @@ struct FormatInfo {
|
||||
|
||||
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
|
||||
|
||||
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology,
|
||||
Maxwell::PolygonMode polygon_mode);
|
||||
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
|
||||
|
||||
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
|
||||
Maxwell::VertexAttribute::Size size);
|
||||
|
||||
@@ -189,12 +189,16 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
|
||||
const VideoCommon::ImageViewId image_view_id{(views++)->id};
|
||||
const VideoCommon::SamplerId sampler_id{*(samplers++)};
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
|
||||
const VkImageView vk_image_view{image_view.Handle(desc.type)};
|
||||
const VkImageView vk_image_view{
|
||||
image_view.SampledView(desc.type, desc.component_type)};
|
||||
const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
|
||||
const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
|
||||
!image_view.SupportsAnisotropy()};
|
||||
const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
|
||||
: sampler.Handle()};
|
||||
const bool supports_linear_filter{
|
||||
texture_cache.SupportsLinearFilter(image_view.format)};
|
||||
const bool supports_depth_compare_sampling{
|
||||
image_view.SupportsDepthCompareSampling()};
|
||||
const VkSampler vk_sampler{
|
||||
sampler.SelectHandle(supports_linear_filter, image_view.SupportsAnisotropy(),
|
||||
supports_depth_compare_sampling)};
|
||||
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
|
||||
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
|
||||
}
|
||||
|
||||
@@ -280,7 +280,6 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
buffer.Flush(); // Ensure host writes are visible before the GPU copy.
|
||||
}
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
@@ -594,7 +595,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
|
||||
if (index >= device.GetMaxVertexInputBindings()) {
|
||||
return;
|
||||
}
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
|
||||
ReserveNullBuffer();
|
||||
buffer = *null_buffer;
|
||||
offset = 0;
|
||||
size = std::numeric_limits<u32>::max();
|
||||
}
|
||||
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
|
||||
// When VIDS is active, the pipeline doesn't declare VERTEX_INPUT_BINDING_STRIDE as dynamic
|
||||
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
|
||||
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
|
||||
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
|
||||
@@ -634,7 +643,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
|
||||
if (binding_count == 0) {
|
||||
return;
|
||||
}
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
// Use BindVertexBuffers2EXT only if EDS1 is supported AND VIDS is not active
|
||||
if (device.IsExtExtendedDynamicStateSupported() && !device.IsExtVertexInputDynamicStateSupported()) {
|
||||
scheduler.Record([bindings_ = std::move(bindings),
|
||||
buffer_handles_ = std::move(buffer_handles),
|
||||
binding_count](vk::CommandBuffer cmdbuf) {
|
||||
@@ -691,27 +701,50 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveNullBuffer() {
|
||||
const VkBufferUsageFlags expected_usage = NullBufferUsageFlags();
|
||||
if (null_buffer && null_buffer_usage_flags != expected_usage) {
|
||||
RefreshNullBuffer();
|
||||
}
|
||||
if (!null_buffer) {
|
||||
null_buffer = CreateNullBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
VkBufferUsageFlags BufferCacheRuntime::NullBufferUsageFlags() const {
|
||||
VkBufferUsageFlags usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
|
||||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
return usage;
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::RefreshNullBuffer() {
|
||||
if (!null_buffer) {
|
||||
return;
|
||||
}
|
||||
scheduler.Finish();
|
||||
null_buffer.reset();
|
||||
null_buffer = CreateNullBuffer();
|
||||
}
|
||||
|
||||
vk::Buffer BufferCacheRuntime::CreateNullBuffer() {
|
||||
VkBufferCreateInfo create_info{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = 4,
|
||||
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
|
||||
.usage = NullBufferUsageFlags(),
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
|
||||
null_buffer_usage_flags = create_info.usage;
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
ret.SetObjectNameEXT("Null buffer");
|
||||
}
|
||||
|
||||
@@ -131,6 +131,9 @@ public:
|
||||
|
||||
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
|
||||
|
||||
/// Forces destruction and recreation of the shared null buffer so new usage flags take effect.
|
||||
void RefreshNullBuffer();
|
||||
|
||||
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
|
||||
[[maybe_unused]] u32 /*binding_index*/,
|
||||
u32 size) {
|
||||
@@ -174,6 +177,7 @@ private:
|
||||
|
||||
void ReserveNullBuffer();
|
||||
vk::Buffer CreateNullBuffer();
|
||||
VkBufferUsageFlags NullBufferUsageFlags() const;
|
||||
|
||||
struct UniformRing {
|
||||
static constexpr size_t NUM_FRAMES = 3;
|
||||
@@ -203,6 +207,7 @@ private:
|
||||
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
|
||||
|
||||
vk::Buffer null_buffer;
|
||||
VkBufferUsageFlags null_buffer_usage_flags = 0;
|
||||
|
||||
std::unique_ptr<Uint8Pass> uint8_pass;
|
||||
QuadIndexedPass quad_index_pass;
|
||||
|
||||
@@ -418,6 +418,9 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
|
||||
|
||||
void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
u32 src_offset, bool compare_to_zero) {
|
||||
if (!device.IsExtConditionalRendering()) {
|
||||
return;
|
||||
}
|
||||
const size_t compare_size = compare_to_zero ? 8 : 24;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
@@ -448,7 +451,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.Dispatch(1, 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -459,10 +462,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
|
||||
device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
|
||||
QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT)
|
||||
? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV)
|
||||
: std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV)),
|
||||
scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
|
||||
@@ -470,6 +477,14 @@ QueriesPrefixScanPass::QueriesPrefixScanPass(
|
||||
void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
|
||||
VkBuffer src_buffer, size_t number_of_sums,
|
||||
size_t min_accumulation_limit, size_t max_accumulation_limit) {
|
||||
constexpr VkAccessFlags BASE_DST_ACCESS = VK_ACCESS_SHADER_READ_BIT |
|
||||
VK_ACCESS_TRANSFER_READ_BIT |
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
|
||||
VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
|
||||
VK_ACCESS_INDEX_READ_BIT |
|
||||
VK_ACCESS_UNIFORM_READ_BIT;
|
||||
const VkAccessFlags conditional_access =
|
||||
device.IsExtConditionalRendering() ? VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT : 0;
|
||||
size_t current_runs = number_of_sums;
|
||||
size_t offset = 0;
|
||||
while (current_runs != 0) {
|
||||
@@ -486,22 +501,18 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit,
|
||||
runs_to_do, used_offset](vk::CommandBuffer cmdbuf) {
|
||||
runs_to_do, used_offset, conditional_access](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr VkMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier write_barrier{
|
||||
const VkMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT |
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
|
||||
VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
|
||||
VK_ACCESS_UNIFORM_READ_BIT |
|
||||
VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
|
||||
.dstAccessMask = BASE_DST_ACCESS | conditional_access,
|
||||
};
|
||||
const QueriesPrefixScanPushConstants uniforms{
|
||||
.min_accumulation_base = static_cast<u32>(min_accumulation_limit),
|
||||
@@ -519,8 +530,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
|
||||
cmdbuf.Dispatch(1, 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0,
|
||||
write_barrier);
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,14 +18,49 @@
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
#include <optional>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
|
||||
using Tegra::Texture::TexturePair;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::PixelFormatNumeric;
|
||||
|
||||
static std::optional<PixelFormatNumeric> NumericFromComponentType(
|
||||
Shader::SamplerComponentType component_type) {
|
||||
switch (component_type) {
|
||||
case Shader::SamplerComponentType::Float:
|
||||
return PixelFormatNumeric::Float;
|
||||
case Shader::SamplerComponentType::Sint:
|
||||
return PixelFormatNumeric::Sint;
|
||||
case Shader::SamplerComponentType::Uint:
|
||||
return PixelFormatNumeric::Uint;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
static PixelFormat ResolveTexelBufferFormat(PixelFormat format,
|
||||
Shader::SamplerComponentType component_type) {
|
||||
const auto desired_numeric = NumericFromComponentType(component_type);
|
||||
if (!desired_numeric) {
|
||||
return format;
|
||||
}
|
||||
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
|
||||
if (*desired_numeric == current_numeric) {
|
||||
return format;
|
||||
}
|
||||
if (const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
|
||||
return *variant;
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
|
||||
DescriptorPool& descriptor_pool,
|
||||
@@ -182,8 +217,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view = texture_cache.GetImageView(views[index].id);
|
||||
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
|
||||
if constexpr (!is_image) {
|
||||
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
|
||||
}
|
||||
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
image_view.BufferSize(), buffer_format,
|
||||
is_written, is_image);
|
||||
++index;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,10 @@ public:
|
||||
|
||||
void Wait();
|
||||
|
||||
[[nodiscard]] u64 WaitTick() const noexcept {
|
||||
return wait_tick;
|
||||
}
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
u64 wait_tick = 0;
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <iostream>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
@@ -23,9 +24,10 @@
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/texture_cache/samples_helper.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
#if defined(_MSC_VER) && defined(NDEBUG)
|
||||
@@ -46,10 +48,83 @@ using Tegra::Texture::TexturePair;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
using VideoCore::Surface::PixelFormatNumeric;
|
||||
|
||||
constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
|
||||
constexpr size_t MAX_IMAGE_ELEMENTS = 64;
|
||||
|
||||
std::optional<PixelFormatNumeric> NumericFromComponentType(
|
||||
Shader::SamplerComponentType component_type) {
|
||||
switch (component_type) {
|
||||
case Shader::SamplerComponentType::Float:
|
||||
return PixelFormatNumeric::Float;
|
||||
case Shader::SamplerComponentType::Sint:
|
||||
return PixelFormatNumeric::Sint;
|
||||
case Shader::SamplerComponentType::Uint:
|
||||
return PixelFormatNumeric::Uint;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
PixelFormat ResolveTexelBufferFormat(PixelFormat format,
|
||||
Shader::SamplerComponentType component_type) {
|
||||
const auto desired_numeric = NumericFromComponentType(component_type);
|
||||
if (!desired_numeric) {
|
||||
return format;
|
||||
}
|
||||
const auto current_numeric = VideoCore::Surface::GetPixelFormatNumericType(format);
|
||||
if (*desired_numeric == current_numeric) {
|
||||
return format;
|
||||
}
|
||||
if (const auto variant = VideoCore::Surface::FindPixelFormatVariant(format, *desired_numeric)) {
|
||||
return *variant;
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
bool UsesDualSourceFactor(Maxwell::Blend::Factor factor) {
|
||||
switch (factor) {
|
||||
case Maxwell::Blend::Factor::Source1Color_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Color_GL:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
|
||||
case Maxwell::Blend::Factor::Source1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Alpha_GL:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor FallbackDualSourceFactor(Maxwell::Blend::Factor factor) {
|
||||
switch (factor) {
|
||||
case Maxwell::Blend::Factor::Source1Color_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Color_GL:
|
||||
return Maxwell::Blend::Factor::SourceColor_D3D;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
|
||||
return Maxwell::Blend::Factor::OneMinusSourceColor_D3D;
|
||||
case Maxwell::Blend::Factor::Source1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Alpha_GL:
|
||||
return Maxwell::Blend::Factor::SourceAlpha_D3D;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
|
||||
return Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D;
|
||||
default:
|
||||
return factor;
|
||||
}
|
||||
}
|
||||
|
||||
bool AttachmentUsesDualSource(const FixedPipelineState::BlendingAttachment& blend) {
|
||||
return UsesDualSourceFactor(blend.SourceRGBFactor()) ||
|
||||
UsesDualSourceFactor(blend.DestRGBFactor()) ||
|
||||
UsesDualSourceFactor(blend.SourceAlphaFactor()) ||
|
||||
UsesDualSourceFactor(blend.DestAlphaFactor());
|
||||
}
|
||||
|
||||
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
for (size_t index = 0; index < infos.size(); ++index) {
|
||||
@@ -265,6 +340,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||
}
|
||||
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||
uses_push_descriptor = builder.CanUsePushDescriptor();
|
||||
@@ -418,8 +494,12 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
|
||||
VideoCore::Surface::PixelFormat buffer_format = image_view.format;
|
||||
if constexpr (!is_image) {
|
||||
buffer_format = ResolveTexelBufferFormat(buffer_format, desc.component_type);
|
||||
}
|
||||
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
image_view.BufferSize(), buffer_format,
|
||||
is_written, is_image);
|
||||
++index;
|
||||
++texture_buffer_it;
|
||||
@@ -616,10 +696,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
vertex_input_ci.pNext = &input_divisor_ci;
|
||||
}
|
||||
const bool has_tess_stages = spv_modules[1] || spv_modules[2];
|
||||
const auto polygon_mode =
|
||||
FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode.Value());
|
||||
auto input_assembly_topology =
|
||||
MaxwellToVK::PrimitiveTopology(device, key.state.topology, polygon_mode);
|
||||
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
|
||||
if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
|
||||
if (!has_tess_stages) {
|
||||
LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
|
||||
@@ -634,33 +711,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
}
|
||||
}
|
||||
if (key.state.topology == Maxwell::PrimitiveTopology::Polygon) {
|
||||
const auto polygon_mode_name = [polygon_mode]() -> std::string_view {
|
||||
switch (polygon_mode) {
|
||||
case Maxwell::PolygonMode::Fill:
|
||||
return "Fill";
|
||||
case Maxwell::PolygonMode::Line:
|
||||
return "Line";
|
||||
case Maxwell::PolygonMode::Point:
|
||||
return "Point";
|
||||
}
|
||||
return "Unknown";
|
||||
}();
|
||||
const auto vk_topology_name = [input_assembly_topology]() -> std::string_view {
|
||||
switch (input_assembly_topology) {
|
||||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||||
return "TriangleFan";
|
||||
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||||
return "LineStrip";
|
||||
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
||||
return "PointList";
|
||||
default:
|
||||
return "Unexpected";
|
||||
}
|
||||
}();
|
||||
LOG_DEBUG(Render_Vulkan, "Polygon primitive in {} mode mapped to {}", polygon_mode_name,
|
||||
vk_topology_name);
|
||||
}
|
||||
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
@@ -734,13 +784,18 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
.lineWidth = 1.0f,
|
||||
// TODO(alekpop): Transfer from regs
|
||||
};
|
||||
const bool smooth_lines_supported =
|
||||
device.IsExtLineRasterizationSupported() && device.SupportsSmoothLines();
|
||||
const bool stippled_lines_supported =
|
||||
device.IsExtLineRasterizationSupported() && device.SupportsStippledRectangularLines();
|
||||
VkPipelineRasterizationLineStateCreateInfoEXT line_state{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.lineRasterizationMode = key.state.smooth_lines != 0
|
||||
.lineRasterizationMode = key.state.smooth_lines != 0 && smooth_lines_supported
|
||||
? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
|
||||
: VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
|
||||
.stippledLineEnable = dynamic.line_stipple_enable ? VK_TRUE : VK_FALSE,
|
||||
.stippledLineEnable =
|
||||
(dynamic.line_stipple_enable && stippled_lines_supported) ? VK_TRUE : VK_FALSE,
|
||||
.lineStippleFactor = key.state.line_stipple_factor,
|
||||
.lineStipplePattern = static_cast<uint16_t>(key.state.line_stipple_pattern),
|
||||
};
|
||||
@@ -771,17 +826,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
|
||||
}
|
||||
|
||||
const VkPipelineMultisampleStateCreateInfo multisample_ci{
|
||||
const bool supports_alpha_output = fragment_has_color0_output;
|
||||
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
|
||||
const auto msaa_mode = key.state.msaa_mode.Value();
|
||||
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
|
||||
VkPipelineMultisampleStateCreateInfo multisample_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
|
||||
.rasterizationSamples = vk_samples,
|
||||
.sampleShadingEnable = Settings::values.sample_shading.GetValue() ? VK_TRUE : VK_FALSE,
|
||||
.minSampleShading = static_cast<float>(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
|
||||
.pSampleMask = nullptr,
|
||||
.alphaToCoverageEnable = key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
|
||||
.alphaToOneEnable = key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
|
||||
.alphaToCoverageEnable =
|
||||
supports_alpha_output && key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
|
||||
.alphaToOneEnable = supports_alpha_output && alpha_to_one_supported &&
|
||||
key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
|
||||
};
|
||||
|
||||
|
||||
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
@@ -803,6 +866,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
}
|
||||
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
|
||||
const size_t num_attachments{NumAttachments(key.state)};
|
||||
const bool supports_dual_source_blend = device.SupportsDualSourceBlend();
|
||||
const u32 max_dual_source_attachments = supports_dual_source_blend
|
||||
? device.MaxFragmentDualSrcAttachments()
|
||||
: 0;
|
||||
u32 granted_dual_source_attachments = 0;
|
||||
bool logged_dual_source_warning = false;
|
||||
for (size_t index = 0; index < num_attachments; ++index) {
|
||||
static constexpr std::array mask_table{
|
||||
VK_COLOR_COMPONENT_R_BIT,
|
||||
@@ -816,13 +885,30 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
for (size_t i = 0; i < mask_table.size(); ++i) {
|
||||
write_mask |= mask[i] ? mask_table[i] : 0;
|
||||
}
|
||||
const bool attachment_uses_dual_source = AttachmentUsesDualSource(blend);
|
||||
const bool allow_dual_source = attachment_uses_dual_source && supports_dual_source_blend &&
|
||||
granted_dual_source_attachments < max_dual_source_attachments;
|
||||
if (allow_dual_source) {
|
||||
++granted_dual_source_attachments;
|
||||
} else if (attachment_uses_dual_source && !logged_dual_source_warning) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Dual-source blend factors exceed device limit (maxFragmentDualSrcAttachments={}), falling back to single-source factors",
|
||||
max_dual_source_attachments);
|
||||
logged_dual_source_warning = true;
|
||||
}
|
||||
const auto sanitize_factor = [&](Maxwell::Blend::Factor factor) {
|
||||
if (allow_dual_source || !UsesDualSourceFactor(factor)) {
|
||||
return factor;
|
||||
}
|
||||
return FallbackDualSourceFactor(factor);
|
||||
};
|
||||
cb_attachments.push_back({
|
||||
.blendEnable = blend.enable != 0,
|
||||
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
|
||||
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
|
||||
.srcColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceRGBFactor())),
|
||||
.dstColorBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestRGBFactor())),
|
||||
.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
|
||||
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
|
||||
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
|
||||
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.SourceAlphaFactor())),
|
||||
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(sanitize_factor(blend.DestAlphaFactor())),
|
||||
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
|
||||
.colorWriteMask = write_mask,
|
||||
});
|
||||
@@ -838,14 +924,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
.blendConstants = {}
|
||||
};
|
||||
static_vector<VkDynamicState, 34> dynamic_states{
|
||||
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
|
||||
VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS,
|
||||
VK_DYNAMIC_STATE_LINE_WIDTH,
|
||||
};
|
||||
|
||||
if (device.UsesAdvancedCoreDynamicState()) {
|
||||
static constexpr std::array core_dynamic_states{
|
||||
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
|
||||
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), core_dynamic_states.begin(),
|
||||
core_dynamic_states.end());
|
||||
}
|
||||
if (key.state.extended_dynamic_state) {
|
||||
std::vector<VkDynamicState> extended{
|
||||
static constexpr std::array extended{
|
||||
VK_DYNAMIC_STATE_CULL_MODE_EXT,
|
||||
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
|
||||
@@ -855,51 +952,68 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
|
||||
};
|
||||
if (!device.IsExtVertexInputDynamicStateSupported()) {
|
||||
extended.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
|
||||
}
|
||||
if (key.state.dynamic_vertex_input) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
|
||||
}
|
||||
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
|
||||
if (key.state.extended_dynamic_state_2) {
|
||||
static constexpr std::array extended2{
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
|
||||
|
||||
// VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT is part of EDS1
|
||||
// Only use it if VIDS is not active (VIDS replaces it with full vertex input control)
|
||||
if (!key.state.dynamic_vertex_input) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT);
|
||||
}
|
||||
if (key.state.extended_dynamic_state_2_extra) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
|
||||
}
|
||||
|
||||
// VK_DYNAMIC_STATE_VERTEX_INPUT_EXT (VIDS) - Independent from EDS
|
||||
// Provides full dynamic vertex input control, replaces VERTEX_INPUT_BINDING_STRIDE
|
||||
if (key.state.dynamic_vertex_input) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
|
||||
}
|
||||
|
||||
// EDS2 - Core (3 states)
|
||||
if (key.state.extended_dynamic_state_2) {
|
||||
static constexpr std::array extended2{
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
|
||||
}
|
||||
|
||||
// EDS2 - LogicOp (granular)
|
||||
if (key.state.extended_dynamic_state_2_logic_op) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
|
||||
}
|
||||
|
||||
// EDS3 - Blending (composite: 3 states)
|
||||
if (key.state.extended_dynamic_state_3_blend) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
}
|
||||
|
||||
// EDS3 - Enables (composite: per-feature)
|
||||
if (key.state.extended_dynamic_state_3_enables) {
|
||||
if (device.SupportsDynamicState3DepthClampEnable()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT);
|
||||
}
|
||||
if (key.state.extended_dynamic_state_3_blend) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
|
||||
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
|
||||
|
||||
// VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
if (device.SupportsDynamicState3LogicOpEnable()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT);
|
||||
}
|
||||
if (key.state.extended_dynamic_state_3_enables) {
|
||||
static constexpr std::array extended3{
|
||||
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
|
||||
|
||||
// additional state3 extensions
|
||||
VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT,
|
||||
|
||||
VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT,
|
||||
|
||||
VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
|
||||
if (device.SupportsDynamicState3LineRasterizationMode()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT);
|
||||
}
|
||||
if (device.SupportsDynamicState3ConservativeRasterizationMode()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT);
|
||||
}
|
||||
if (device.SupportsDynamicState3LineStippleEnable()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT);
|
||||
}
|
||||
if (device.SupportsDynamicState3AlphaToCoverageEnable()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT);
|
||||
}
|
||||
if (device.SupportsDynamicState3AlphaToOneEnable()) {
|
||||
dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -82,6 +82,17 @@ public:
|
||||
const std::array<const Shader::Info*, NUM_STAGES>& infos);
|
||||
|
||||
bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; }
|
||||
bool SupportsAlphaToCoverage() const noexcept {
|
||||
return fragment_has_color0_output;
|
||||
}
|
||||
|
||||
bool SupportsAlphaToOne() const noexcept {
|
||||
return fragment_has_color0_output;
|
||||
}
|
||||
|
||||
bool UsesExtendedDynamicState() const noexcept {
|
||||
return key.state.extended_dynamic_state != 0;
|
||||
}
|
||||
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
|
||||
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
|
||||
|
||||
@@ -149,6 +160,7 @@ private:
|
||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||
u32 num_textures{};
|
||||
bool fragment_has_color0_output{};
|
||||
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
@@ -105,6 +106,41 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
|
||||
return {};
|
||||
}
|
||||
|
||||
Shader::AttributeType RenderTargetAttributeType(Tegra::RenderTargetFormat format) {
|
||||
if (format == Tegra::RenderTargetFormat::NONE) {
|
||||
return Shader::AttributeType::Float;
|
||||
}
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromRenderTargetFormat(format)};
|
||||
if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
|
||||
return Shader::AttributeType::Float;
|
||||
}
|
||||
if (VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)) {
|
||||
return Shader::AttributeType::SignedInt;
|
||||
}
|
||||
return Shader::AttributeType::UnsignedInt;
|
||||
}
|
||||
|
||||
VkShaderStageFlagBits StageToVkStage(Shader::Stage stage) {
|
||||
switch (stage) {
|
||||
case Shader::Stage::VertexA:
|
||||
case Shader::Stage::VertexB:
|
||||
return VK_SHADER_STAGE_VERTEX_BIT;
|
||||
case Shader::Stage::TessellationControl:
|
||||
return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
case Shader::Stage::TessellationEval:
|
||||
return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
|
||||
case Shader::Stage::Geometry:
|
||||
return VK_SHADER_STAGE_GEOMETRY_BIT;
|
||||
case Shader::Stage::Fragment:
|
||||
return VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
case Shader::Stage::Compute:
|
||||
return VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
default:
|
||||
return VK_SHADER_STAGE_VERTEX_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
|
||||
if (attr.enabled == 0) {
|
||||
return Shader::AttributeType::Disabled;
|
||||
@@ -229,6 +265,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
|
||||
info.alpha_test_func = MaxwellToCompareFunction(
|
||||
key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
|
||||
info.alpha_test_reference = std::bit_cast<float>(key.state.alpha_test_ref);
|
||||
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
const auto format = static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[index]);
|
||||
info.color_output_types[index] = RenderTargetAttributeType(format);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -269,9 +309,8 @@ size_t GetTotalPipelineWorkers() {
|
||||
const size_t max_core_threads =
|
||||
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
|
||||
#ifdef ANDROID
|
||||
// Leave at least a few cores free on Android; reserve two instead of three so
|
||||
// pipeline compilation can consume one more worker thread for testing.
|
||||
constexpr size_t free_cores = 2ULL;
|
||||
// Leave at least one core free on Android to reduce thermal pressure.
|
||||
constexpr size_t free_cores = 1ULL;
|
||||
if (max_core_threads <= free_cores) {
|
||||
return 1ULL;
|
||||
}
|
||||
@@ -318,6 +357,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
"VkPipelineBuilder"),
|
||||
serialization_thread(1, "VkPipelineSerialization") {
|
||||
const auto& float_control{device.FloatControlProperties()};
|
||||
const bool float_controls_supported{device.IsKhrShaderFloatControlsSupported()};
|
||||
const VkDriverId driver_id{device.GetDriverID()};
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = device.SupportedSpirvVersion(),
|
||||
@@ -327,20 +367,24 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_int16 = device.IsShaderInt16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
.support_vertex_instance_id = false,
|
||||
.support_float_controls = device.IsKhrShaderFloatControlsSupported(),
|
||||
.support_separate_denorm_behavior =
|
||||
.support_float_controls = float_controls_supported,
|
||||
.support_separate_denorm_behavior = float_controls_supported &&
|
||||
float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
|
||||
.support_separate_rounding_mode =
|
||||
.support_separate_rounding_mode = float_controls_supported &&
|
||||
float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
|
||||
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
|
||||
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
|
||||
.support_fp16_signed_zero_nan_preserve =
|
||||
.support_fp16_denorm_preserve = float_controls_supported &&
|
||||
float_control.shaderDenormPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_preserve = float_controls_supported &&
|
||||
float_control.shaderDenormPreserveFloat32 != VK_FALSE,
|
||||
.support_fp16_denorm_flush = float_controls_supported &&
|
||||
float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_flush = float_controls_supported &&
|
||||
float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
|
||||
.support_fp16_signed_zero_nan_preserve = float_controls_supported &&
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_signed_zero_nan_preserve =
|
||||
.support_fp32_signed_zero_nan_preserve = float_controls_supported &&
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.support_fp64_signed_zero_nan_preserve =
|
||||
.support_fp64_signed_zero_nan_preserve = float_controls_supported &&
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
||||
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
|
||||
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
|
||||
@@ -396,6 +440,27 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
};
|
||||
|
||||
profile.warp_stage_support_mask = 0;
|
||||
static constexpr std::array kAllStages{
|
||||
Shader::Stage::VertexA, Shader::Stage::VertexB,
|
||||
Shader::Stage::TessellationControl, Shader::Stage::TessellationEval,
|
||||
Shader::Stage::Geometry, Shader::Stage::Fragment,
|
||||
Shader::Stage::Compute,
|
||||
};
|
||||
for (const auto stage : kAllStages) {
|
||||
const auto vk_stage = StageToVkStage(stage);
|
||||
if (device.SupportsWarpIntrinsics(vk_stage)) {
|
||||
profile.warp_stage_support_mask |= 1u << static_cast<u32>(stage);
|
||||
}
|
||||
}
|
||||
profile.support_vote = profile.warp_stage_support_mask != 0;
|
||||
|
||||
if (!profile.SupportsWarpIntrinsics(Shader::Stage::Fragment)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Fragment shaders lack subgroup support on this driver; warp intrinsics will be "
|
||||
"approximated and visual artifacts may remain");
|
||||
}
|
||||
|
||||
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
||||
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
|
||||
device.GetMaxVertexInputAttributes(), Maxwell::NumVertexAttributes);
|
||||
@@ -405,14 +470,40 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
|
||||
}
|
||||
|
||||
dynamic_features = DynamicFeatures{
|
||||
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(),
|
||||
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(),
|
||||
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(),
|
||||
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(),
|
||||
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(),
|
||||
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
|
||||
};
|
||||
LOG_INFO(Render_Vulkan, "DynamicState setting value: {}", Settings::values.dyna_state.GetValue());
|
||||
|
||||
dynamic_features = {};
|
||||
|
||||
// User granularity enforced in vulkan_device.cpp switch statement:
|
||||
// Level 0: Core Dynamic States only
|
||||
// Level 1: Core + EDS1
|
||||
// Level 2: Core + EDS1 + EDS2 (accumulative)
|
||||
// Level 3: Core + EDS1 + EDS2 + EDS3 (accumulative)
|
||||
// Here we only verify if extensions were successfully loaded by the device
|
||||
|
||||
dynamic_features.has_extended_dynamic_state =
|
||||
device.IsExtExtendedDynamicStateSupported();
|
||||
|
||||
dynamic_features.has_extended_dynamic_state_2 =
|
||||
device.IsExtExtendedDynamicState2Supported();
|
||||
dynamic_features.has_extended_dynamic_state_2_logic_op =
|
||||
device.IsExtExtendedDynamicState2ExtrasSupported();
|
||||
dynamic_features.has_extended_dynamic_state_2_patch_control_points = false;
|
||||
|
||||
dynamic_features.has_extended_dynamic_state_3_blend =
|
||||
device.IsExtExtendedDynamicState3BlendingSupported();
|
||||
dynamic_features.has_dual_source_blend = device.SupportsDualSourceBlend();
|
||||
if (!dynamic_features.has_dual_source_blend) {
|
||||
LOG_WARNING(Render_Vulkan, "Dual-source blending unsupported, disabling dynamic blend");
|
||||
dynamic_features.has_extended_dynamic_state_3_blend = false;
|
||||
}
|
||||
dynamic_features.has_extended_dynamic_state_3_enables =
|
||||
device.IsExtExtendedDynamicState3EnablesSupported();
|
||||
|
||||
// VIDS: Independent toggle (not affected by dyna_state levels)
|
||||
dynamic_features.has_dynamic_vertex_input =
|
||||
device.IsExtVertexInputDynamicStateSupported() &&
|
||||
Settings::values.vertex_input_dynamic_state.GetValue();
|
||||
}
|
||||
|
||||
PipelineCache::~PipelineCache() {
|
||||
@@ -422,6 +513,13 @@ PipelineCache::~PipelineCache() {
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::DrainPendingBuilds() {
|
||||
if (!device.HasBrokenParallelShaderCompiling()) {
|
||||
return;
|
||||
}
|
||||
workers.WaitForRequests();
|
||||
}
|
||||
|
||||
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
|
||||
|
||||
if (!RefreshStages(graphics_key.unique_hashes)) {
|
||||
@@ -452,12 +550,17 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
|
||||
.shared_memory_size = qmd.shared_alloc,
|
||||
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
|
||||
};
|
||||
const auto [pair, is_new]{compute_cache.try_emplace(key)};
|
||||
const auto [pair, inserted]{compute_cache.try_emplace(key)};
|
||||
auto& pipeline{pair->second};
|
||||
if (!is_new) {
|
||||
return pipeline.get();
|
||||
if (!pipeline) {
|
||||
auto [slot, should_build] = AcquireComputeBuildSlot(key);
|
||||
if (!should_build) {
|
||||
WaitForBuildCompletion(slot);
|
||||
} else {
|
||||
pipeline = CreateComputePipeline(key, shader);
|
||||
ReleaseComputeBuildSlot(key, slot);
|
||||
}
|
||||
}
|
||||
pipeline = CreateComputePipeline(key, shader);
|
||||
return pipeline.get();
|
||||
}
|
||||
|
||||
@@ -517,8 +620,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
|
||||
dynamic_features.has_extended_dynamic_state ||
|
||||
(key.state.extended_dynamic_state_2 != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_2 ||
|
||||
(key.state.extended_dynamic_state_2_extra != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_2_extra ||
|
||||
(key.state.extended_dynamic_state_2_logic_op != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_2_logic_op ||
|
||||
(key.state.extended_dynamic_state_3_blend != 0) !=
|
||||
dynamic_features.has_extended_dynamic_state_3_blend ||
|
||||
(key.state.extended_dynamic_state_3_enables != 0) !=
|
||||
@@ -573,13 +676,20 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
|
||||
}
|
||||
|
||||
GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
|
||||
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
|
||||
const auto [pair, inserted]{graphics_cache.try_emplace(graphics_key)};
|
||||
auto& pipeline{pair->second};
|
||||
if (is_new) {
|
||||
pipeline = CreateGraphicsPipeline();
|
||||
}
|
||||
if (!pipeline) {
|
||||
return nullptr;
|
||||
const auto key = pair->first;
|
||||
auto [slot, should_build] = AcquireGraphicsBuildSlot(key);
|
||||
if (!should_build) {
|
||||
WaitForBuildCompletion(slot);
|
||||
} else {
|
||||
pipeline = CreateGraphicsPipeline();
|
||||
ReleaseGraphicsBuildSlot(key, slot);
|
||||
}
|
||||
if (!pipeline) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
if (current_pipeline) {
|
||||
current_pipeline->AddTransition(pipeline.get());
|
||||
@@ -602,6 +712,7 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
|
||||
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
|
||||
return pipeline;
|
||||
}
|
||||
scheduler.KeepAliveTick();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -705,6 +816,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||
}
|
||||
LOG_ERROR(Render_Vulkan, "{}", exception.what());
|
||||
return nullptr;
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create graphics pipeline 0x{:016x}: {}", key.Hash(),
|
||||
exception.what());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||
@@ -796,6 +911,10 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||
} catch (const Shader::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "{}", exception.what());
|
||||
return nullptr;
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create compute pipeline 0x{:016x}: {}", key.Hash(),
|
||||
exception.what());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
|
||||
@@ -893,4 +1012,68 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::
|
||||
}
|
||||
}
|
||||
|
||||
auto PipelineCache::AcquireGraphicsBuildSlot(const GraphicsPipelineCacheKey& key)
|
||||
-> std::pair<InFlightPipelinePtr, bool> {
|
||||
std::scoped_lock lock(graphics_inflight_mutex);
|
||||
auto [it, inserted] = graphics_inflight_builds.try_emplace(key);
|
||||
if (inserted || !it->second) {
|
||||
it->second = std::make_shared<InFlightPipelineBuild>();
|
||||
return {it->second, true};
|
||||
}
|
||||
return {it->second, false};
|
||||
}
|
||||
|
||||
auto PipelineCache::AcquireComputeBuildSlot(const ComputePipelineCacheKey& key)
|
||||
-> std::pair<InFlightPipelinePtr, bool> {
|
||||
std::scoped_lock lock(compute_inflight_mutex);
|
||||
auto [it, inserted] = compute_inflight_builds.try_emplace(key);
|
||||
if (inserted || !it->second) {
|
||||
it->second = std::make_shared<InFlightPipelineBuild>();
|
||||
return {it->second, true};
|
||||
}
|
||||
return {it->second, false};
|
||||
}
|
||||
|
||||
void PipelineCache::ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key,
|
||||
const InFlightPipelinePtr& slot) {
|
||||
if (!slot) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
std::scoped_lock slot_lock(slot->mutex);
|
||||
slot->building = false;
|
||||
}
|
||||
slot->cv.notify_all();
|
||||
std::scoped_lock map_lock(graphics_inflight_mutex);
|
||||
auto it = graphics_inflight_builds.find(key);
|
||||
if (it != graphics_inflight_builds.end() && it->second == slot) {
|
||||
graphics_inflight_builds.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key,
|
||||
const InFlightPipelinePtr& slot) {
|
||||
if (!slot) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
std::scoped_lock slot_lock(slot->mutex);
|
||||
slot->building = false;
|
||||
}
|
||||
slot->cv.notify_all();
|
||||
std::scoped_lock map_lock(compute_inflight_mutex);
|
||||
auto it = compute_inflight_builds.find(key);
|
||||
if (it != compute_inflight_builds.end() && it->second == slot) {
|
||||
compute_inflight_builds.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::WaitForBuildCompletion(const InFlightPipelinePtr& slot) const {
|
||||
if (!slot) {
|
||||
return;
|
||||
}
|
||||
std::unique_lock lock(slot->mutex);
|
||||
slot->cv.wait(lock, [&] { return !slot->building; });
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -5,8 +5,10 @@
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <condition_variable>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@@ -113,7 +115,17 @@ public:
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
void DrainPendingBuilds();
|
||||
|
||||
private:
|
||||
struct InFlightPipelineBuild {
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
bool building{true};
|
||||
};
|
||||
|
||||
using InFlightPipelinePtr = std::shared_ptr<InFlightPipelineBuild>;
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
|
||||
@@ -140,6 +152,14 @@ private:
|
||||
vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
|
||||
u32 expected_cache_version);
|
||||
|
||||
std::pair<InFlightPipelinePtr, bool> AcquireGraphicsBuildSlot(
|
||||
const GraphicsPipelineCacheKey& key);
|
||||
std::pair<InFlightPipelinePtr, bool> AcquireComputeBuildSlot(
|
||||
const ComputePipelineCacheKey& key);
|
||||
void ReleaseGraphicsBuildSlot(const GraphicsPipelineCacheKey& key, const InFlightPipelinePtr& slot);
|
||||
void ReleaseComputeBuildSlot(const ComputePipelineCacheKey& key, const InFlightPipelinePtr& slot);
|
||||
void WaitForBuildCompletion(const InFlightPipelinePtr& slot) const;
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
DescriptorPool& descriptor_pool;
|
||||
@@ -158,6 +178,11 @@ private:
|
||||
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||
|
||||
std::mutex graphics_inflight_mutex;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, InFlightPipelinePtr> graphics_inflight_builds;
|
||||
std::mutex compute_inflight_mutex;
|
||||
std::unordered_map<ComputePipelineCacheKey, InFlightPipelinePtr> compute_inflight_builds;
|
||||
|
||||
ShaderPools main_pools;
|
||||
|
||||
Shader::Profile profile;
|
||||
|
||||
@@ -42,7 +42,8 @@ public:
|
||||
static constexpr size_t BANK_SIZE = 256;
|
||||
static constexpr size_t QUERY_SIZE = 8;
|
||||
explicit SamplesQueryBank(const Device& device_, size_t index_)
|
||||
: BankBase(BANK_SIZE), device{device_}, index{index_} {
|
||||
: BankBase(BANK_SIZE), device{device_}, index{index_},
|
||||
supports_host_query_reset{device_.SupportsHostQueryReset()} {
|
||||
const auto& dev = device.GetLogical();
|
||||
query_pool = dev.CreateQueryPool({
|
||||
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
||||
@@ -60,8 +61,10 @@ public:
|
||||
void Reset() override {
|
||||
ASSERT(references == 0);
|
||||
VideoCommon::BankBase::Reset();
|
||||
const auto& dev = device.GetLogical();
|
||||
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
|
||||
if (supports_host_query_reset) {
|
||||
const auto& dev = device.GetLogical();
|
||||
dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
|
||||
}
|
||||
host_results.fill(0ULL);
|
||||
next_bank = 0;
|
||||
}
|
||||
@@ -99,6 +102,7 @@ public:
|
||||
private:
|
||||
const Device& device;
|
||||
const size_t index;
|
||||
const bool supports_host_query_reset;
|
||||
vk::QueryPool query_pool;
|
||||
std::array<u64, BANK_SIZE> host_results;
|
||||
};
|
||||
@@ -1200,21 +1204,24 @@ struct QueryCacheRuntimeImpl {
|
||||
hcr_setup.pNext = nullptr;
|
||||
hcr_setup.flags = 0;
|
||||
|
||||
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
|
||||
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
|
||||
if (device.IsExtConditionalRendering()) {
|
||||
conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
|
||||
device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
|
||||
|
||||
const VkBufferCreateInfo buffer_ci = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = sizeof(u32),
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
|
||||
const VkBufferCreateInfo buffer_ci = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = sizeof(u32),
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
hcr_resolve_buffer =
|
||||
memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
|
||||
}
|
||||
}
|
||||
|
||||
VideoCore::RasterizerInterface* rasterizer;
|
||||
|
||||
@@ -37,7 +37,6 @@
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
@@ -149,8 +148,7 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3
|
||||
return scissor;
|
||||
}
|
||||
|
||||
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed,
|
||||
Maxwell::PolygonMode polygon_mode) {
|
||||
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {
|
||||
DrawParams params{
|
||||
.base_instance = draw_state.base_instance,
|
||||
.num_instances = num_instances,
|
||||
@@ -170,21 +168,6 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
|
||||
params.base_vertex = 0;
|
||||
params.is_indexed = true;
|
||||
}
|
||||
const bool polygon_line =
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
|
||||
polygon_mode == Maxwell::PolygonMode::Line;
|
||||
if (polygon_line) {
|
||||
if (params.is_indexed) {
|
||||
if (draw_state.index_buffer.count > 1) {
|
||||
params.num_vertices = draw_state.index_buffer.count + 1;
|
||||
}
|
||||
} else if (draw_state.vertex_buffer.count > 1) {
|
||||
params.num_vertices = draw_state.vertex_buffer.count + 1;
|
||||
params.is_indexed = true;
|
||||
params.first_index = 0;
|
||||
params.base_vertex = draw_state.vertex_buffer.first;
|
||||
}
|
||||
}
|
||||
return params;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
@@ -214,6 +197,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
||||
wfi_event(device.GetLogical().CreateEvent()) {
|
||||
scheduler.SetQueryCache(query_cache);
|
||||
|
||||
// Log multi-draw support
|
||||
if (device.IsExtMultiDrawSupported()) {
|
||||
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw is enabled for optimized draw calls");
|
||||
}
|
||||
}
|
||||
|
||||
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||
@@ -227,6 +215,10 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||
FlushWork();
|
||||
gpu_memory->FlushCaching();
|
||||
|
||||
if (device.HasBrokenParallelShaderCompiling()) {
|
||||
pipeline_cache.DrainPendingBuilds();
|
||||
}
|
||||
|
||||
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
|
||||
if (!pipeline) {
|
||||
return;
|
||||
@@ -250,18 +242,45 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const u32 num_instances{instance_count};
|
||||
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
|
||||
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed, polygon_mode)};
|
||||
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (draw_params.is_indexed) {
|
||||
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
|
||||
draw_params.first_index, draw_params.base_vertex,
|
||||
draw_params.base_instance);
|
||||
} else {
|
||||
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
|
||||
draw_params.base_vertex, draw_params.base_instance);
|
||||
}
|
||||
});
|
||||
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
|
||||
|
||||
// Use VK_EXT_multi_draw if available (single draw becomes multi-draw with count=1)
|
||||
if (device.IsExtMultiDrawSupported()) {
|
||||
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (draw_params.is_indexed) {
|
||||
// Use multi-draw indexed with single draw
|
||||
const VkMultiDrawIndexedInfoEXT multi_draw_info{
|
||||
.firstIndex = draw_params.first_index,
|
||||
.indexCount = draw_params.num_vertices,
|
||||
};
|
||||
const int32_t vertex_offset = static_cast<int32_t>(draw_params.base_vertex);
|
||||
cmdbuf.DrawMultiIndexedEXT(1, &multi_draw_info, draw_params.num_instances,
|
||||
draw_params.base_instance,
|
||||
sizeof(VkMultiDrawIndexedInfoEXT), &vertex_offset);
|
||||
} else {
|
||||
// Use multi-draw with single draw
|
||||
const VkMultiDrawInfoEXT multi_draw_info{
|
||||
.firstVertex = draw_params.base_vertex,
|
||||
.vertexCount = draw_params.num_vertices,
|
||||
};
|
||||
cmdbuf.DrawMultiEXT(1, &multi_draw_info, draw_params.num_instances,
|
||||
draw_params.base_instance,
|
||||
sizeof(VkMultiDrawInfoEXT));
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// Fallback to standard draw calls
|
||||
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (draw_params.is_indexed) {
|
||||
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
|
||||
draw_params.first_index, draw_params.base_vertex,
|
||||
draw_params.base_instance);
|
||||
} else {
|
||||
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
|
||||
draw_params.base_vertex, draw_params.base_instance);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -404,13 +423,48 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
||||
.baseArrayLayer = regs.clear_surface.layer,
|
||||
.layerCount = layer_count,
|
||||
};
|
||||
if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
|
||||
const auto clamp_rect_to_render_area = [render_area](VkRect2D& rect) -> bool {
|
||||
const auto clamp_axis = [](s32& offset, u32& extent, u32 limit) {
|
||||
auto clamp_offset = [&offset, limit]() {
|
||||
if (limit == 0) {
|
||||
offset = 0;
|
||||
return;
|
||||
}
|
||||
offset = std::clamp(offset, 0, static_cast<s32>(limit));
|
||||
};
|
||||
|
||||
if (extent == 0) {
|
||||
clamp_offset();
|
||||
return;
|
||||
}
|
||||
if (offset < 0) {
|
||||
const u32 shrink = (std::min)(extent, static_cast<u32>(-offset));
|
||||
extent -= shrink;
|
||||
offset = 0;
|
||||
}
|
||||
if (limit == 0) {
|
||||
extent = 0;
|
||||
offset = 0;
|
||||
return;
|
||||
}
|
||||
if (offset >= static_cast<s32>(limit)) {
|
||||
offset = static_cast<s32>(limit);
|
||||
extent = 0;
|
||||
return;
|
||||
}
|
||||
const u64 end_coord = static_cast<u64>(offset) + extent;
|
||||
if (end_coord > limit) {
|
||||
extent = limit - static_cast<u32>(offset);
|
||||
}
|
||||
};
|
||||
|
||||
clamp_axis(rect.offset.x, rect.extent.width, render_area.width);
|
||||
clamp_axis(rect.offset.y, rect.extent.height, render_area.height);
|
||||
return rect.extent.width != 0 && rect.extent.height != 0;
|
||||
};
|
||||
if (!clamp_rect_to_render_area(clear_rect.rect)) {
|
||||
return;
|
||||
}
|
||||
clear_rect.rect.extent = VkExtent2D{
|
||||
.width = (std::min)(clear_rect.rect.extent.width, render_area.width),
|
||||
.height = (std::min)(clear_rect.rect.extent.height, render_area.height),
|
||||
};
|
||||
|
||||
const u32 color_attachment = regs.clear_surface.RT;
|
||||
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
|
||||
@@ -857,23 +911,21 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load
|
||||
|
||||
void RasterizerVulkan::FlushWork() {
|
||||
#ifdef ANDROID
|
||||
static constexpr u32 DRAWS_TO_DISPATCH = 1024;
|
||||
static constexpr u32 DRAWS_TO_DISPATCH = 512;
|
||||
static constexpr u32 CHECK_MASK = 3;
|
||||
#else
|
||||
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
|
||||
static constexpr u32 CHECK_MASK = 7;
|
||||
#endif // ANDROID
|
||||
|
||||
// Only check multiples of 8 draws
|
||||
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
|
||||
if ((++draw_counter & 7) != 7) {
|
||||
static_assert(DRAWS_TO_DISPATCH % (CHECK_MASK + 1) == 0);
|
||||
if ((++draw_counter & CHECK_MASK) != CHECK_MASK) {
|
||||
return;
|
||||
}
|
||||
if (draw_counter < DRAWS_TO_DISPATCH) {
|
||||
// Send recorded tasks to the worker thread
|
||||
scheduler.DispatchWork();
|
||||
return;
|
||||
}
|
||||
// Otherwise (every certain number of draws) flush execution.
|
||||
// This submits commands to the Vulkan driver.
|
||||
scheduler.Flush();
|
||||
draw_counter = 0;
|
||||
}
|
||||
@@ -939,6 +991,8 @@ bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
|
||||
|
||||
void RasterizerVulkan::UpdateDynamicStates() {
|
||||
auto& regs = maxwell3d->regs;
|
||||
|
||||
// Core Dynamic States (Vulkan 1.0) - Always active regardless of dyna_state setting
|
||||
UpdateViewportsState(regs);
|
||||
UpdateScissorsState(regs);
|
||||
UpdateDepthBias(regs);
|
||||
@@ -946,6 +1000,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||
UpdateDepthBounds(regs);
|
||||
UpdateStencilFaces(regs);
|
||||
UpdateLineWidth(regs);
|
||||
// EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
UpdateCullMode(regs);
|
||||
UpdateDepthCompareOp(regs);
|
||||
@@ -956,40 +1011,52 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||
UpdateDepthTestEnable(regs);
|
||||
UpdateDepthWriteEnable(regs);
|
||||
UpdateStencilTestEnable(regs);
|
||||
if (device.IsExtExtendedDynamicState2Supported()) {
|
||||
UpdatePrimitiveRestartEnable(regs);
|
||||
UpdateRasterizerDiscardEnable(regs);
|
||||
UpdateDepthBiasEnable(regs);
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
|
||||
using namespace Tegra::Engines;
|
||||
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
|
||||
const auto has_float = std::any_of(
|
||||
regs.vertex_attrib_format.begin(),
|
||||
regs.vertex_attrib_format.end(),
|
||||
[](const auto& attrib) {
|
||||
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
|
||||
}
|
||||
);
|
||||
if (regs.logic_op.enable) {
|
||||
regs.logic_op.enable = static_cast<u32>(!has_float);
|
||||
}
|
||||
}
|
||||
UpdateLogicOpEnable(regs);
|
||||
UpdateDepthClampEnable(regs);
|
||||
}
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
|
||||
UpdateLogicOp(regs);
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
|
||||
UpdateBlending(regs);
|
||||
}
|
||||
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
|
||||
UpdateLineStippleEnable(regs);
|
||||
UpdateConservativeRasterizationMode(regs);
|
||||
}
|
||||
}
|
||||
|
||||
// EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable
|
||||
if (device.IsExtExtendedDynamicState2Supported()) {
|
||||
UpdatePrimitiveRestartEnable(regs);
|
||||
UpdateRasterizerDiscardEnable(regs);
|
||||
UpdateDepthBiasEnable(regs);
|
||||
}
|
||||
|
||||
// EDS2 Extras: LogicOp operation selection
|
||||
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
|
||||
UpdateLogicOp(regs);
|
||||
}
|
||||
|
||||
// EDS3 Enables: LogicOpEnable, DepthClamp, LineStipple, ConservativeRaster
|
||||
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
|
||||
using namespace Tegra::Engines;
|
||||
// AMD Workaround: LogicOp incompatible with float render targets
|
||||
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
|
||||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
|
||||
const auto has_float = std::any_of(
|
||||
regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(),
|
||||
[](const auto& attrib) {
|
||||
return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float;
|
||||
}
|
||||
);
|
||||
if (regs.logic_op.enable) {
|
||||
regs.logic_op.enable = static_cast<u32>(!has_float);
|
||||
}
|
||||
}
|
||||
UpdateLogicOpEnable(regs);
|
||||
UpdateDepthClampEnable(regs);
|
||||
UpdateLineRasterizationMode(regs);
|
||||
UpdateLineStippleEnable(regs);
|
||||
UpdateConservativeRasterizationMode(regs);
|
||||
UpdateAlphaToCoverageEnable(regs);
|
||||
UpdateAlphaToOneEnable(regs);
|
||||
}
|
||||
|
||||
// EDS3 Blending: ColorBlendEnable, ColorBlendEquation, ColorWriteMask
|
||||
if (device.IsExtExtendedDynamicState3BlendingSupported()) {
|
||||
UpdateBlending(regs);
|
||||
}
|
||||
|
||||
// Vertex Input Dynamic State: Independent from EDS levels
|
||||
if (device.IsExtVertexInputDynamicStateSupported()) {
|
||||
if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput()) {
|
||||
UpdateVertexInput(regs);
|
||||
@@ -1162,109 +1229,141 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg
|
||||
if (!state_tracker.TouchBlendConstants()) {
|
||||
return;
|
||||
}
|
||||
const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
|
||||
regs.blend_color.a};
|
||||
scheduler.Record(
|
||||
[blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
|
||||
if (!device.UsesAdvancedCoreDynamicState()) {
|
||||
return;
|
||||
}
|
||||
const std::array<float, 4> blend_constants{
|
||||
regs.blend_color.r,
|
||||
regs.blend_color.g,
|
||||
regs.blend_color.b,
|
||||
regs.blend_color.a,
|
||||
};
|
||||
scheduler.Record([blend_constants](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetBlendConstants(blend_constants.data());
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthBounds()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
|
||||
vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
|
||||
if (!device.IsDepthBoundsSupported()) {
|
||||
return;
|
||||
}
|
||||
if (!device.UsesAdvancedCoreDynamicState()) {
|
||||
return;
|
||||
}
|
||||
const bool unrestricted = device.IsExtDepthRangeUnrestrictedSupported();
|
||||
const float min_depth = unrestricted ? regs.depth_bounds[0]
|
||||
: std::clamp(regs.depth_bounds[0], 0.0f, 1.0f);
|
||||
const float max_depth = unrestricted ? regs.depth_bounds[1]
|
||||
: std::clamp(regs.depth_bounds[1], 0.0f, 1.0f);
|
||||
scheduler.Record([min_depth, max_depth](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthBounds(min_depth, max_depth);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchStencilProperties()) {
|
||||
const bool two_sided = regs.stencil_two_side_enable != 0;
|
||||
const bool update_properties = state_tracker.TouchStencilProperties();
|
||||
const bool update_side = state_tracker.TouchStencilSide(two_sided);
|
||||
const bool refs_dirty = state_tracker.TouchStencilReference();
|
||||
const bool write_dirty = state_tracker.TouchStencilWriteMask();
|
||||
const bool compare_dirty = state_tracker.TouchStencilCompare();
|
||||
|
||||
const bool update_references = update_properties || update_side || refs_dirty;
|
||||
const bool update_write_masks = update_properties || update_side || write_dirty;
|
||||
const bool update_compare_masks = update_properties || update_side || compare_dirty;
|
||||
|
||||
if (!update_references && !update_write_masks && !update_compare_masks) {
|
||||
state_tracker.ClearStencilReset();
|
||||
return;
|
||||
}
|
||||
bool update_references = state_tracker.TouchStencilReference();
|
||||
bool update_write_mask = state_tracker.TouchStencilWriteMask();
|
||||
bool update_compare_masks = state_tracker.TouchStencilCompare();
|
||||
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
|
||||
update_references = true;
|
||||
update_write_mask = true;
|
||||
update_compare_masks = true;
|
||||
|
||||
if (!device.UsesAdvancedCoreDynamicState()) {
|
||||
state_tracker.ClearStencilReset();
|
||||
return;
|
||||
}
|
||||
|
||||
if (update_references) {
|
||||
[&]() {
|
||||
if (regs.stencil_two_side_enable) {
|
||||
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
|
||||
!state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
|
||||
return;
|
||||
}
|
||||
if (two_sided) {
|
||||
const bool front_dirty =
|
||||
state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref);
|
||||
const bool back_dirty =
|
||||
state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref);
|
||||
if (front_dirty || back_dirty) {
|
||||
scheduler.Record([front_ref = regs.stencil_front_ref,
|
||||
back_ref = regs.stencil_back_ref,
|
||||
is_two_sided = two_sided](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = is_two_sided && front_ref != back_ref;
|
||||
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_ref);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
|
||||
}
|
||||
});
|
||||
}
|
||||
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
|
||||
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = two_sided && front_ref != back_ref;
|
||||
// Front face
|
||||
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_ref);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
|
||||
}
|
||||
} else if (state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
|
||||
const u32 reference = regs.stencil_front_ref;
|
||||
scheduler.Record([reference](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, reference);
|
||||
});
|
||||
}();
|
||||
}
|
||||
}
|
||||
if (update_write_mask) {
|
||||
[&]() {
|
||||
if (regs.stencil_two_side_enable) {
|
||||
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
|
||||
!state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (update_write_masks) {
|
||||
if (two_sided) {
|
||||
if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) ||
|
||||
state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
|
||||
scheduler.Record([
|
||||
front_write_mask = regs.stencil_front_mask,
|
||||
back_write_mask = regs.stencil_back_mask,
|
||||
is_two_sided = regs.stencil_two_side_enable
|
||||
](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = is_two_sided && front_write_mask != back_write_mask;
|
||||
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_write_mask);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
|
||||
}
|
||||
});
|
||||
}
|
||||
scheduler.Record([front_write_mask = regs.stencil_front_mask,
|
||||
back_write_mask = regs.stencil_back_mask,
|
||||
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = two_sided && front_write_mask != back_write_mask;
|
||||
// Front face
|
||||
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_write_mask);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
|
||||
}
|
||||
} else if (state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
|
||||
const u32 front_write_mask = regs.stencil_front_mask;
|
||||
scheduler.Record([front_write_mask](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_write_mask);
|
||||
});
|
||||
}();
|
||||
}
|
||||
}
|
||||
|
||||
if (update_compare_masks) {
|
||||
[&]() {
|
||||
if (regs.stencil_two_side_enable) {
|
||||
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
|
||||
!state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
|
||||
return;
|
||||
}
|
||||
if (two_sided) {
|
||||
if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) ||
|
||||
state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
|
||||
scheduler.Record([
|
||||
front_test_mask = regs.stencil_front_func_mask,
|
||||
back_test_mask = regs.stencil_back_func_mask,
|
||||
is_two_sided = regs.stencil_two_side_enable
|
||||
](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = is_two_sided && front_test_mask != back_test_mask;
|
||||
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_test_mask);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
|
||||
}
|
||||
});
|
||||
}
|
||||
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
|
||||
back_test_mask = regs.stencil_back_func_mask,
|
||||
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = two_sided && front_test_mask != back_test_mask;
|
||||
// Front face
|
||||
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_test_mask);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
|
||||
}
|
||||
} else if (state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
|
||||
const u32 front_test_mask = regs.stencil_front_func_mask;
|
||||
scheduler.Record([front_test_mask](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, front_test_mask);
|
||||
});
|
||||
}();
|
||||
}
|
||||
}
|
||||
|
||||
state_tracker.ClearStencilReset();
|
||||
}
|
||||
|
||||
@@ -1287,61 +1386,15 @@ void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthBoundsTestEnable()) {
|
||||
return;
|
||||
}
|
||||
bool enabled = regs.depth_bounds_enable;
|
||||
if (enabled && !device.IsDepthBoundsSupported()) {
|
||||
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
|
||||
enabled = false;
|
||||
}
|
||||
scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthTestEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthTestEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthWriteEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthWriteEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchPrimitiveRestartEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetPrimitiveRestartEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchRasterizerDiscardEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
void RasterizerVulkan::UpdateConservativeRasterizationMode(
|
||||
Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchConservativeRasterizationMode()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!device.SupportsDynamicState3ConservativeRasterizationMode() ||
|
||||
!device.IsExtConservativeRasterizationSupported()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetConservativeRasterizationModeEXT(
|
||||
enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
|
||||
@@ -1354,23 +1407,69 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
|
||||
return;
|
||||
}
|
||||
|
||||
if (!device.SupportsDynamicState3LineStippleEnable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.Record([enable = regs.line_stipple_enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetLineStippleEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
// if (!state_tracker.TouchLi()) {
|
||||
// return;
|
||||
// }
|
||||
if (!device.IsExtLineRasterizationSupported()) {
|
||||
return;
|
||||
}
|
||||
if (!state_tracker.TouchLineRasterizationMode()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: The maxwell emulator does not capture line rasters
|
||||
if (!device.SupportsDynamicState3LineRasterizationMode()) {
|
||||
static std::once_flag warn_missing_rect;
|
||||
std::call_once(warn_missing_rect, [] {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Driver lacks rectangular line rasterization support; skipping dynamic "
|
||||
"line state updates");
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// scheduler.Record([enable = regs.line](vk::CommandBuffer cmdbuf) {
|
||||
// cmdbuf.SetConservativeRasterizationModeEXT(
|
||||
// enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT
|
||||
// : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
|
||||
// });
|
||||
const bool wants_smooth = regs.line_anti_alias_enable != 0;
|
||||
VkLineRasterizationModeEXT mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
|
||||
if (wants_smooth) {
|
||||
if (device.SupportsSmoothLines()) {
|
||||
mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
|
||||
} else {
|
||||
static std::once_flag warn_missing_smooth;
|
||||
std::call_once(warn_missing_smooth, [] {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Line anti-aliasing requested but smoothLines feature unavailable; "
|
||||
"using rectangular rasterization");
|
||||
});
|
||||
}
|
||||
}
|
||||
scheduler.Record([mode](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetLineRasterizationModeEXT(mode);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchPrimitiveRestartEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.primitive_restart.enabled != 0](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetPrimitiveRestartEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchRasterizerDiscardEnable()) {
|
||||
return;
|
||||
}
|
||||
const bool discard = regs.rasterize_enable == 0;
|
||||
scheduler.Record([discard](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetRasterizerDiscardEnableEXT(discard);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
@@ -1412,6 +1511,9 @@ void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs
|
||||
if (!state_tracker.TouchLogicOpEnable()) {
|
||||
return;
|
||||
}
|
||||
if (!device.SupportsDynamicState3LogicOpEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetLogicOpEnableEXT(enable != 0);
|
||||
});
|
||||
@@ -1421,6 +1523,9 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
|
||||
if (!state_tracker.TouchDepthClampEnable()) {
|
||||
return;
|
||||
}
|
||||
if (!device.SupportsDynamicState3DepthClampEnable()) {
|
||||
return;
|
||||
}
|
||||
bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
|
||||
regs.viewport_clip_control.geometry_clip ==
|
||||
@@ -1431,6 +1536,41 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r
|
||||
[is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchAlphaToCoverageEnable()) {
|
||||
return;
|
||||
}
|
||||
if (!device.SupportsDynamicState3AlphaToCoverageEnable()) {
|
||||
return;
|
||||
}
|
||||
GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
|
||||
const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToCoverage() &&
|
||||
regs.anti_alias_alpha_control.alpha_to_coverage != 0;
|
||||
scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetAlphaToCoverageEnableEXT(enable ? VK_TRUE : VK_FALSE);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchAlphaToOneEnable()) {
|
||||
return;
|
||||
}
|
||||
if (!device.SupportsDynamicState3AlphaToOneEnable()) {
|
||||
static std::once_flag warn_alpha_to_one;
|
||||
std::call_once(warn_alpha_to_one, [] {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Alpha-to-one is not supported on this device; forcing it disabled");
|
||||
});
|
||||
return;
|
||||
}
|
||||
GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline();
|
||||
const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToOne() &&
|
||||
regs.anti_alias_alpha_control.alpha_to_one != 0;
|
||||
scheduler.Record([enable](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetAlphaToOneEnableEXT(enable ? VK_TRUE : VK_FALSE);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthCompareOp()) {
|
||||
return;
|
||||
@@ -1440,6 +1580,36 @@ void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& reg
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthBoundsTestEnable()) {
|
||||
return;
|
||||
}
|
||||
if (!device.IsDepthBoundsSupported()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.depth_bounds_enable != 0](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthTestEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.depth_test_enable != 0](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthTestEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchDepthWriteEnable()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record([enable = regs.depth_write_enabled != 0](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetDepthWriteEnableEXT(enable);
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchFrontFace()) {
|
||||
return;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/samples_helper.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
@@ -168,7 +169,6 @@ private:
|
||||
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
@@ -183,6 +183,8 @@ private:
|
||||
void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
@@ -13,6 +14,7 @@
|
||||
|
||||
#include "common/thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_command_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
@@ -77,6 +79,14 @@ void Scheduler::WaitWorker() {
|
||||
std::scoped_lock el{execution_mutex};
|
||||
}
|
||||
|
||||
void Scheduler::KeepAliveTick() {
|
||||
const auto now = Clock::now();
|
||||
if (now - last_submission_time < KEEPALIVE_INTERVAL) {
|
||||
return;
|
||||
}
|
||||
Flush();
|
||||
}
|
||||
|
||||
void Scheduler::DispatchWork() {
|
||||
if (chunk->Empty()) {
|
||||
return;
|
||||
@@ -130,9 +140,27 @@ void Scheduler::RequestOutsideRenderPassOperationContext() {
|
||||
|
||||
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
|
||||
if (state.graphics_pipeline == pipeline) {
|
||||
if (pipeline && pipeline->UsesExtendedDynamicState() &&
|
||||
state.needs_state_enable_refresh) {
|
||||
state_tracker.InvalidateStateEnableFlag();
|
||||
state.needs_state_enable_refresh = false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
state.graphics_pipeline = pipeline;
|
||||
|
||||
if (!pipeline) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!pipeline->UsesExtendedDynamicState()) {
|
||||
state.needs_state_enable_refresh = true;
|
||||
} else if (state.needs_state_enable_refresh) {
|
||||
state_tracker.InvalidateStateEnableFlag();
|
||||
state.needs_state_enable_refresh = false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -252,6 +280,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
||||
});
|
||||
chunk->MarkSubmit();
|
||||
DispatchWork();
|
||||
last_submission_time = Clock::now();
|
||||
return signal_value;
|
||||
}
|
||||
|
||||
@@ -276,8 +305,13 @@ void Scheduler::EndRenderPass()
|
||||
return;
|
||||
}
|
||||
|
||||
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
|
||||
query_cache->NotifySegment(false);
|
||||
if (query_cache) {
|
||||
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
|
||||
if (query_cache->HasStreamer(VideoCommon::QueryType::StreamingByteCount)) {
|
||||
query_cache->CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
|
||||
}
|
||||
query_cache->NotifySegment(false);
|
||||
}
|
||||
|
||||
Record([num_images = num_renderpass_images,
|
||||
images = renderpass_images,
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
@@ -49,6 +53,9 @@ public:
|
||||
/// safe to touch worker resources.
|
||||
void WaitWorker();
|
||||
|
||||
/// Submits a tiny chunk of work if recent GPU submissions are stale.
|
||||
void KeepAliveTick();
|
||||
|
||||
/// Sends currently recorded work to the worker thread.
|
||||
void DispatchWork();
|
||||
|
||||
@@ -125,6 +132,8 @@ public:
|
||||
std::mutex submit_mutex;
|
||||
|
||||
private:
|
||||
using Clock = std::chrono::steady_clock;
|
||||
|
||||
class Command {
|
||||
public:
|
||||
virtual ~Command() = default;
|
||||
@@ -214,6 +223,7 @@ private:
|
||||
GraphicsPipeline* graphics_pipeline = nullptr;
|
||||
bool is_rescaling = false;
|
||||
bool rescaling_defined = false;
|
||||
bool needs_state_enable_refresh = false;
|
||||
};
|
||||
|
||||
void WorkerThread(std::stop_token stop_token);
|
||||
@@ -246,6 +256,9 @@ private:
|
||||
|
||||
State state;
|
||||
|
||||
Clock::time_point last_submission_time{Clock::time_point::min()};
|
||||
static constexpr std::chrono::milliseconds KEEPALIVE_INTERVAL{4};
|
||||
|
||||
u32 num_renderpass_images = 0;
|
||||
std::array<VkImage, 9> renderpass_images{};
|
||||
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -49,6 +50,7 @@ size_t GetStreamBufferSize(const Device& device) {
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
||||
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
Scheduler& scheduler_)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
@@ -74,13 +76,16 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
|
||||
}
|
||||
stream_pointer = stream_buffer.Mapped();
|
||||
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
|
||||
|
||||
}
|
||||
|
||||
StagingBufferPool::~StagingBufferPool() = default;
|
||||
|
||||
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
|
||||
if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
|
||||
return GetStreamBuffer(size);
|
||||
if (!deferred && usage == MemoryUsage::Upload) {
|
||||
if (size <= region_size) {
|
||||
return GetStreamBuffer(size);
|
||||
}
|
||||
}
|
||||
return GetStagingBuffer(size, usage, deferred);
|
||||
}
|
||||
@@ -142,6 +147,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
|
||||
}
|
||||
|
||||
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
|
||||
scheduler.GetMasterSemaphore().Refresh();
|
||||
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
|
||||
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
|
||||
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
@@ -48,6 +48,7 @@ Flags MakeInvalidationFlags() {
|
||||
FrontFace,
|
||||
StencilOp,
|
||||
StencilTestEnable,
|
||||
RasterizerDiscardEnable,
|
||||
VertexBuffers,
|
||||
VertexInput,
|
||||
StateEnable,
|
||||
@@ -55,6 +56,9 @@ Flags MakeInvalidationFlags() {
|
||||
DepthBiasEnable,
|
||||
LogicOpEnable,
|
||||
DepthClampEnable,
|
||||
AlphaToCoverageEnable,
|
||||
AlphaToOneEnable,
|
||||
LineRasterizationMode,
|
||||
LogicOp,
|
||||
Blending,
|
||||
ColorMask,
|
||||
@@ -148,6 +152,8 @@ void SetupDirtyStateEnable(Tables& tables) {
|
||||
setup(OFF(logic_op.enable), LogicOpEnable);
|
||||
setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable);
|
||||
setup(OFF(line_stipple_enable), LineStippleEnable);
|
||||
setup(OFF(anti_alias_alpha_control.alpha_to_coverage), AlphaToCoverageEnable);
|
||||
setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable);
|
||||
}
|
||||
|
||||
void SetupDirtyDepthCompareOp(Tables& tables) {
|
||||
@@ -226,6 +232,7 @@ void SetupRasterModes(Tables &tables) {
|
||||
|
||||
table[OFF(line_stipple_params)] = LineStippleParams;
|
||||
table[OFF(conservative_raster_enable)] = ConservativeRasterizationMode;
|
||||
table[OFF(line_anti_alias_enable)] = LineRasterizationMode;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
||||
@@ -54,6 +54,7 @@ enum : u8 {
|
||||
PrimitiveRestartEnable,
|
||||
RasterizerDiscardEnable,
|
||||
ConservativeRasterizationMode,
|
||||
LineRasterizationMode,
|
||||
LineStippleEnable,
|
||||
LineStippleParams,
|
||||
DepthBiasEnable,
|
||||
@@ -61,6 +62,8 @@ enum : u8 {
|
||||
LogicOp,
|
||||
LogicOpEnable,
|
||||
DepthClampEnable,
|
||||
AlphaToCoverageEnable,
|
||||
AlphaToOneEnable,
|
||||
|
||||
Blending,
|
||||
BlendEnable,
|
||||
@@ -94,6 +97,10 @@ public:
|
||||
(*flags)[Dirty::Scissors] = true;
|
||||
}
|
||||
|
||||
void InvalidateStateEnableFlag() {
|
||||
(*flags)[Dirty::StateEnable] = true;
|
||||
}
|
||||
|
||||
bool TouchViewports() {
|
||||
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
|
||||
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
|
||||
@@ -225,6 +232,14 @@ public:
|
||||
return Exchange(Dirty::DepthClampEnable, false);
|
||||
}
|
||||
|
||||
bool TouchAlphaToCoverageEnable() {
|
||||
return Exchange(Dirty::AlphaToCoverageEnable, false);
|
||||
}
|
||||
|
||||
bool TouchAlphaToOneEnable() {
|
||||
return Exchange(Dirty::AlphaToOneEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthCompareOp() {
|
||||
return Exchange(Dirty::DepthCompareOp, false);
|
||||
}
|
||||
@@ -261,6 +276,10 @@ public:
|
||||
return Exchange(Dirty::LogicOp, false);
|
||||
}
|
||||
|
||||
bool TouchLineRasterizationMode() {
|
||||
return Exchange(Dirty::LineRasterizationMode, false);
|
||||
}
|
||||
|
||||
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
|
||||
const bool has_changed = current_topology != new_topology;
|
||||
current_topology = new_topology;
|
||||
|
||||
@@ -306,7 +306,17 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) {
|
||||
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
|
||||
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
|
||||
}
|
||||
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
|
||||
// According to Vulkan spec, when using VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR,
|
||||
// the base format (imageFormat) MUST be included in pViewFormats
|
||||
const std::array view_formats{
|
||||
swapchain_ci.imageFormat, // Base format MUST be first
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_B8G8R8A8_SRGB,
|
||||
#ifdef ANDROID
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Android may use RGBA
|
||||
VK_FORMAT_R8G8B8A8_SRGB,
|
||||
#endif
|
||||
};
|
||||
VkImageFormatListCreateInfo format_list{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -101,7 +104,7 @@ public:
|
||||
}
|
||||
|
||||
VkSemaphore CurrentRenderSemaphore() const {
|
||||
return *render_semaphores[frame_index];
|
||||
return *render_semaphores[image_index];
|
||||
}
|
||||
|
||||
u32 GetWidth() const {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,12 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
@@ -56,6 +60,8 @@ public:
|
||||
|
||||
void TickFrame();
|
||||
|
||||
void WaitForGpuTick(u64 tick);
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
@@ -110,10 +116,15 @@ public:
|
||||
return view_formats[static_cast<std::size_t>(format)];
|
||||
}
|
||||
|
||||
bool RequiresBlockCompatibleViewFormats(PixelFormat format) const noexcept {
|
||||
return requires_block_view_formats[static_cast<std::size_t>(format)];
|
||||
}
|
||||
|
||||
void BarrierFeedbackLoop();
|
||||
|
||||
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
|
||||
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
|
||||
bool SupportsLinearFilter(VideoCore::Surface::PixelFormat format) const;
|
||||
|
||||
VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;
|
||||
|
||||
@@ -127,6 +138,7 @@ public:
|
||||
std::unique_ptr<MSAACopyPass> msaa_copy_pass;
|
||||
const Settings::ResolutionScalingInfo& resolution;
|
||||
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
|
||||
std::array<bool, VideoCore::Surface::MaxPixelFormat> requires_block_view_formats{};
|
||||
|
||||
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
|
||||
std::array<vk::Buffer, indexing_slots> buffers{};
|
||||
@@ -173,6 +185,30 @@ public:
|
||||
return (this->*current_image).UsageFlags();
|
||||
}
|
||||
|
||||
void TrackGpuReadTick(u64 tick) noexcept {
|
||||
TrackPendingReadTick(tick);
|
||||
}
|
||||
|
||||
void TrackGpuWriteTick(u64 tick) noexcept {
|
||||
TrackPendingWriteTick(tick);
|
||||
}
|
||||
|
||||
void CompleteGpuReadTick(u64 completed_tick) noexcept {
|
||||
ClearPendingReadTick(completed_tick);
|
||||
}
|
||||
|
||||
void CompleteGpuWriteTick(u64 completed_tick) noexcept {
|
||||
ClearPendingWriteTick(completed_tick);
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<u64> PendingGpuReadTick() const noexcept {
|
||||
return PendingReadTick();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<u64> PendingGpuWriteTick() const noexcept {
|
||||
return PendingWriteTick();
|
||||
}
|
||||
|
||||
/// Returns true when the image is already initialized and mark it as initialized
|
||||
[[nodiscard]] bool ExchangeInitialization() noexcept {
|
||||
return std::exchange(initialized, true);
|
||||
@@ -235,15 +271,23 @@ public:
|
||||
|
||||
[[nodiscard]] VkImageView ColorView();
|
||||
|
||||
[[nodiscard]] VkImageView SampledView(Shader::TextureType texture_type,
|
||||
Shader::SamplerComponentType component_type);
|
||||
|
||||
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format);
|
||||
|
||||
[[nodiscard]] bool IsRescaled() const noexcept;
|
||||
|
||||
[[nodiscard]] bool SupportsDepthCompareSampling() const noexcept;
|
||||
|
||||
[[nodiscard]] bool Is3DImage() const noexcept {
|
||||
return is_3d_image;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
|
||||
return *image_views[static_cast<size_t>(texture_type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImage ImageHandle() const noexcept {
|
||||
return image_handle;
|
||||
}
|
||||
@@ -268,23 +312,36 @@ private:
|
||||
struct StorageViews {
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> typeless;
|
||||
};
|
||||
|
||||
static constexpr size_t NUMERIC_VIEW_TYPES = 3;
|
||||
|
||||
[[nodiscard]] Shader::TextureType BaseTextureType() const noexcept;
|
||||
[[nodiscard]] std::optional<u32> LayerCountOverride(Shader::TextureType texture_type) const noexcept;
|
||||
[[nodiscard]] VkImageView DepthView(Shader::TextureType texture_type);
|
||||
[[nodiscard]] VkImageView StencilView(Shader::TextureType texture_type);
|
||||
|
||||
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
|
||||
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask,
|
||||
Shader::TextureType texture_type);
|
||||
|
||||
const Device* device = nullptr;
|
||||
const SlotVector<Image>* slot_images = nullptr;
|
||||
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
|
||||
std::unique_ptr<StorageViews> storage_views;
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> depth_views;
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> stencil_views;
|
||||
std::array<std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES>, NUMERIC_VIEW_TYPES>
|
||||
sampled_component_views;
|
||||
vk::ImageView color_view;
|
||||
vk::Image null_image;
|
||||
VkImage image_handle = VK_NULL_HANDLE;
|
||||
VkImageView render_target = VK_NULL_HANDLE;
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
u32 buffer_size = 0;
|
||||
bool is_3d_image = false;
|
||||
};
|
||||
|
||||
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
@@ -305,9 +362,19 @@ public:
|
||||
return static_cast<bool>(sampler_default_anisotropy);
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampler SelectHandle(bool supports_linear_filter,
|
||||
bool supports_anisotropy,
|
||||
bool allow_depth_compare) const noexcept;
|
||||
|
||||
private:
|
||||
vk::Sampler sampler;
|
||||
vk::Sampler sampler_default_anisotropy;
|
||||
vk::Sampler sampler_force_point;
|
||||
vk::Sampler sampler_compare_disabled;
|
||||
vk::Sampler sampler_default_anisotropy_compare_disabled;
|
||||
vk::Sampler sampler_force_point_compare_disabled;
|
||||
bool uses_linear_filter = false;
|
||||
bool depth_compare_enabled = false;
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
|
||||
@@ -70,6 +70,102 @@ static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture
|
||||
entry.a_type, entry.srgb_conversion));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
[[nodiscard]] bool UsesSwizzleSource(const Tegra::Texture::TICEntry& entry,
|
||||
Tegra::Texture::SwizzleSource source) {
|
||||
const std::array swizzles{entry.x_source.Value(), entry.y_source.Value(),
|
||||
entry.z_source.Value(), entry.w_source.Value()};
|
||||
return std::ranges::any_of(swizzles, [source](Tegra::Texture::SwizzleSource current) {
|
||||
return current == source;
|
||||
});
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<Shader::SamplerComponentType> DepthStencilComponentFromSwizzle(
|
||||
const Tegra::Texture::TICEntry& entry, VideoCore::Surface::PixelFormat pixel_format) {
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
const bool uses_r = UsesSwizzleSource(entry, SwizzleSource::R);
|
||||
const bool uses_g = UsesSwizzleSource(entry, SwizzleSource::G);
|
||||
|
||||
switch (pixel_format) {
|
||||
case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
|
||||
case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
|
||||
if (uses_r != uses_g) {
|
||||
return uses_r ? Shader::SamplerComponentType::Depth
|
||||
: Shader::SamplerComponentType::Stencil;
|
||||
}
|
||||
break;
|
||||
case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
|
||||
if (uses_r != uses_g) {
|
||||
return uses_r ? Shader::SamplerComponentType::Stencil
|
||||
: Shader::SamplerComponentType::Depth;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
static Shader::SamplerComponentType ConvertSamplerComponentType(
|
||||
const Tegra::Texture::TICEntry& entry) {
|
||||
const auto pixel_format = PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type,
|
||||
entry.b_type, entry.a_type,
|
||||
entry.srgb_conversion);
|
||||
const auto surface_type = VideoCore::Surface::GetFormatType(pixel_format);
|
||||
if (entry.depth_texture != 0 || surface_type == VideoCore::Surface::SurfaceType::Depth) {
|
||||
return Shader::SamplerComponentType::Depth;
|
||||
}
|
||||
if (surface_type == VideoCore::Surface::SurfaceType::Stencil) {
|
||||
return Shader::SamplerComponentType::Stencil;
|
||||
}
|
||||
if (surface_type == VideoCore::Surface::SurfaceType::DepthStencil) {
|
||||
if (const auto inferred = DepthStencilComponentFromSwizzle(entry, pixel_format)) {
|
||||
return *inferred;
|
||||
}
|
||||
return entry.depth_texture != 0 ? Shader::SamplerComponentType::Depth
|
||||
: Shader::SamplerComponentType::Stencil;
|
||||
}
|
||||
|
||||
const auto accumulate = [](const Tegra::Texture::ComponentType component,
|
||||
bool& has_signed, bool& has_unsigned) {
|
||||
switch (component) {
|
||||
case Tegra::Texture::ComponentType::SINT:
|
||||
has_signed = true;
|
||||
break;
|
||||
case Tegra::Texture::ComponentType::UINT:
|
||||
has_unsigned = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
bool has_signed{};
|
||||
bool has_unsigned{};
|
||||
accumulate(entry.r_type, has_signed, has_unsigned);
|
||||
accumulate(entry.g_type, has_signed, has_unsigned);
|
||||
accumulate(entry.b_type, has_signed, has_unsigned);
|
||||
accumulate(entry.a_type, has_signed, has_unsigned);
|
||||
|
||||
if (has_signed && !has_unsigned) {
|
||||
return Shader::SamplerComponentType::Sint;
|
||||
}
|
||||
if (has_unsigned && !has_signed) {
|
||||
return Shader::SamplerComponentType::Uint;
|
||||
}
|
||||
if (has_signed) {
|
||||
return Shader::SamplerComponentType::Sint;
|
||||
}
|
||||
if (has_unsigned) {
|
||||
return Shader::SamplerComponentType::Uint;
|
||||
}
|
||||
return Shader::SamplerComponentType::Float;
|
||||
}
|
||||
|
||||
static std::string_view StageToPrefix(Shader::Stage stage) {
|
||||
switch (stage) {
|
||||
case Shader::Stage::VertexB:
|
||||
@@ -200,6 +296,7 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||
const u64 code_size{static_cast<u64>(CachedSizeBytes())};
|
||||
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
|
||||
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
|
||||
const u64 num_texture_component_types{static_cast<u64>(texture_component_types.size())};
|
||||
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
|
||||
const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
|
||||
|
||||
@@ -207,6 +304,8 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
|
||||
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
|
||||
sizeof(num_texture_pixel_formats))
|
||||
.write(reinterpret_cast<const char*>(&num_texture_component_types),
|
||||
sizeof(num_texture_component_types))
|
||||
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||
.write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
|
||||
sizeof(num_cbuf_replacement_values))
|
||||
@@ -223,6 +322,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||
}
|
||||
for (const auto& [key, component] : texture_component_types) {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&component), sizeof(component));
|
||||
}
|
||||
for (const auto& [key, format] : texture_pixel_formats) {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&format), sizeof(format));
|
||||
@@ -277,7 +380,17 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
|
||||
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
|
||||
bool via_header_index, u32 raw) {
|
||||
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
|
||||
ASSERT(handle.first <= tic_limit);
|
||||
if (handle.first > tic_limit) {
|
||||
static std::atomic<size_t> oob_count{0};
|
||||
const size_t n = ++oob_count;
|
||||
if (n <= 4 || (n & 63) == 0) {
|
||||
LOG_WARNING(Shader,
|
||||
"TIC handle {} exceeds limit {} (via_header_index={}) — returning empty",
|
||||
handle.first, tic_limit, via_header_index);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
|
||||
Tegra::Texture::TICEntry entry;
|
||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||
@@ -374,6 +487,21 @@ Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
|
||||
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
|
||||
const Shader::TextureType result{ConvertTextureType(entry)};
|
||||
texture_types.emplace(handle, result);
|
||||
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
|
||||
return result;
|
||||
}
|
||||
|
||||
Shader::SamplerComponentType GraphicsEnvironment::ReadTextureComponentType(u32 handle) {
|
||||
const auto it{texture_component_types.find(handle)};
|
||||
if (it != texture_component_types.end()) {
|
||||
return it->second;
|
||||
}
|
||||
const auto& regs{maxwell3d->regs};
|
||||
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
|
||||
auto entry =
|
||||
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
|
||||
const Shader::SamplerComponentType result{ConvertSamplerComponentType(entry)};
|
||||
texture_component_types.emplace(handle, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -430,6 +558,20 @@ Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
|
||||
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
|
||||
const Shader::TextureType result{ConvertTextureType(entry)};
|
||||
texture_types.emplace(handle, result);
|
||||
texture_component_types.emplace(handle, ConvertSamplerComponentType(entry));
|
||||
return result;
|
||||
}
|
||||
|
||||
Shader::SamplerComponentType ComputeEnvironment::ReadTextureComponentType(u32 handle) {
|
||||
const auto it{texture_component_types.find(handle)};
|
||||
if (it != texture_component_types.end()) {
|
||||
return it->second;
|
||||
}
|
||||
const auto& regs{kepler_compute->regs};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
|
||||
const Shader::SamplerComponentType result{ConvertSamplerComponentType(entry)};
|
||||
texture_component_types.emplace(handle, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -455,12 +597,15 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||
u64 code_size{};
|
||||
u64 num_texture_types{};
|
||||
u64 num_texture_pixel_formats{};
|
||||
u64 num_texture_component_types{};
|
||||
u64 num_cbuf_values{};
|
||||
u64 num_cbuf_replacement_values{};
|
||||
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
|
||||
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
|
||||
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
|
||||
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
|
||||
sizeof(num_texture_pixel_formats))
|
||||
.read(reinterpret_cast<char*>(&num_texture_component_types),
|
||||
sizeof(num_texture_component_types))
|
||||
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||
.read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
|
||||
sizeof(num_cbuf_replacement_values))
|
||||
@@ -480,6 +625,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||
.read(reinterpret_cast<char*>(&type), sizeof(type));
|
||||
texture_types.emplace(key, type);
|
||||
}
|
||||
for (size_t i = 0; i < num_texture_component_types; ++i) {
|
||||
u32 key;
|
||||
Shader::SamplerComponentType component;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
||||
.read(reinterpret_cast<char*>(&component), sizeof(component));
|
||||
texture_component_types.emplace(key, component);
|
||||
}
|
||||
for (size_t i = 0; i < num_texture_pixel_formats; ++i) {
|
||||
u32 key;
|
||||
Shader::TexturePixelFormat format;
|
||||
@@ -534,6 +686,15 @@ u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Shader::SamplerComponentType FileEnvironment::ReadTextureComponentType(u32 handle) {
|
||||
const auto it{texture_component_types.find(handle)};
|
||||
if (it == texture_component_types.end()) {
|
||||
LOG_WARNING(Render_Vulkan, "Texture component descriptor {:08x} not found", handle);
|
||||
return Shader::SamplerComponentType::Float;
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
|
||||
const auto it{texture_types.find(handle)};
|
||||
if (it == texture_types.end()) {
|
||||
|
||||
@@ -80,6 +80,7 @@ protected:
|
||||
|
||||
std::vector<u64> code;
|
||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
|
||||
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
||||
std::unordered_map<u64, u32> cbuf_values;
|
||||
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
|
||||
@@ -116,6 +117,8 @@ public:
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override;
|
||||
|
||||
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
|
||||
|
||||
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
|
||||
|
||||
bool IsTexturePixelFormatInteger(u32 handle) override;
|
||||
@@ -142,6 +145,8 @@ public:
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override;
|
||||
|
||||
Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
|
||||
|
||||
Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
|
||||
|
||||
bool IsTexturePixelFormatInteger(u32 handle) override;
|
||||
@@ -176,6 +181,8 @@ public:
|
||||
|
||||
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
|
||||
|
||||
[[nodiscard]] Shader::SamplerComponentType ReadTextureComponentType(u32 handle) override;
|
||||
|
||||
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
|
||||
|
||||
[[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override;
|
||||
@@ -202,6 +209,7 @@ public:
|
||||
private:
|
||||
std::vector<u64> code;
|
||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||
std::unordered_map<u32, Shader::SamplerComponentType> texture_component_types;
|
||||
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
||||
std::unordered_map<u64, u32> cbuf_values;
|
||||
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "common/settings.h"
|
||||
@@ -408,6 +410,126 @@ bool IsPixelFormatSignedInteger(PixelFormat format) {
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
struct NumericVariantSet {
|
||||
PixelFormat float_format = PixelFormat::Invalid;
|
||||
PixelFormat uint_format = PixelFormat::Invalid;
|
||||
PixelFormat sint_format = PixelFormat::Invalid;
|
||||
|
||||
[[nodiscard]] std::optional<PixelFormat> Select(PixelFormatNumeric numeric) const {
|
||||
PixelFormat candidate = PixelFormat::Invalid;
|
||||
switch (numeric) {
|
||||
case PixelFormatNumeric::Float:
|
||||
candidate = float_format;
|
||||
break;
|
||||
case PixelFormatNumeric::Uint:
|
||||
candidate = uint_format;
|
||||
break;
|
||||
case PixelFormatNumeric::Sint:
|
||||
candidate = sint_format;
|
||||
break;
|
||||
}
|
||||
if (candidate == PixelFormat::Invalid) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
};
|
||||
|
||||
constexpr NumericVariantSet MakeVariant(PixelFormat float_format, PixelFormat uint_format,
|
||||
PixelFormat sint_format) {
|
||||
return NumericVariantSet{
|
||||
.float_format = float_format,
|
||||
.uint_format = uint_format,
|
||||
.sint_format = sint_format,
|
||||
};
|
||||
}
|
||||
|
||||
std::optional<NumericVariantSet> LookupNumericVariantSet(PixelFormat format) {
|
||||
switch (format) {
|
||||
case PixelFormat::R8_UNORM:
|
||||
case PixelFormat::R8_SNORM:
|
||||
case PixelFormat::R8_UINT:
|
||||
case PixelFormat::R8_SINT:
|
||||
return MakeVariant(PixelFormat::R8_UNORM, PixelFormat::R8_UINT, PixelFormat::R8_SINT);
|
||||
case PixelFormat::R16_FLOAT:
|
||||
case PixelFormat::R16_UNORM:
|
||||
case PixelFormat::R16_SNORM:
|
||||
case PixelFormat::R16_UINT:
|
||||
case PixelFormat::R16_SINT:
|
||||
return MakeVariant(PixelFormat::R16_FLOAT, PixelFormat::R16_UINT, PixelFormat::R16_SINT);
|
||||
case PixelFormat::R32_FLOAT:
|
||||
case PixelFormat::R32_UINT:
|
||||
case PixelFormat::R32_SINT:
|
||||
return MakeVariant(PixelFormat::R32_FLOAT, PixelFormat::R32_UINT, PixelFormat::R32_SINT);
|
||||
case PixelFormat::R8G8_UNORM:
|
||||
case PixelFormat::R8G8_SNORM:
|
||||
case PixelFormat::R8G8_UINT:
|
||||
case PixelFormat::R8G8_SINT:
|
||||
return MakeVariant(PixelFormat::R8G8_UNORM, PixelFormat::R8G8_UINT, PixelFormat::R8G8_SINT);
|
||||
case PixelFormat::R16G16_FLOAT:
|
||||
case PixelFormat::R16G16_UNORM:
|
||||
case PixelFormat::R16G16_SNORM:
|
||||
case PixelFormat::R16G16_UINT:
|
||||
case PixelFormat::R16G16_SINT:
|
||||
return MakeVariant(PixelFormat::R16G16_FLOAT, PixelFormat::R16G16_UINT,
|
||||
PixelFormat::R16G16_SINT);
|
||||
case PixelFormat::R32G32_FLOAT:
|
||||
case PixelFormat::R32G32_UINT:
|
||||
case PixelFormat::R32G32_SINT:
|
||||
return MakeVariant(PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
|
||||
PixelFormat::R32G32_SINT);
|
||||
case PixelFormat::R16G16B16A16_FLOAT:
|
||||
case PixelFormat::R16G16B16A16_UNORM:
|
||||
case PixelFormat::R16G16B16A16_SNORM:
|
||||
case PixelFormat::R16G16B16A16_UINT:
|
||||
case PixelFormat::R16G16B16A16_SINT:
|
||||
return MakeVariant(PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
|
||||
PixelFormat::R16G16B16A16_SINT);
|
||||
case PixelFormat::R32G32B32A32_FLOAT:
|
||||
case PixelFormat::R32G32B32A32_UINT:
|
||||
case PixelFormat::R32G32B32A32_SINT:
|
||||
return MakeVariant(PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT,
|
||||
PixelFormat::R32G32B32A32_SINT);
|
||||
case PixelFormat::A8B8G8R8_UNORM:
|
||||
case PixelFormat::A8B8G8R8_SNORM:
|
||||
case PixelFormat::A8B8G8R8_SRGB:
|
||||
case PixelFormat::A8B8G8R8_UINT:
|
||||
case PixelFormat::A8B8G8R8_SINT:
|
||||
return MakeVariant(PixelFormat::A8B8G8R8_UNORM, PixelFormat::A8B8G8R8_UINT,
|
||||
PixelFormat::A8B8G8R8_SINT);
|
||||
case PixelFormat::A2B10G10R10_UNORM:
|
||||
case PixelFormat::A2B10G10R10_UINT:
|
||||
return MakeVariant(PixelFormat::A2B10G10R10_UNORM, PixelFormat::A2B10G10R10_UINT,
|
||||
PixelFormat::Invalid);
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format) {
|
||||
if (IsPixelFormatInteger(format)) {
|
||||
return IsPixelFormatSignedInteger(format) ? PixelFormatNumeric::Sint
|
||||
: PixelFormatNumeric::Uint;
|
||||
}
|
||||
return PixelFormatNumeric::Float;
|
||||
}
|
||||
|
||||
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
|
||||
PixelFormatNumeric target_numeric) {
|
||||
const auto variants = LookupNumericVariantSet(format);
|
||||
if (!variants) {
|
||||
return std::nullopt;
|
||||
}
|
||||
if (const auto candidate = variants->Select(target_numeric)) {
|
||||
return candidate;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
size_t PixelComponentSizeBitsInteger(PixelFormat format) {
|
||||
switch (format) {
|
||||
case PixelFormat::A8B8G8R8_SINT:
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
@@ -517,6 +521,16 @@ bool IsPixelFormatSignedInteger(PixelFormat format);
|
||||
|
||||
size_t PixelComponentSizeBitsInteger(PixelFormat format);
|
||||
|
||||
enum class PixelFormatNumeric {
|
||||
Float,
|
||||
Uint,
|
||||
Sint,
|
||||
};
|
||||
|
||||
PixelFormatNumeric GetPixelFormatNumericType(PixelFormat format);
|
||||
std::optional<PixelFormat> FindPixelFormatVariant(PixelFormat format,
|
||||
PixelFormatNumeric target_numeric);
|
||||
|
||||
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
|
||||
|
||||
u64 TranscodedAstcSize(u64 base_size, PixelFormat format);
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -21,6 +24,27 @@ constexpr auto FLOAT = ComponentType::FLOAT;
|
||||
constexpr bool LINEAR = false;
|
||||
constexpr bool SRGB = true;
|
||||
|
||||
constexpr TextureFormat SanitizeFormat(TextureFormat format) {
|
||||
if (format == static_cast<TextureFormat>(0)) {
|
||||
return TextureFormat::A8B8G8R8;
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
constexpr ComponentType SanitizeComponent(ComponentType component) {
|
||||
if (component == static_cast<ComponentType>(0)) {
|
||||
return ComponentType::UNORM;
|
||||
}
|
||||
switch (component) {
|
||||
case ComponentType::SNORM_FORCE_FP16:
|
||||
return ComponentType::SNORM;
|
||||
case ComponentType::UNORM_FORCE_FP16:
|
||||
return ComponentType::UNORM;
|
||||
default:
|
||||
return component;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
|
||||
ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
|
||||
u32 hash = is_srgb ? 1 : 0;
|
||||
@@ -41,6 +65,11 @@ constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb =
|
||||
PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
|
||||
ComponentType blue, ComponentType alpha,
|
||||
bool is_srgb) noexcept {
|
||||
format = SanitizeFormat(format);
|
||||
red = SanitizeComponent(red);
|
||||
green = SanitizeComponent(green);
|
||||
blue = SanitizeComponent(blue);
|
||||
alpha = SanitizeComponent(alpha);
|
||||
switch (Hash(format, red, green, blue, alpha, is_srgb)) {
|
||||
case Hash(TextureFormat::A8B8G8R8, UNORM):
|
||||
return PixelFormat::A8B8G8R8_UNORM;
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
@@ -58,6 +62,50 @@ struct ImageBase {
|
||||
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
|
||||
explicit ImageBase(const NullImageParams&);
|
||||
|
||||
void TrackPendingReadTick(u64 tick) noexcept {
|
||||
if (pending_read_tick) {
|
||||
*pending_read_tick = std::max(*pending_read_tick, tick);
|
||||
} else {
|
||||
pending_read_tick = tick;
|
||||
}
|
||||
}
|
||||
|
||||
void TrackPendingWriteTick(u64 tick) noexcept {
|
||||
if (pending_write_tick) {
|
||||
*pending_write_tick = std::max(*pending_write_tick, tick);
|
||||
} else {
|
||||
pending_write_tick = tick;
|
||||
}
|
||||
}
|
||||
|
||||
void ClearPendingReadTick(u64 completed_tick) noexcept {
|
||||
if (pending_read_tick && completed_tick >= *pending_read_tick) {
|
||||
pending_read_tick.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void ClearPendingWriteTick(u64 completed_tick) noexcept {
|
||||
if (pending_write_tick && completed_tick >= *pending_write_tick) {
|
||||
pending_write_tick.reset();
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasPendingReadTick() const noexcept {
|
||||
return pending_read_tick.has_value();
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasPendingWriteTick() const noexcept {
|
||||
return pending_write_tick.has_value();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<u64> PendingReadTick() const noexcept {
|
||||
return pending_read_tick;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<u64> PendingWriteTick() const noexcept {
|
||||
return pending_write_tick;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
|
||||
|
||||
[[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
|
||||
@@ -115,6 +163,9 @@ struct ImageBase {
|
||||
std::vector<AliasedImage> aliased_images;
|
||||
std::vector<ImageId> overlapping_images;
|
||||
ImageMapId map_view_id{};
|
||||
|
||||
std::optional<u64> pending_read_tick;
|
||||
std::optional<u64> pending_write_tick;
|
||||
};
|
||||
|
||||
struct ImageMapView {
|
||||
|
||||
@@ -33,6 +33,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
|
||||
dma_downloaded = forced_flushed;
|
||||
format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
|
||||
config.a_type, config.srgb_conversion);
|
||||
|
||||
num_samples = NumSamples(config.msaa_mode);
|
||||
resources.levels = config.max_mip_level + 1;
|
||||
if (config.IsPitchLinear()) {
|
||||
|
||||
@@ -1,15 +1,22 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
constexpr inline std::size_t MaxSampleLocationSlots = 16;
|
||||
|
||||
[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
|
||||
switch (num_samples) {
|
||||
case 1:
|
||||
@@ -95,4 +102,24 @@ namespace VideoCommon {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// NVN guarantees up to sixteen programmable sample slots shared across a repeating pixel grid
|
||||
// (per the 0.7.0 addon release notes), so the grid dimensions shrink as MSAA increases.
|
||||
[[nodiscard]] inline std::pair<u32, u32> SampleLocationGridSize(Tegra::Texture::MsaaMode msaa_mode) {
|
||||
const int samples = NumSamples(msaa_mode);
|
||||
switch (samples) {
|
||||
case 1:
|
||||
return {4u, 4u};
|
||||
case 2:
|
||||
return {4u, 2u};
|
||||
case 4:
|
||||
return {2u, 2u};
|
||||
case 8:
|
||||
return {2u, 1u};
|
||||
case 16:
|
||||
return {1u, 1u};
|
||||
}
|
||||
ASSERT_MSG(false, "Unsupported sample count for grid size={}", samples);
|
||||
return {1u, 1u};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -540,6 +540,7 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
|
||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||
return;
|
||||
}
|
||||
EnsureImageReady(image, ImageAccessType::Write);
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, image_id);
|
||||
@@ -550,11 +551,12 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
|
||||
template <class P>
|
||||
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
|
||||
boost::container::small_vector<ImageId, 16> images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
|
||||
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
|
||||
if (!image.IsSafeDownload()) {
|
||||
return;
|
||||
}
|
||||
image.flags &= ~ImageFlagBits::GpuModified;
|
||||
EnsureImageReady(this->slot_images[image_id], ImageAccessType::Read);
|
||||
images.push_back(image_id);
|
||||
});
|
||||
if (images.empty()) {
|
||||
@@ -606,6 +608,7 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
|
||||
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
EnsureImageReady(image, ImageAccessType::Write);
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, id);
|
||||
}
|
||||
@@ -621,6 +624,7 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
|
||||
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
EnsureImageReady(image, ImageAccessType::Write);
|
||||
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
@@ -2423,6 +2427,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
template <class P>
|
||||
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
|
||||
Image& image = slot_images[image_id];
|
||||
EnsureImageReady(image, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
|
||||
TrackGpuImageAccess(image_id, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
|
||||
runtime.TransitionImageLayout(image);
|
||||
if (invalidate) {
|
||||
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
|
||||
if (False(image.flags & ImageFlagBits::Tracked)) {
|
||||
@@ -2438,6 +2445,85 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
|
||||
lru_cache.Touch(image.lru_index, frame_tick);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TrackGpuImageAccess(ImageId image_id, ImageAccessType access) {
|
||||
staged_gpu_accesses.push_back({image_id, access});
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::CommitPendingGpuAccesses(u64 fence_value) {
|
||||
if (staged_gpu_accesses.empty()) {
|
||||
return;
|
||||
}
|
||||
if (fence_value == 0) {
|
||||
return;
|
||||
}
|
||||
auto& batch = committed_gpu_accesses.emplace_back(std::move(staged_gpu_accesses));
|
||||
staged_gpu_accesses.clear();
|
||||
committed_gpu_ticks.push_back(fence_value);
|
||||
for (const PendingImageAccess& access : batch) {
|
||||
ImageBase& image = slot_images[access.image_id];
|
||||
if (access.access == ImageAccessType::Read) {
|
||||
image.TrackPendingReadTick(fence_value);
|
||||
} else {
|
||||
image.TrackPendingWriteTick(fence_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::CompleteGpuAccesses(u64 completed_fence) {
|
||||
if (completed_fence == 0) {
|
||||
return;
|
||||
}
|
||||
while (!committed_gpu_ticks.empty() && committed_gpu_ticks.front() <= completed_fence) {
|
||||
auto accesses = std::move(committed_gpu_accesses.front());
|
||||
committed_gpu_accesses.pop_front();
|
||||
committed_gpu_ticks.pop_front();
|
||||
for (const PendingImageAccess& access : accesses) {
|
||||
if (!slot_images.Contains(access.image_id)) {
|
||||
continue;
|
||||
}
|
||||
ImageBase& image = slot_images[access.image_id];
|
||||
if (access.access == ImageAccessType::Read) {
|
||||
image.ClearPendingReadTick(completed_fence);
|
||||
} else {
|
||||
image.ClearPendingWriteTick(completed_fence);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::EnsureImageReady(ImageBase& image, ImageAccessType access) {
|
||||
auto wait_tick = [this](std::optional<u64> tick) -> std::optional<u64> {
|
||||
if (!tick) {
|
||||
return std::nullopt;
|
||||
}
|
||||
runtime.WaitForGpuTick(*tick);
|
||||
return tick;
|
||||
};
|
||||
|
||||
if (access == ImageAccessType::Write) {
|
||||
if (const auto tick = image.PendingReadTick()) {
|
||||
if (const auto waited = wait_tick(tick)) {
|
||||
image.ClearPendingReadTick(*waited);
|
||||
}
|
||||
}
|
||||
if (const auto tick = image.PendingWriteTick()) {
|
||||
if (const auto waited = wait_tick(tick)) {
|
||||
image.ClearPendingWriteTick(*waited);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (const auto tick = image.PendingWriteTick()) {
|
||||
if (const auto waited = wait_tick(tick)) {
|
||||
image.ClearPendingWriteTick(*waited);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
|
||||
bool invalidate) {
|
||||
@@ -2455,6 +2541,8 @@ template <class P>
|
||||
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
|
||||
Image& dst = slot_images[dst_id];
|
||||
Image& src = slot_images[src_id];
|
||||
EnsureImageReady(dst, ImageAccessType::Write);
|
||||
EnsureImageReady(src, ImageAccessType::Read);
|
||||
const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
|
||||
if (is_rescaled) {
|
||||
ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
|
||||
@@ -2471,20 +2559,30 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
|
||||
}
|
||||
}
|
||||
}
|
||||
const auto TrackCopyAccesses = [this, dst_id, src_id]() {
|
||||
TrackGpuImageAccess(dst_id, ImageAccessType::Write);
|
||||
TrackGpuImageAccess(src_id, ImageAccessType::Read);
|
||||
};
|
||||
const auto dst_format_type = GetFormatType(dst.info.format);
|
||||
const auto src_format_type = GetFormatType(src.info.format);
|
||||
if (src_format_type == dst_format_type) {
|
||||
if constexpr (HAS_EMULATED_COPIES) {
|
||||
if (!runtime.CanImageBeCopied(dst, src)) {
|
||||
return runtime.EmulateCopyImage(dst, src, copies);
|
||||
runtime.EmulateCopyImage(dst, src, copies);
|
||||
TrackCopyAccesses();
|
||||
return;
|
||||
}
|
||||
}
|
||||
return runtime.CopyImage(dst, src, copies);
|
||||
runtime.CopyImage(dst, src, copies);
|
||||
TrackCopyAccesses();
|
||||
return;
|
||||
}
|
||||
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
|
||||
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
|
||||
if (runtime.ShouldReinterpret(dst, src)) {
|
||||
return runtime.ReinterpretImage(dst, src, copies);
|
||||
runtime.ReinterpretImage(dst, src, copies);
|
||||
TrackCopyAccesses();
|
||||
return;
|
||||
}
|
||||
for (const ImageCopy& copy : copies) {
|
||||
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
|
||||
@@ -2537,6 +2635,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
|
||||
|
||||
runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
|
||||
}
|
||||
TrackCopyAccesses();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
||||
@@ -182,6 +182,11 @@ public:
|
||||
/// Return a reference to the given sampler id
|
||||
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
|
||||
|
||||
/// Returns true when the runtime format supports linear filtering
|
||||
[[nodiscard]] bool SupportsLinearFilter(PixelFormat format) const noexcept {
|
||||
return runtime.SupportsLinearFilter(format);
|
||||
}
|
||||
|
||||
/// Refresh the state for graphics image view and sampler descriptors
|
||||
void SynchronizeGraphicsDescriptors();
|
||||
|
||||
@@ -258,6 +263,15 @@ public:
|
||||
/// Prepare an image to be used
|
||||
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
|
||||
|
||||
/// Track that an image participates in upcoming GPU work with the given access type
|
||||
void TrackGpuImageAccess(ImageId image_id, ImageAccessType access);
|
||||
|
||||
/// Notify the cache that tracked GPU work has been submitted with the specified fence value
|
||||
void CommitPendingGpuAccesses(u64 fence_value);
|
||||
|
||||
/// Notify the cache that a fence value has completed so tracked accesses can be released
|
||||
void CompleteGpuAccesses(u64 completed_fence);
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
|
||||
private:
|
||||
@@ -408,6 +422,8 @@ private:
|
||||
/// Execute copies from one image to the other, even if they are incompatible
|
||||
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
|
||||
|
||||
void EnsureImageReady(ImageBase& image, ImageAccessType access);
|
||||
|
||||
/// Bind an image view as render target, downloading resources preemtively if needed
|
||||
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
|
||||
|
||||
@@ -465,6 +481,11 @@ private:
|
||||
Common::SlotId object_id;
|
||||
};
|
||||
|
||||
struct PendingImageAccess {
|
||||
ImageId image_id;
|
||||
ImageAccessType access;
|
||||
};
|
||||
|
||||
Common::SlotVector<Image> slot_images;
|
||||
Common::SlotVector<ImageMapView> slot_map_views;
|
||||
Common::SlotVector<ImageView> slot_image_views;
|
||||
@@ -480,6 +501,9 @@ private:
|
||||
std::vector<AsyncBuffer> uncommitted_async_buffers;
|
||||
std::deque<std::vector<AsyncBuffer>> async_buffers;
|
||||
std::deque<AsyncBuffer> async_buffers_death_ring;
|
||||
std::vector<PendingImageAccess> staged_gpu_accesses;
|
||||
std::deque<std::vector<PendingImageAccess>> committed_gpu_accesses;
|
||||
std::deque<u64> committed_gpu_ticks;
|
||||
|
||||
struct LRUItemParams {
|
||||
using ObjectType = ImageId;
|
||||
|
||||
@@ -155,4 +155,9 @@ struct SwizzleParameters {
|
||||
s32 level;
|
||||
};
|
||||
|
||||
enum class ImageAccessType : u8 {
|
||||
Read,
|
||||
Write,
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -22,6 +22,14 @@
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
// Define maintenance 7-9 extension names (not yet in official Vulkan headers)
|
||||
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
|
||||
#define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
|
||||
#endif
|
||||
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
|
||||
#define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
|
||||
#endif
|
||||
|
||||
// Sanitize macros
|
||||
#undef CreateEvent
|
||||
#undef CreateSemaphore
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
@@ -90,8 +91,57 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
constexpr std::array B10G11R11_UFLOAT_PACK32{
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
constexpr std::array R16G16B16A16_SFLOAT{
|
||||
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
constexpr std::array R16G16B16A16_UNORM{
|
||||
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
struct UnsupportedFormatKey {
|
||||
VkFormat format;
|
||||
VkFormatFeatureFlags usage;
|
||||
FormatType type;
|
||||
|
||||
bool operator==(const UnsupportedFormatKey&) const noexcept = default;
|
||||
};
|
||||
|
||||
struct UnsupportedFormatKeyHash {
|
||||
size_t operator()(const UnsupportedFormatKey& key) const noexcept {
|
||||
size_t seed = std::hash<int>{}(static_cast<int>(key.format));
|
||||
seed ^= static_cast<size_t>(key.usage) + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
|
||||
seed ^= static_cast<size_t>(key.type) + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
|
||||
return seed;
|
||||
}
|
||||
};
|
||||
|
||||
bool ShouldLogUnsupportedFormat(VkFormat format, VkFormatFeatureFlags usage, FormatType type) {
|
||||
static std::mutex mutex;
|
||||
static std::unordered_set<UnsupportedFormatKey, UnsupportedFormatKeyHash> logged_keys;
|
||||
const UnsupportedFormatKey key{format, usage, type};
|
||||
std::scoped_lock lock{mutex};
|
||||
const auto [it, inserted] = logged_keys.insert(key);
|
||||
return inserted;
|
||||
}
|
||||
|
||||
[[maybe_unused]] constexpr VkShaderStageFlags GraphicsStageMask =
|
||||
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
|
||||
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT |
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
|
||||
template <typename T>
|
||||
void SetNext(void**& next, T& data) {
|
||||
*next = &data;
|
||||
@@ -122,6 +172,12 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
|
||||
return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
|
||||
case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
|
||||
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
|
||||
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
|
||||
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
|
||||
case VK_FORMAT_R16G16B16A16_SFLOAT:
|
||||
return Alternatives::R16G16B16A16_SFLOAT.data();
|
||||
case VK_FORMAT_R16G16B16A16_UNORM:
|
||||
return Alternatives::R16G16B16A16_UNORM.data();
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
@@ -289,32 +345,6 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
|
||||
return format_properties;
|
||||
}
|
||||
|
||||
#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
|
||||
void OverrideBcnFormats(std::unordered_map<VkFormat, VkFormatProperties>& format_properties) {
|
||||
// These properties are extracted from Adreno driver 512.687.0
|
||||
constexpr VkFormatFeatureFlags tiling_features{
|
||||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
|
||||
|
||||
constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT};
|
||||
|
||||
static constexpr std::array bcn_formats{
|
||||
VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK,
|
||||
VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK,
|
||||
VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK,
|
||||
VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK,
|
||||
VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK,
|
||||
};
|
||||
|
||||
for (const auto format : bcn_formats) {
|
||||
format_properties[format].linearTilingFeatures = tiling_features;
|
||||
format_properties[format].optimalTilingFeatures = tiling_features;
|
||||
format_properties[format].bufferFeatures = buffer_features;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
|
||||
const std::set<std::string, std::less<>>& exts) {
|
||||
VkPhysicalDeviceProperties2 physical_properties{};
|
||||
@@ -376,10 +406,10 @@ void Device::RemoveExtension(bool& extension, const std::string& extension_name)
|
||||
loaded_extensions.erase(extension_name);
|
||||
}
|
||||
|
||||
void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) {
|
||||
if (loaded_extensions.contains(extension_name) && !is_suitable) {
|
||||
void Device::RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name) {
|
||||
if (loaded_extensions.contains(extension_name) && !extension) {
|
||||
LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name);
|
||||
this->RemoveExtension(is_suitable, extension_name);
|
||||
this->RemoveExtension(extension, extension_name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -400,11 +430,12 @@ void Device::RemoveExtensionFeature(bool& extension, Feature& feature,
|
||||
}
|
||||
|
||||
template <typename Feature>
|
||||
void Device::RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
|
||||
void Device::RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
|
||||
const std::string& extension_name) {
|
||||
if (loaded_extensions.contains(extension_name) && !is_suitable) {
|
||||
LOG_WARNING(Render_Vulkan, "Removing features for unsuitable extension {}", extension_name);
|
||||
this->RemoveExtensionFeature(is_suitable, feature, extension_name);
|
||||
if (loaded_extensions.contains(extension_name) && !extension) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Removing features for unsuitable extension {}", extension_name);
|
||||
this->RemoveExtensionFeature(extension, feature, extension_name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -416,7 +447,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
const bool is_suitable = GetSuitability(surface != nullptr);
|
||||
|
||||
const VkDriverId driver_id = properties.driver.driverID;
|
||||
const auto device_id = properties.properties.deviceID;
|
||||
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
|
||||
const bool is_amd_driver =
|
||||
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
|
||||
@@ -427,11 +457,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
|
||||
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
|
||||
const bool is_s8gen2 = device_id == 0x43050a01;
|
||||
//const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
|
||||
const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
|
||||
|
||||
if (!is_suitable)
|
||||
LOG_WARNING(Render_Vulkan, "Unsuitable driver - continuing anyways");
|
||||
if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) {
|
||||
LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway");
|
||||
} else if (!is_suitable) {
|
||||
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
|
||||
}
|
||||
|
||||
if (is_nvidia) {
|
||||
nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
|
||||
@@ -492,36 +524,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
CollectToolingInfo();
|
||||
|
||||
if (is_qualcomm) {
|
||||
// Qualcomm Adreno GPUs doesn't handle scaled vertex attributes; keep emulation enabled
|
||||
must_emulate_scaled_formats = true;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
|
||||
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||
"Qualcomm drivers require scaled vertex format emulation; forcing fallback");
|
||||
|
||||
if (extensions.shader_float_controls) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm drivers: VK_KHR_shader_float_controls is unstable; disabling usage");
|
||||
RemoveExtension(extensions.shader_float_controls,
|
||||
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||
} else {
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"Qualcomm drivers: VK_KHR_shader_float_controls already unavailable");
|
||||
}
|
||||
disable_shader_float_controls_usage = true;
|
||||
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
|
||||
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
|
||||
features.shader_atomic_int64.shaderBufferInt64Atomics = false;
|
||||
features.shader_atomic_int64.shaderSharedInt64Atomics = false;
|
||||
features.features.shaderInt64 = false;
|
||||
|
||||
#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
|
||||
// Patch the driver to enable BCn textures.
|
||||
const auto major = (properties.properties.driverVersion >> 24) << 2;
|
||||
const auto minor = (properties.properties.driverVersion >> 12) & 0xFFFU;
|
||||
const auto vendor = properties.properties.vendorID;
|
||||
const auto patch_status = adrenotools_get_bcn_type(major, minor, vendor);
|
||||
|
||||
if (patch_status == ADRENOTOOLS_BCN_PATCH) {
|
||||
LOG_INFO(Render_Vulkan, "Patching Adreno driver to support BCn texture formats");
|
||||
if (adrenotools_patch_bcn(
|
||||
reinterpret_cast<void*>(dld.vkGetPhysicalDeviceFormatProperties))) {
|
||||
OverrideBcnFormats(format_properties);
|
||||
} else {
|
||||
LOG_ERROR(Render_Vulkan, "Patch failed! Driver code may now crash");
|
||||
}
|
||||
} else if (patch_status == ADRENOTOOLS_BCN_BLOB) {
|
||||
LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures without patches");
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Adreno driver can't be patched to enable BCn textures");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (is_nvidia) {
|
||||
@@ -533,35 +556,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
}
|
||||
|
||||
if (nv_major_version >= 510) {
|
||||
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"NVIDIA Drivers >= 510 do not support MSAA->MSAA image blits. "
|
||||
"MSAA scaling will use 3D helpers. MSAA resolves work normally.");
|
||||
cant_blit_msaa = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (extensions.extended_dynamic_state3 && is_radv) {
|
||||
LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
|
||||
dynamic_state3_blending = false;
|
||||
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 23, 1, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"RADV versions older than 23.1.0 have broken depth clamp dynamic state");
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
|
||||
dynamic_state3_enables = false;
|
||||
// Mali/ NVIDIA proprietary drivers: Shader stencil export not supported
|
||||
// Use hardware depth/stencil blits instead when available
|
||||
if (!extensions.shader_stencil_export) {
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"NVIDIA: VK_EXT_shader_stencil_export not supported, using hardware blits "
|
||||
"for depth/stencil operations");
|
||||
LOG_INFO(Render_Vulkan, " D24S8 hardware blit support: {}",
|
||||
is_blit_depth24_stencil8_supported);
|
||||
LOG_INFO(Render_Vulkan, " D32S8 hardware blit support: {}",
|
||||
is_blit_depth32_stencil8_supported);
|
||||
|
||||
if (!is_blit_depth24_stencil8_supported && !is_blit_depth32_stencil8_supported) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"NVIDIA: Neither shader export nor hardware blits available for "
|
||||
"depth/stencil. Performance may be degraded.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (extensions.extended_dynamic_state3 && (is_amd_driver || driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
|
||||
// AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation");
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
|
||||
dynamic_state3_blending = false;
|
||||
}
|
||||
|
||||
sets_per_pool = 64;
|
||||
if (is_amd_driver) {
|
||||
// AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
|
||||
@@ -574,11 +593,44 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
}
|
||||
}
|
||||
|
||||
const bool needs_mobile_alignment_clamp = is_qualcomm || is_arm;
|
||||
|
||||
if (is_qualcomm) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
|
||||
has_broken_parallel_compiling = true;
|
||||
has_broken_parallel_compiling = true;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Adreno drivers exhibit instability with parallel shader compilation; "
|
||||
"forcing single-threaded pipeline builds");
|
||||
if (extensions.push_descriptor) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm driver mishandles push descriptors; disabling "
|
||||
"VK_KHR_push_descriptor to avoid incomplete descriptor writes");
|
||||
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
if (needs_mobile_alignment_clamp) {
|
||||
const char* driver_label = is_qualcomm ? "Qualcomm" : "ARM";
|
||||
|
||||
constexpr VkDeviceSize MIN_UNIFORM_ALIGNMENT = 256;
|
||||
const VkDeviceSize reported_uniform_alignment =
|
||||
properties.properties.limits.minUniformBufferOffsetAlignment;
|
||||
if (reported_uniform_alignment < MIN_UNIFORM_ALIGNMENT) {
|
||||
uniform_buffer_alignment_minimum = MIN_UNIFORM_ALIGNMENT;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"{} driver reports {}-byte minUniformBufferOffsetAlignment; clamping to {}",
|
||||
driver_label, reported_uniform_alignment, uniform_buffer_alignment_minimum);
|
||||
}
|
||||
|
||||
constexpr VkDeviceSize MIN_STORAGE_ALIGNMENT = 64;
|
||||
const VkDeviceSize reported_storage_alignment =
|
||||
properties.properties.limits.minStorageBufferOffsetAlignment;
|
||||
if (reported_storage_alignment < MIN_STORAGE_ALIGNMENT) {
|
||||
storage_buffer_alignment_minimum = MIN_STORAGE_ALIGNMENT;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"{} driver reports {}-byte minStorageBufferOffsetAlignment; clamping to {}",
|
||||
driver_label, reported_storage_alignment, storage_buffer_alignment_minimum);
|
||||
}
|
||||
|
||||
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
|
||||
if (sampler_limit > 0) {
|
||||
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
|
||||
@@ -587,9 +639,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
|
||||
sampler_heap_budget = derived_budget;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm driver reports max {} samplers; reserving {} (25%) and "
|
||||
"{} driver reports max {} samplers; reserving {} (25%) and "
|
||||
"allowing Eden to use {} (75%) to avoid heap exhaustion",
|
||||
sampler_limit, reserved, sampler_heap_budget);
|
||||
driver_label, sampler_limit, reserved, sampler_heap_budget);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -610,15 +662,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
}
|
||||
|
||||
if (is_intel_windows) {
|
||||
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Intel proprietary drivers do not support MSAA->MSAA image blits. "
|
||||
"MSAA scaling will use 3D helpers. MSAA resolves work normally.");
|
||||
cant_blit_msaa = true;
|
||||
}
|
||||
|
||||
has_broken_compute =
|
||||
CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
|
||||
!Settings::values.enable_compute_pipelines.GetValue();
|
||||
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
|
||||
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
|
||||
must_emulate_bgr565 = false; // Default: assume emulation isn't required
|
||||
|
||||
if (is_intel_anv) {
|
||||
LOG_WARNING(Render_Vulkan, "Intel ANV driver does not support native BGR format");
|
||||
must_emulate_bgr565 = true;
|
||||
} else if (is_qualcomm) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm driver mishandles BGR5 formats, forcing emulation");
|
||||
must_emulate_bgr565 = true;
|
||||
} else if (is_arm) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"ARM Mali driver mishandles BGR5 formats, forcing emulation");
|
||||
must_emulate_bgr565 = true;
|
||||
}
|
||||
|
||||
@@ -631,51 +695,55 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
(std::min)(properties.properties.limits.maxVertexInputBindings, 16U);
|
||||
}
|
||||
|
||||
if (is_turnip) {
|
||||
LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits");
|
||||
if (is_turnip || is_qualcomm) {
|
||||
LOG_WARNING(Render_Vulkan, "Driver requires higher-than-reported binding limits");
|
||||
properties.properties.limits.maxVertexInputBindings = 32;
|
||||
}
|
||||
|
||||
if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) {
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"Removing extendedDynamicState2 due to missing extendedDynamicState");
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (!extensions.extended_dynamic_state2 && extensions.extended_dynamic_state3) {
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"Removing extendedDynamicState3 due to missing extendedDynamicState2");
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
dynamic_state3_blending = false;
|
||||
dynamic_state3_enables = false;
|
||||
}
|
||||
|
||||
// Mesa Intel drivers on UHD 620 have broken EDS causing extreme flickering - unknown if it affects other iGPUs
|
||||
// ALSO affects ALL versions of UHD drivers on Windows 10+, seems to cause even worse issues like straight up crashing
|
||||
// So... Yeah, UHD drivers fucking suck -- maybe one day we can work past this, maybe; some driver hacking?
|
||||
// And then we can rest in peace by doing `< VK_MAKE_API_VERSION(26, 0, 0)` for our beloved mesa drivers... one day
|
||||
if ((is_mvk || (is_integrated && is_intel_anv) || (is_integrated && is_intel_windows)) && Settings::values.dyna_state.GetValue() != 0) {
|
||||
LOG_WARNING(Render_Vulkan, "Driver has broken dynamic state, forcing to 0 to prevent graphical issues");
|
||||
Settings::values.dyna_state.SetValue(0);
|
||||
}
|
||||
|
||||
// Base dynamic states (VIEWPORT, SCISSOR, DEPTH_BIAS, etc.) are ALWAYS active in vk_graphics_pipeline.cpp
|
||||
// This slider controls EXTENDED dynamic states with accumulative levels per Vulkan specs:
|
||||
// Level 0 = Core Dynamic States only (Vulkan 1.0)
|
||||
// Level 1 = Core + VK_EXT_extended_dynamic_state
|
||||
// Level 2 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2
|
||||
// Level 3 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2 + VK_EXT_extended_dynamic_state3
|
||||
|
||||
switch (Settings::values.dyna_state.GetValue()) {
|
||||
case 0:
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
[[fallthrough]];
|
||||
case 1:
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
[[fallthrough]];
|
||||
case 2:
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
// Level 0: Disable all extended dynamic state extensions
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
dynamic_state3_blending = false;
|
||||
dynamic_state3_enables = false;
|
||||
break;
|
||||
case 1:
|
||||
// Level 1: Enable EDS1, disable EDS2 and EDS3
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
dynamic_state3_blending = false;
|
||||
dynamic_state3_enables = false;
|
||||
break;
|
||||
case 2:
|
||||
// Level 2: Enable EDS1 + EDS2, disable EDS3
|
||||
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
dynamic_state3_blending = false;
|
||||
dynamic_state3_enables = false;
|
||||
break;
|
||||
case 3:
|
||||
default:
|
||||
// Level 3: Enable all (EDS1 + EDS2 + EDS3)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!Settings::values.vertex_input_dynamic_state.GetValue() || !extensions.extended_dynamic_state) {
|
||||
// VK_EXT_vertex_input_dynamic_state is independent from EDS
|
||||
// It can be enabled even without extended_dynamic_state
|
||||
if (!Settings::values.vertex_input_dynamic_state.GetValue()) {
|
||||
RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
@@ -720,32 +788,63 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
|
||||
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
|
||||
return wanted_format;
|
||||
}
|
||||
// The wanted format is not supported by hardware, search for alternatives
|
||||
|
||||
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
|
||||
if (alternatives == nullptr) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Format={} with usage={} and type={} has no defined alternatives and host "
|
||||
"hardware does not support it",
|
||||
wanted_format, wanted_usage, format_type);
|
||||
if (ShouldLogUnsupportedFormat(wanted_format, wanted_usage, format_type)) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Format={} with usage={} and type={} has no defined alternatives and host "
|
||||
"hardware does not support it",
|
||||
wanted_format, wanted_usage, format_type);
|
||||
}
|
||||
return wanted_format;
|
||||
}
|
||||
|
||||
std::size_t i = 0;
|
||||
for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) {
|
||||
#if defined(ANDROID)
|
||||
const auto downgrade_r16_to_ldr = [&](VkFormat candidate) -> VkFormat {
|
||||
if (wanted_format != VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
|
||||
return candidate;
|
||||
}
|
||||
if (candidate != VK_FORMAT_R16G16B16A16_SFLOAT &&
|
||||
candidate != VK_FORMAT_R16G16B16A16_UNORM) {
|
||||
return candidate;
|
||||
}
|
||||
static constexpr std::array ldr_candidates{
|
||||
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_B8G8R8A8_SRGB,
|
||||
};
|
||||
for (const VkFormat format : ldr_candidates) {
|
||||
if (IsFormatSupported(format, wanted_usage, format_type)) {
|
||||
return format;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
};
|
||||
#endif
|
||||
|
||||
for (std::size_t i = 0; VkFormat alternative = alternatives[i]; ++i) {
|
||||
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
|
||||
continue;
|
||||
}
|
||||
#if defined(ANDROID)
|
||||
const VkFormat selected = downgrade_r16_to_ldr(alternative);
|
||||
#else
|
||||
const VkFormat selected = alternative;
|
||||
#endif
|
||||
LOG_DEBUG(Render_Vulkan,
|
||||
"Emulating format={} with alternative format={} with usage={} and type={}",
|
||||
wanted_format, alternative, wanted_usage, format_type);
|
||||
return alternative;
|
||||
wanted_format, selected, wanted_usage, format_type);
|
||||
return selected;
|
||||
}
|
||||
|
||||
// No alternatives found, panic
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Format={} with usage={} and type={} is not supported by the host hardware and "
|
||||
"doesn't support any of the alternatives",
|
||||
wanted_format, wanted_usage, format_type);
|
||||
if (ShouldLogUnsupportedFormat(wanted_format, wanted_usage, format_type)) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Format={} with usage={} and type={} is not supported by the host hardware and "
|
||||
"doesn't support any of the alternatives",
|
||||
wanted_format, wanted_usage, format_type);
|
||||
}
|
||||
return wanted_format;
|
||||
}
|
||||
|
||||
@@ -763,8 +862,8 @@ void Device::SaveShader(std::span<const u32> spirv) const {
|
||||
}
|
||||
|
||||
bool Device::ComputeIsOptimalAstcSupported() const {
|
||||
// Disable for now to avoid converting ASTC twice.
|
||||
static constexpr std::array astc_formats = {
|
||||
// Verify hardware supports all ASTC formats with optimal tiling to avoid software conversion
|
||||
static constexpr std::array<VkFormat, 28> astc_formats = {
|
||||
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||
@@ -783,9 +882,10 @@ bool Device::ComputeIsOptimalAstcSupported() const {
|
||||
if (!features.features.textureCompressionASTC_LDR) {
|
||||
return false;
|
||||
}
|
||||
const auto format_feature_usage{
|
||||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
|
||||
const auto format_feature_usage{VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
|
||||
VK_FORMAT_FEATURE_BLIT_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_BLIT_DST_BIT |
|
||||
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
|
||||
for (const auto format : astc_formats) {
|
||||
const auto physical_format_properties{physical.GetFormatProperties(format)};
|
||||
@@ -891,11 +991,8 @@ bool Device::ShouldBoostClocks() const {
|
||||
}
|
||||
|
||||
bool Device::HasTimelineSemaphore() const {
|
||||
if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
// Timeline semaphores do not work properly on all Qualcomm drivers.
|
||||
// They generally work properly with Turnip drivers, but are problematic on some devices
|
||||
// (e.g. ZTE handsets with Snapdragon 870).
|
||||
const VkDriverIdKHR driver_id = GetDriverID();
|
||||
if (driver_id == VK_DRIVER_ID_MESA_TURNIP || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
|
||||
return false;
|
||||
}
|
||||
return features.timeline_semaphore.timelineSemaphore;
|
||||
@@ -982,14 +1079,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||
// Set next pointer.
|
||||
void** next = &features2.pNext;
|
||||
|
||||
// Vulkan 1.2, 1.3 and 1.4 features
|
||||
// Vulkan 1.2 and 1.3 features
|
||||
if (instance_version >= VK_API_VERSION_1_2) {
|
||||
features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
|
||||
features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
|
||||
|
||||
features_1_2.pNext = &features_1_3;
|
||||
|
||||
features_1_2.pNext = nullptr;
|
||||
*next = &features_1_2;
|
||||
if (instance_version >= VK_API_VERSION_1_3) {
|
||||
features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
|
||||
features_1_3.pNext = nullptr;
|
||||
features_1_2.pNext = &features_1_3;
|
||||
}
|
||||
}
|
||||
|
||||
// Test all features we know about. If the feature is not available in core at our
|
||||
@@ -1088,6 +1187,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
|
||||
SetNext(next, properties.transform_feedback);
|
||||
}
|
||||
if (extensions.maintenance5) {
|
||||
properties.maintenance5.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_PROPERTIES_KHR;
|
||||
SetNext(next, properties.maintenance5);
|
||||
}
|
||||
if (extensions.multi_draw) {
|
||||
properties.multi_draw.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT;
|
||||
SetNext(next, properties.multi_draw);
|
||||
}
|
||||
|
||||
// Perform the property fetch.
|
||||
physical.GetProperties2(properties2);
|
||||
@@ -1120,14 +1229,85 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||
}
|
||||
}
|
||||
|
||||
// VK_DYNAMIC_STATE
|
||||
|
||||
// Driver detection variables for workarounds in GetSuitability
|
||||
const VkDriverId driver_id = properties.driver.driverID;
|
||||
[[maybe_unused]] const bool is_amd_driver =
|
||||
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
|
||||
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
|
||||
[[maybe_unused]] const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
|
||||
|
||||
// VK_EXT_CUSTOM_BORDER_COLOR
|
||||
|
||||
if (extensions.custom_border_color && (is_qualcomm || is_turnip)) {
|
||||
const char* driver_name = is_turnip ? "Turnip" : "Adreno";
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"{} driver previously required custom border color disablement; leaving VK_EXT_custom_border_color enabled for testing",
|
||||
driver_name);
|
||||
}
|
||||
|
||||
// VK_EXT_extended_dynamic_state2 below this will appear drivers that need workarounds.
|
||||
|
||||
// VK_EXT_extended_dynamic_state3 below this will appear drivers that need workarounds.
|
||||
|
||||
[[maybe_unused]] const auto device_id = properties.properties.deviceID;
|
||||
|
||||
// Samsung: Broken extendedDynamicState3ColorBlendEquation
|
||||
// Disable blend equation dynamic state, force static pipeline state
|
||||
if (extensions.extended_dynamic_state3 &&
|
||||
(driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Samsung: Disabling broken extendedDynamicState3ColorBlendEquation");
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
|
||||
}
|
||||
|
||||
// Intel Windows < 27.20.100.0: Broken VertexInputDynamicState
|
||||
// Disable VertexInputDynamicState on old Intel Windows drivers
|
||||
if (extensions.vertex_input_dynamic_state && is_intel_windows) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Intel Windows < 27.20.100.0: Disabling broken VK_EXT_vertex_input_dynamic_state");
|
||||
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
||||
features.vertex_input_dynamic_state,
|
||||
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
if (Settings::values.dyna_state.GetValue() == 0) {
|
||||
LOG_INFO(Render_Vulkan, "Extended Dynamic State disabled by user setting, clearing all EDS features");
|
||||
features.custom_border_color.customBorderColors = false;
|
||||
features.custom_border_color.customBorderColorWithoutFormat = false;
|
||||
features.extended_dynamic_state.extendedDynamicState = false;
|
||||
features.extended_dynamic_state2.extendedDynamicState2 = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
|
||||
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
|
||||
}
|
||||
|
||||
// Return whether we were suitable.
|
||||
return suitable;
|
||||
}
|
||||
|
||||
void Device::RemoveUnsuitableExtensions() {
|
||||
// VK_EXT_custom_border_color
|
||||
extensions.custom_border_color = features.custom_border_color.customBorderColors &&
|
||||
features.custom_border_color.customBorderColorWithoutFormat;
|
||||
// Enable extension if driver supports it, then check individual features
|
||||
// - customBorderColors: Required to use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT
|
||||
// - customBorderColorWithoutFormat: Optional, allows VK_FORMAT_UNDEFINED
|
||||
// If only customBorderColors is available, we must provide a specific format
|
||||
if (extensions.custom_border_color) {
|
||||
// Verify that at least customBorderColors is available
|
||||
if (!features.custom_border_color.customBorderColors) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"VK_EXT_custom_border_color reported but customBorderColors feature not available, disabling");
|
||||
extensions.custom_border_color = false;
|
||||
}
|
||||
}
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.custom_border_color, features.custom_border_color,
|
||||
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
|
||||
@@ -1156,21 +1336,79 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_extended_dynamic_state3
|
||||
dynamic_state3_blending =
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable &&
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation &&
|
||||
const bool supports_color_blend_enable =
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable;
|
||||
const bool supports_color_blend_equation =
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation;
|
||||
const bool supports_color_write_mask =
|
||||
features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask;
|
||||
dynamic_state3_enables =
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable &&
|
||||
dynamic_state3_blending = supports_color_blend_enable && supports_color_blend_equation &&
|
||||
supports_color_write_mask;
|
||||
|
||||
const bool supports_depth_clamp_enable =
|
||||
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable;
|
||||
const bool supports_logic_op_enable =
|
||||
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
|
||||
const bool supports_line_raster_mode =
|
||||
features.extended_dynamic_state3.extendedDynamicState3LineRasterizationMode &&
|
||||
extensions.line_rasterization && features.line_rasterization.rectangularLines;
|
||||
const bool supports_conservative_raster_mode =
|
||||
features.extended_dynamic_state3.extendedDynamicState3ConservativeRasterizationMode &&
|
||||
extensions.conservative_rasterization;
|
||||
const bool supports_line_stipple_enable =
|
||||
features.extended_dynamic_state3.extendedDynamicState3LineStippleEnable &&
|
||||
extensions.line_rasterization && features.line_rasterization.stippledRectangularLines;
|
||||
const bool supports_alpha_to_coverage =
|
||||
features.extended_dynamic_state3.extendedDynamicState3AlphaToCoverageEnable;
|
||||
const bool supports_alpha_to_one =
|
||||
features.extended_dynamic_state3.extendedDynamicState3AlphaToOneEnable &&
|
||||
features.features.alphaToOne;
|
||||
|
||||
dynamic_state3_depth_clamp_enable = supports_depth_clamp_enable;
|
||||
dynamic_state3_logic_op_enable = supports_logic_op_enable;
|
||||
dynamic_state3_line_raster_mode = supports_line_raster_mode;
|
||||
dynamic_state3_conservative_raster_mode = supports_conservative_raster_mode;
|
||||
dynamic_state3_line_stipple_enable = supports_line_stipple_enable;
|
||||
dynamic_state3_alpha_to_coverage = supports_alpha_to_coverage;
|
||||
dynamic_state3_alpha_to_one = supports_alpha_to_one;
|
||||
|
||||
dynamic_state3_enables = dynamic_state3_depth_clamp_enable || dynamic_state3_logic_op_enable ||
|
||||
dynamic_state3_line_raster_mode ||
|
||||
dynamic_state3_conservative_raster_mode ||
|
||||
dynamic_state3_line_stipple_enable ||
|
||||
dynamic_state3_alpha_to_coverage || dynamic_state3_alpha_to_one;
|
||||
|
||||
extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables;
|
||||
dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3;
|
||||
dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3;
|
||||
if (!extensions.extended_dynamic_state3) {
|
||||
dynamic_state3_blending = false;
|
||||
dynamic_state3_enables = false;
|
||||
dynamic_state3_depth_clamp_enable = false;
|
||||
dynamic_state3_logic_op_enable = false;
|
||||
dynamic_state3_line_raster_mode = false;
|
||||
dynamic_state3_conservative_raster_mode = false;
|
||||
dynamic_state3_line_stipple_enable = false;
|
||||
dynamic_state3_alpha_to_coverage = false;
|
||||
dynamic_state3_alpha_to_one = false;
|
||||
}
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state3,
|
||||
features.extended_dynamic_state3,
|
||||
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_robustness2
|
||||
// Enable if at least one robustness2 feature is available
|
||||
extensions.robustness_2 = features.robustness2.robustBufferAccess2 ||
|
||||
features.robustness2.robustImageAccess2 ||
|
||||
features.robustness2.nullDescriptor;
|
||||
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2,
|
||||
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_image_robustness
|
||||
// Enable if robustImageAccess is available
|
||||
extensions.image_robustness = features.image_robustness.robustImageAccess;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness,
|
||||
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_provoking_vertex
|
||||
if (Settings::values.provoking_vertex.GetValue()) {
|
||||
extensions.provoking_vertex = features.provoking_vertex.provokingVertexLast
|
||||
@@ -1215,6 +1453,14 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
properties.transform_feedback.maxTransformFeedbackBuffers > 0 &&
|
||||
properties.transform_feedback.transformFeedbackQueries &&
|
||||
properties.transform_feedback.transformFeedbackDraw;
|
||||
|
||||
if (extensions.transform_feedback) {
|
||||
features.transform_feedback.transformFeedback = VK_TRUE;
|
||||
features.transform_feedback.geometryStreams = VK_TRUE;
|
||||
} else {
|
||||
features.transform_feedback.transformFeedback = VK_FALSE;
|
||||
features.transform_feedback.geometryStreams = VK_FALSE;
|
||||
}
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
|
||||
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
|
||||
|
||||
@@ -1225,6 +1471,18 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
features.vertex_input_dynamic_state,
|
||||
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_multi_draw
|
||||
extensions.multi_draw = features.multi_draw.multiDraw;
|
||||
if (extensions.multi_draw) {
|
||||
features.multi_draw.multiDraw = VK_TRUE;
|
||||
LOG_INFO(Render_Vulkan, "VK_EXT_multi_draw: maxMultiDrawCount={}",
|
||||
properties.multi_draw.maxMultiDrawCount);
|
||||
} else {
|
||||
features.multi_draw.multiDraw = VK_FALSE;
|
||||
}
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.multi_draw, features.multi_draw,
|
||||
VK_EXT_MULTI_DRAW_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_pipeline_executable_properties
|
||||
if (Settings::values.renderer_shader_feedback.GetValue()) {
|
||||
extensions.pipeline_executable_properties =
|
||||
@@ -1248,6 +1506,98 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
|
||||
features.workgroup_memory_explicit_layout,
|
||||
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
|
||||
|
||||
// VK_EXT_swapchain_maintenance1 (extension only, has features)
|
||||
// Requires VK_EXT_surface_maintenance1 instance extension
|
||||
extensions.swapchain_maintenance1 = features.swapchain_maintenance1.swapchainMaintenance1;
|
||||
if (extensions.swapchain_maintenance1) {
|
||||
// Check if VK_EXT_surface_maintenance1 instance extension is available
|
||||
const auto instance_extensions = vk::EnumerateInstanceExtensionProperties(dld);
|
||||
const bool has_surface_maintenance1 = instance_extensions && std::ranges::any_of(*instance_extensions,
|
||||
[](const VkExtensionProperties& prop) {
|
||||
return std::strcmp(prop.extensionName, VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME) == 0;
|
||||
});
|
||||
if (!has_surface_maintenance1) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"VK_EXT_swapchain_maintenance1 requires VK_EXT_surface_maintenance1, disabling");
|
||||
extensions.swapchain_maintenance1 = false;
|
||||
features.swapchain_maintenance1.swapchainMaintenance1 = false;
|
||||
}
|
||||
}
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.swapchain_maintenance1, features.swapchain_maintenance1,
|
||||
VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance1 (core in Vulkan 1.1, no features)
|
||||
extensions.maintenance1 = loaded_extensions.contains(VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
|
||||
RemoveExtensionIfUnsuitable(extensions.maintenance1, VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance2 (core in Vulkan 1.1, no features)
|
||||
extensions.maintenance2 = loaded_extensions.contains(VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
|
||||
RemoveExtensionIfUnsuitable(extensions.maintenance2, VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance3 (core in Vulkan 1.1, no features)
|
||||
extensions.maintenance3 = loaded_extensions.contains(VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
|
||||
RemoveExtensionIfUnsuitable(extensions.maintenance3, VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance4
|
||||
extensions.maintenance4 = features.maintenance4.maintenance4;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance4, features.maintenance4,
|
||||
VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance5
|
||||
extensions.maintenance5 = features.maintenance5.maintenance5;
|
||||
if (extensions.maintenance5) {
|
||||
LOG_INFO(Render_Vulkan, "VK_KHR_maintenance5 properties: polygonModePointSize={} "
|
||||
"depthStencilSwizzleOne={} earlyFragmentTests={} nonStrictWideLines={}",
|
||||
properties.maintenance5.polygonModePointSize,
|
||||
properties.maintenance5.depthStencilSwizzleOneSupport,
|
||||
properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting &&
|
||||
properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting,
|
||||
properties.maintenance5.nonStrictWideLinesUseParallelogram);
|
||||
}
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance5, features.maintenance5,
|
||||
VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance6
|
||||
extensions.maintenance6 = features.maintenance6.maintenance6;
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance6, features.maintenance6,
|
||||
VK_KHR_MAINTENANCE_6_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance7 (proposed for Vulkan 1.4, no features)
|
||||
extensions.maintenance7 = loaded_extensions.contains(VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
|
||||
RemoveExtensionIfUnsuitable(extensions.maintenance7, VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
|
||||
|
||||
// VK_KHR_maintenance8 (proposed for Vulkan 1.4, no features)
|
||||
extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
|
||||
RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
|
||||
|
||||
}
|
||||
|
||||
bool Device::SupportsSubgroupStage(VkShaderStageFlags stage_mask) const {
|
||||
if (stage_mask == 0) {
|
||||
return true;
|
||||
}
|
||||
const VkShaderStageFlags supported = properties.subgroup_properties.supportedStages;
|
||||
return (supported & stage_mask) == stage_mask;
|
||||
}
|
||||
|
||||
bool Device::IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature,
|
||||
VkShaderStageFlags stage_mask) const {
|
||||
if ((properties.subgroup_properties.supportedOperations & feature) == 0) {
|
||||
return false;
|
||||
}
|
||||
return SupportsSubgroupStage(stage_mask);
|
||||
}
|
||||
|
||||
bool Device::SupportsWarpIntrinsics(VkShaderStageFlagBits stage) const {
|
||||
constexpr VkSubgroupFeatureFlags required_ops =
|
||||
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
|
||||
VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
|
||||
if ((properties.subgroup_properties.supportedOperations & required_ops) != required_ops) {
|
||||
return false;
|
||||
}
|
||||
return SupportsSubgroupStage(stage);
|
||||
}
|
||||
|
||||
void Device::SetupFamilies(VkSurfaceKHR surface) {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <span>
|
||||
@@ -38,9 +39,13 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
|
||||
|
||||
#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \
|
||||
FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \
|
||||
FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \
|
||||
shader_demote_to_helper_invocation) \
|
||||
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control)
|
||||
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \
|
||||
FEATURE(KHR, Maintenance4, MAINTENANCE_4, maintenance4)
|
||||
|
||||
#define FOR_EACH_VK_FEATURE_1_4(FEATURE)
|
||||
|
||||
// Define all features which may be used by the implementation and require an extension here.
|
||||
#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
|
||||
@@ -53,12 +58,16 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
|
||||
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
|
||||
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
|
||||
FEATURE(EXT, MultiDraw, MULTI_DRAW, multi_draw) \
|
||||
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
|
||||
primitive_topology_list_restart) \
|
||||
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
|
||||
FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
|
||||
FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
|
||||
FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
|
||||
FEATURE(EXT, SwapchainMaintenance1, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \
|
||||
FEATURE(KHR, Maintenance5, MAINTENANCE_5, maintenance5) \
|
||||
FEATURE(KHR, Maintenance6, MAINTENANCE_6, maintenance6) \
|
||||
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
|
||||
pipeline_executable_properties) \
|
||||
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
|
||||
@@ -85,6 +94,11 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
EXTENSION(KHR, SWAPCHAIN, swapchain) \
|
||||
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
|
||||
EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
|
||||
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
|
||||
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
|
||||
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
|
||||
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
|
||||
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
|
||||
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
|
||||
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
|
||||
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
|
||||
@@ -111,6 +125,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
|
||||
@@ -162,6 +177,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||
FEATURE_NAME(depth_bias_control, depthBiasExact) \
|
||||
FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
|
||||
FEATURE_NAME(format_a4b4g4r4, formatA4B4G4R4) \
|
||||
FEATURE_NAME(image_robustness, robustImageAccess) \
|
||||
FEATURE_NAME(index_type_uint8, indexTypeUint8) \
|
||||
FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
|
||||
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
|
||||
@@ -288,12 +304,14 @@ public:
|
||||
|
||||
/// Returns uniform buffer alignment requirement.
|
||||
VkDeviceSize GetUniformBufferAlignment() const {
|
||||
return properties.properties.limits.minUniformBufferOffsetAlignment;
|
||||
return (std::max)(properties.properties.limits.minUniformBufferOffsetAlignment,
|
||||
uniform_buffer_alignment_minimum);
|
||||
}
|
||||
|
||||
/// Returns storage alignment requirement.
|
||||
VkDeviceSize GetStorageBufferAlignment() const {
|
||||
return properties.properties.limits.minStorageBufferOffsetAlignment;
|
||||
return (std::max)(properties.properties.limits.minStorageBufferOffsetAlignment,
|
||||
storage_buffer_alignment_minimum);
|
||||
}
|
||||
|
||||
/// Returns the maximum range for storage buffers.
|
||||
@@ -326,6 +344,7 @@ public:
|
||||
return properties.float_controls;
|
||||
}
|
||||
|
||||
|
||||
/// Returns true if ASTC is natively supported.
|
||||
bool IsOptimalAstcSupported() const {
|
||||
return features.features.textureCompressionASTC_LDR;
|
||||
@@ -341,6 +360,11 @@ public:
|
||||
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
}
|
||||
|
||||
/// Returns true if vkResetQueryPool can be used from the host.
|
||||
bool SupportsHostQueryReset() const {
|
||||
return features.host_query_reset.hostQueryReset;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports float64 natively.
|
||||
bool IsFloat64Supported() const {
|
||||
return features.features.shaderFloat64;
|
||||
@@ -371,11 +395,26 @@ public:
|
||||
return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports the provided subgroup feature.
|
||||
bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature) const {
|
||||
return properties.subgroup_properties.supportedOperations & feature;
|
||||
/// Returns true if the device supports the provided subgroup feature for the given stages.
|
||||
bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature,
|
||||
VkShaderStageFlags stage_mask = 0) const;
|
||||
|
||||
/// Returns true if the device reports subgroup support for the provided shader stages.
|
||||
bool SupportsSubgroupStage(VkShaderStageFlags stage_mask) const;
|
||||
|
||||
/// Returns the set of stages that report subgroup support.
|
||||
VkShaderStageFlags GetSubgroupSupportedStages() const {
|
||||
return properties.subgroup_properties.supportedStages;
|
||||
}
|
||||
|
||||
/// Returns the set of subgroup operations reported by the driver.
|
||||
VkSubgroupFeatureFlags GetSubgroupSupportedOperations() const {
|
||||
return properties.subgroup_properties.supportedOperations;
|
||||
}
|
||||
|
||||
/// Returns true if the driver can execute all warp intrinsics for the given shader stage.
|
||||
bool SupportsWarpIntrinsics(VkShaderStageFlagBits stage) const;
|
||||
|
||||
/// Returns the maximum number of push descriptors.
|
||||
u32 MaxPushDescriptors() const {
|
||||
return properties.push_descriptor.maxPushDescriptors;
|
||||
@@ -451,9 +490,14 @@ public:
|
||||
return extensions.swapchain_mutable_format;
|
||||
}
|
||||
|
||||
/// Returns true if VK_EXT_swapchain_maintenance1 is enabled.
|
||||
bool IsExtSwapchainMaintenance1Enabled() const {
|
||||
return extensions.swapchain_maintenance1;
|
||||
}
|
||||
|
||||
/// Returns true if VK_KHR_shader_float_controls is enabled.
|
||||
bool IsKhrShaderFloatControlsSupported() const {
|
||||
return extensions.shader_float_controls;
|
||||
return extensions.shader_float_controls && !disable_shader_float_controls_usage;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
|
||||
@@ -487,10 +531,18 @@ public:
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_stencil_export.
|
||||
/// Note: Most Mali/NVIDIA drivers don't support this. Use hardware blits as fallback.
|
||||
bool IsExtShaderStencilExportSupported() const {
|
||||
return extensions.shader_stencil_export;
|
||||
}
|
||||
|
||||
/// Returns true if depth/stencil operations can be performed efficiently.
|
||||
/// Either through shader export or hardware blits.
|
||||
bool CanPerformDepthStencilOperations() const {
|
||||
return extensions.shader_stencil_export || is_blit_depth24_stencil8_supported ||
|
||||
is_blit_depth32_stencil8_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
|
||||
bool IsExtDepthRangeUnrestrictedSupported() const {
|
||||
return extensions.depth_range_unrestricted;
|
||||
@@ -531,6 +583,46 @@ public:
|
||||
return extensions.custom_border_color;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_image_robustness.
|
||||
bool IsExtImageRobustnessSupported() const {
|
||||
return extensions.image_robustness;
|
||||
}
|
||||
|
||||
/// Returns true if robustImageAccess is supported.
|
||||
bool IsRobustImageAccessSupported() const {
|
||||
return features.image_robustness.robustImageAccess;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_robustness2.
|
||||
bool IsExtRobustness2Supported() const {
|
||||
return extensions.robustness_2;
|
||||
}
|
||||
|
||||
/// Returns true if robustBufferAccess2 is supported.
|
||||
bool IsRobustBufferAccess2Supported() const {
|
||||
return features.robustness2.robustBufferAccess2;
|
||||
}
|
||||
|
||||
/// Returns true if robustImageAccess2 is supported.
|
||||
bool IsRobustImageAccess2Supported() const {
|
||||
return features.robustness2.robustImageAccess2;
|
||||
}
|
||||
|
||||
/// Returns true if nullDescriptor is supported.
|
||||
bool IsNullDescriptorSupported() const {
|
||||
return features.robustness2.nullDescriptor;
|
||||
}
|
||||
|
||||
/// Returns true if customBorderColors feature is available.
|
||||
bool IsCustomBorderColorsSupported() const {
|
||||
return features.custom_border_color.customBorderColors;
|
||||
}
|
||||
|
||||
/// Returns true if customBorderColorWithoutFormat feature is available.
|
||||
bool IsCustomBorderColorWithoutFormatSupported() const {
|
||||
return features.custom_border_color.customBorderColorWithoutFormat;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
|
||||
bool IsExtExtendedDynamicStateSupported() const {
|
||||
return extensions.extended_dynamic_state;
|
||||
@@ -580,9 +672,69 @@ public:
|
||||
return extensions.line_rasterization;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
|
||||
bool SupportsRectangularLines() const {
|
||||
return features.line_rasterization.rectangularLines != VK_FALSE;
|
||||
}
|
||||
|
||||
bool SupportsSmoothLines() const {
|
||||
return features.line_rasterization.smoothLines != VK_FALSE;
|
||||
}
|
||||
|
||||
bool SupportsStippledRectangularLines() const {
|
||||
return features.line_rasterization.stippledRectangularLines != VK_FALSE;
|
||||
}
|
||||
|
||||
bool SupportsAlphaToOne() const {
|
||||
return features.features.alphaToOne != VK_FALSE;
|
||||
}
|
||||
|
||||
bool SupportsDualSourceBlend(u32 required_dual_source_attachments = 1) const {
|
||||
const u32 max_dual = properties.properties.limits.maxFragmentDualSrcAttachments;
|
||||
return features.features.dualSrcBlend != VK_FALSE &&
|
||||
max_dual >= required_dual_source_attachments;
|
||||
}
|
||||
|
||||
u32 MaxFragmentDualSrcAttachments() const {
|
||||
return properties.properties.limits.maxFragmentDualSrcAttachments;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3DepthClampEnable() const {
|
||||
return dynamic_state3_depth_clamp_enable;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3LogicOpEnable() const {
|
||||
return dynamic_state3_logic_op_enable;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3LineRasterizationMode() const {
|
||||
return dynamic_state3_line_raster_mode;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3ConservativeRasterizationMode() const {
|
||||
return dynamic_state3_conservative_raster_mode;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3LineStippleEnable() const {
|
||||
return dynamic_state3_line_stipple_enable;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3AlphaToCoverageEnable() const {
|
||||
return dynamic_state3_alpha_to_coverage;
|
||||
}
|
||||
|
||||
bool SupportsDynamicState3AlphaToOneEnable() const {
|
||||
return dynamic_state3_alpha_to_one;
|
||||
}
|
||||
|
||||
/// Returns true when the user enabled extended core dynamic states (level > 0).
|
||||
bool UsesAdvancedCoreDynamicState() const {
|
||||
return Settings::values.dyna_state.GetValue() > 0;
|
||||
}
|
||||
|
||||
/// Returns true if VK_EXT_vertex_input_dynamic_state is enabled on the device.
|
||||
bool IsExtVertexInputDynamicStateSupported() const {
|
||||
return extensions.vertex_input_dynamic_state;
|
||||
return extensions.vertex_input_dynamic_state &&
|
||||
features.vertex_input_dynamic_state.vertexInputDynamicState;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation
|
||||
@@ -613,10 +765,11 @@ public:
|
||||
|
||||
/// Returns the minimum supported version of SPIR-V.
|
||||
u32 SupportedSpirvVersion() const {
|
||||
if (instance_version >= VK_API_VERSION_1_3) {
|
||||
const bool has_float_controls = extensions.shader_float_controls;
|
||||
if (instance_version >= VK_API_VERSION_1_3 && has_float_controls) {
|
||||
return 0x00010600U;
|
||||
}
|
||||
if (extensions.spirv_1_4) {
|
||||
if (extensions.spirv_1_4 && has_float_controls) {
|
||||
return 0x00010400U;
|
||||
}
|
||||
return 0x00010300U;
|
||||
@@ -716,6 +869,68 @@ public:
|
||||
return features2.features.multiViewport;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance1.
|
||||
bool IsKhrMaintenance1Supported() const {
|
||||
return extensions.maintenance1;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance2.
|
||||
bool IsKhrMaintenance2Supported() const {
|
||||
return extensions.maintenance2;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance3.
|
||||
bool IsKhrMaintenance3Supported() const {
|
||||
return extensions.maintenance3;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance4.
|
||||
bool IsKhrMaintenance4Supported() const {
|
||||
return extensions.maintenance4;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance5.
|
||||
bool IsKhrMaintenance5Supported() const {
|
||||
return extensions.maintenance5;
|
||||
}
|
||||
|
||||
/// Returns true if polygon mode POINT supports gl_PointSize.
|
||||
bool SupportsPolygonModePointSize() const {
|
||||
return extensions.maintenance5 && properties.maintenance5.polygonModePointSize;
|
||||
}
|
||||
|
||||
/// Returns true if depth/stencil swizzle ONE is supported.
|
||||
bool SupportsDepthStencilSwizzleOne() const {
|
||||
return extensions.maintenance5 && properties.maintenance5.depthStencilSwizzleOneSupport;
|
||||
}
|
||||
|
||||
/// Returns true if early fragment tests optimizations are available.
|
||||
bool SupportsEarlyFragmentTests() const {
|
||||
return extensions.maintenance5 &&
|
||||
properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting &&
|
||||
properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance6.
|
||||
bool IsKhrMaintenance6Supported() const {
|
||||
return extensions.maintenance6;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_multi_draw.
|
||||
bool IsExtMultiDrawSupported() const {
|
||||
return extensions.multi_draw;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance7.
|
||||
bool IsKhrMaintenance7Supported() const {
|
||||
return extensions.maintenance7;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_maintenance8.
|
||||
bool IsKhrMaintenance8Supported() const {
|
||||
return extensions.maintenance8;
|
||||
}
|
||||
|
||||
[[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
|
||||
u32 driver_version) {
|
||||
if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
||||
@@ -753,13 +968,13 @@ private:
|
||||
void RemoveUnsuitableExtensions();
|
||||
|
||||
void RemoveExtension(bool& extension, const std::string& extension_name);
|
||||
void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
|
||||
void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
|
||||
|
||||
template <typename Feature>
|
||||
void RemoveExtensionFeature(bool& extension, Feature& feature,
|
||||
const std::string& extension_name);
|
||||
template <typename Feature>
|
||||
void RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
|
||||
void RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
|
||||
const std::string& extension_name);
|
||||
|
||||
/// Sets up queue families.
|
||||
@@ -799,6 +1014,7 @@ private:
|
||||
FOR_EACH_VK_FEATURE_1_1(FEATURE);
|
||||
FOR_EACH_VK_FEATURE_1_2(FEATURE);
|
||||
FOR_EACH_VK_FEATURE_1_3(FEATURE);
|
||||
FOR_EACH_VK_FEATURE_1_4(FEATURE);
|
||||
FOR_EACH_VK_FEATURE_EXT(FEATURE);
|
||||
FOR_EACH_VK_EXTENSION(EXTENSION);
|
||||
|
||||
@@ -815,6 +1031,7 @@ private:
|
||||
FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE);
|
||||
FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE);
|
||||
FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE);
|
||||
FOR_EACH_VK_FEATURE_1_4(FEATURE_CORE);
|
||||
FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT);
|
||||
|
||||
#undef FEATURE_CORE
|
||||
@@ -830,6 +1047,8 @@ private:
|
||||
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
|
||||
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
|
||||
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
|
||||
VkPhysicalDeviceMaintenance5PropertiesKHR maintenance5{};
|
||||
VkPhysicalDeviceMultiDrawPropertiesEXT multi_draw{};
|
||||
|
||||
VkPhysicalDeviceProperties properties{};
|
||||
};
|
||||
@@ -859,10 +1078,20 @@ private:
|
||||
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
|
||||
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
|
||||
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
|
||||
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
|
||||
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
|
||||
bool disable_shader_float_controls_usage{}; ///< True when VK_KHR_shader_float_controls cannot be safely used.
|
||||
bool dynamic_state3_blending{}; ///< Has blending features of dynamic_state3.
|
||||
bool dynamic_state3_enables{}; ///< Has at least one enable feature of dynamic_state3.
|
||||
bool dynamic_state3_depth_clamp_enable{};
|
||||
bool dynamic_state3_logic_op_enable{};
|
||||
bool dynamic_state3_line_raster_mode{};
|
||||
bool dynamic_state3_conservative_raster_mode{};
|
||||
bool dynamic_state3_line_stipple_enable{};
|
||||
bool dynamic_state3_alpha_to_coverage{};
|
||||
bool dynamic_state3_alpha_to_one{};
|
||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
|
||||
VkDeviceSize uniform_buffer_alignment_minimum{}; ///< Minimum enforced UBO alignment.
|
||||
VkDeviceSize storage_buffer_alignment_minimum{}; ///< Minimum enforced SSBO alignment.
|
||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||
u32 sets_per_pool{}; ///< Sets per Description Pool
|
||||
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
||||
|
||||
@@ -116,6 +116,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||
X(vkCmdDrawIndirectCount);
|
||||
X(vkCmdDrawIndexedIndirectCount);
|
||||
X(vkCmdDrawIndirectByteCountEXT);
|
||||
X(vkCmdDrawMultiEXT);
|
||||
X(vkCmdDrawMultiIndexedEXT);
|
||||
X(vkCmdEndConditionalRenderingEXT);
|
||||
X(vkCmdEndQuery);
|
||||
X(vkCmdEndRenderPass);
|
||||
@@ -145,6 +147,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||
X(vkCmdSetDepthWriteEnableEXT);
|
||||
X(vkCmdSetPrimitiveRestartEnableEXT);
|
||||
X(vkCmdSetRasterizerDiscardEnableEXT);
|
||||
X(vkCmdSetAlphaToCoverageEnableEXT);
|
||||
X(vkCmdSetAlphaToOneEnableEXT);
|
||||
X(vkCmdSetConservativeRasterizationModeEXT);
|
||||
X(vkCmdSetLineRasterizationModeEXT);
|
||||
X(vkCmdSetLineStippleEnableEXT);
|
||||
|
||||
@@ -216,6 +216,8 @@ struct DeviceDispatch : InstanceDispatch {
|
||||
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
|
||||
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
|
||||
PFN_vkCmdDrawIndirectByteCountEXT vkCmdDrawIndirectByteCountEXT{};
|
||||
PFN_vkCmdDrawMultiEXT vkCmdDrawMultiEXT{};
|
||||
PFN_vkCmdDrawMultiIndexedEXT vkCmdDrawMultiIndexedEXT{};
|
||||
PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{};
|
||||
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
||||
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
||||
@@ -238,6 +240,8 @@ struct DeviceDispatch : InstanceDispatch {
|
||||
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
|
||||
PFN_vkCmdSetPrimitiveRestartEnableEXT vkCmdSetPrimitiveRestartEnableEXT{};
|
||||
PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT{};
|
||||
PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT{};
|
||||
PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT{};
|
||||
PFN_vkCmdSetConservativeRasterizationModeEXT vkCmdSetConservativeRasterizationModeEXT{};
|
||||
PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT{};
|
||||
PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT{};
|
||||
@@ -1239,6 +1243,19 @@ public:
|
||||
counter_buffer_offset, counter_offset, stride);
|
||||
}
|
||||
|
||||
void DrawMultiEXT(u32 draw_count, const VkMultiDrawInfoEXT* vertex_info,
|
||||
u32 instance_count, u32 first_instance, u32 stride) const noexcept {
|
||||
dld->vkCmdDrawMultiEXT(handle, draw_count, vertex_info, instance_count, first_instance,
|
||||
stride);
|
||||
}
|
||||
|
||||
void DrawMultiIndexedEXT(u32 draw_count, const VkMultiDrawIndexedInfoEXT* index_info,
|
||||
u32 instance_count, u32 first_instance, u32 stride,
|
||||
const int32_t* vertex_offset) const noexcept {
|
||||
dld->vkCmdDrawMultiIndexedEXT(handle, draw_count, index_info, instance_count,
|
||||
first_instance, stride, vertex_offset);
|
||||
}
|
||||
|
||||
void ClearAttachments(Span<VkClearAttachment> attachments,
|
||||
Span<VkClearRect> rects) const noexcept {
|
||||
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
|
||||
@@ -1471,6 +1488,14 @@ public:
|
||||
dld->vkCmdSetLogicOpEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
|
||||
}
|
||||
|
||||
void SetAlphaToCoverageEnableEXT(bool enable) const noexcept {
|
||||
dld->vkCmdSetAlphaToCoverageEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
|
||||
}
|
||||
|
||||
void SetAlphaToOneEnableEXT(bool enable) const noexcept {
|
||||
dld->vkCmdSetAlphaToOneEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
|
||||
}
|
||||
|
||||
void SetDepthClampEnableEXT(bool enable) const noexcept {
|
||||
dld->vkCmdSetDepthClampEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user