Compare commits

10 Commits

release/0. ... 3096/qcom/

| Author | SHA1 | Date |
|---|---|---|
| | c0a6eb8fdd | |
| | dec889adb1 | |
| | b8b4fe9a45 | |
| | 270fdb978d | |
| | 19863048b4 | |
| | fa0859814c | |
| | 00ec67d65b | |
| | fbd28a9d34 | |
| | 3413fbd9da | |
| | bccc46a325 | |
```diff
@@ -18,6 +18,7 @@ plugins {
     id("androidx.navigation.safeargs.kotlin")
     id("org.jlleitschuh.gradle.ktlint") version "11.4.0"
     id("com.github.triplet.play") version "3.8.6"
+    id("idea")
 }

 /**
@@ -27,6 +28,8 @@ plugins {
  */
 val autoVersion = (((System.currentTimeMillis() / 1000) - 1451606400) / 10).toInt()

+val edenDir = project(":Eden").projectDir
+
 @Suppress("UnstableApiUsage")
 android {
     namespace = "org.yuzu.yuzu_emu"
@@ -241,11 +244,17 @@ android {
     externalNativeBuild {
         cmake {
             version = "3.22.1"
-            path = file("../../../CMakeLists.txt")
+            path = file("${edenDir}/CMakeLists.txt")
         }
     }
 }

+idea {
+    module {
+        // Inclusion to exclude build/ dir from non-Android
+        excludeDirs.add(file("${edenDir}/build"))
+    }
+}
 tasks.register<Delete>("ktlintReset", fun Delete.() {
     delete(File(layout.buildDirectory.toString() + File.separator + "intermediates/ktLint"))
 })
@@ -346,7 +355,7 @@ fun getGitVersion(): String {
 }

 afterEvaluate {
-    val artifactsDir = layout.projectDirectory.dir("../../../artifacts")
+    val artifactsDir = layout.projectDirectory.dir("${edenDir}/artifacts")
     val outputsDir = layout.buildDirectory.dir("outputs").get()

     android.applicationVariants.forEach { variant ->
```
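A note on the `autoVersion` line carried as context above: `1451606400` is the Unix timestamp of 2016-01-01 00:00 UTC, so the expression yields the number of 10-second intervals elapsed since that epoch. As a worked example, assuming a build at 2025-01-01 00:00 UTC (timestamp 1735689600, an illustrative value):

$$\text{autoVersion} = \left\lfloor \frac{1735689600 - 1451606400}{10} \right\rfloor = 28408320$$

which stays comfortably below Google Play's documented version-code ceiling of 2100000000.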
```diff
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

@@ -19,3 +22,6 @@ dependencyResolutionManagement {
 }

 include(":app")
+
+include("Eden")
+project(":Eden").projectDir = file("../..")
```
```diff
@@ -391,28 +391,15 @@ const std::size_t CACHE_PAGE_SIZE = 4096;
 void ArmNce::ClearInstructionCache() {
 #ifdef __aarch64__
     // Use IC IALLU to actually invalidate L1 instruction cache
     // Ensure all previous memory operations complete
     asm volatile("dsb ish\n"
                  "ic iallu\n"
                  "dsb ish\n"
                  "isb" ::: "memory");
 #endif
 }

 void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
-#ifdef ARCHITECTURE_arm64
-    // Invalidate instruction cache for specific range instead of full flush
-    constexpr u64 cache_line_size = 64;
-    const u64 aligned_addr = addr & ~(cache_line_size - 1);
-    const u64 end_addr = (addr + size + cache_line_size - 1) & ~(cache_line_size - 1);
-
-    asm volatile("dsb ish" ::: "memory");
-    for (u64 i = aligned_addr; i < end_addr; i += cache_line_size) {
-        asm volatile("ic ivau, %0" :: "r"(i) : "memory");
-    }
-    asm volatile("dsb ish\n"
-                 "isb" ::: "memory");
-#endif
     this->ClearInstructionCache();
 }

 } // namespace Core
```
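For context on the range-based path dropped above: `ic ivau` invalidates the instruction cache by virtual address one cache line at a time, while `ic iallu` drops the entire L1 instruction cache. Since the old code called `ClearInstructionCache()` after the loop anyway, the per-line pass was redundant work. A minimal sketch of a portable range flush (an assumption for illustration, not part of this changeset) using the GCC/Clang builtin:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical portable alternative: the compiler emits the required
// cache-maintenance and barrier instructions for the target architecture.
void InvalidateRangePortable(std::uintptr_t addr, std::size_t size) {
    char* begin = reinterpret_cast<char*>(addr);
    __builtin___clear_cache(begin, begin + size);
}
```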
```diff
@@ -134,4 +134,4 @@ target_include_directories(dynarmic_tests PRIVATE . ../src)
 target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
 target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)

-add_test(dynarmic_tests dynarmic_tests --durations yes)
+add_test(NAME dynarmic_tests COMMAND dynarmic_tests --durations yes)
```
```diff
@@ -17,6 +17,7 @@

 #include <fmt/format.h>
 #include <fmt/ostream.h>
+#include <fmt/ranges.h>
 #include <mcl/bit/swap.hpp>
 #include "dynarmic/common/common_types.h"

```
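The added header matters because fmt keeps its range and tuple formatters in `<fmt/ranges.h>`; without it, formatting a standard container fails to compile. A minimal illustration (not from this changeset):

```cpp
#include <vector>
#include <fmt/ranges.h>  // enables formatting of ranges and tuples

int main() {
    const std::vector<int> regs{1, 2, 3};
    fmt::print("{}\n", regs);  // prints: [1, 2, 3]
}
```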
```diff
@@ -433,6 +433,12 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
 template <class P>
 void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
+    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+        if (runtime.ShouldLimitDynamicStorageBuffers()) {
+            channel_state->total_graphics_storage_buffers -=
+                static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
+        }
+    }
     channel_state->enabled_storage_buffers[stage] = 0;
     channel_state->written_storage_buffers[stage] = 0;
 }
@@ -440,8 +446,26 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
 template <class P>
 bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
                                                u32 cbuf_offset, bool is_written) {
+    const bool already_enabled =
+        ((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
+    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+        if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+            const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
+            if (channel_state->total_graphics_storage_buffers >= max_bindings) {
+                LOG_WARNING(HW_GPU,
+                            "Skipping graphics storage buffer {} due to driver limit {}",
+                            ssbo_index, max_bindings);
+                return false;
+            }
+        }
+    }
     channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
     channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
+    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+        if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+            ++channel_state->total_graphics_storage_buffers;
+        }
+    }

     const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
@@ -472,6 +496,12 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
 template <class P>
 void BufferCache<P>::UnbindComputeStorageBuffers() {
+    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+        if (runtime.ShouldLimitDynamicStorageBuffers()) {
+            channel_state->total_compute_storage_buffers -=
+                static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
+        }
+    }
     channel_state->enabled_compute_storage_buffers = 0;
     channel_state->written_compute_storage_buffers = 0;
     channel_state->image_compute_texture_buffers = 0;
@@ -485,8 +515,26 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
                     ssbo_index);
         return;
     }
+    const bool already_enabled =
+        ((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
+    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+        if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+            const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
+            if (channel_state->total_compute_storage_buffers >= max_bindings) {
+                LOG_WARNING(HW_GPU,
+                            "Skipping compute storage buffer {} due to driver limit {}",
+                            ssbo_index, max_bindings);
+                return;
+            }
+        }
+    }
     channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
     channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
+    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+        if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+            ++channel_state->total_compute_storage_buffers;
+        }
+    }

     const auto& launch_desc = kepler_compute->launch_description;
     if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
```
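The guard used throughout this hunk is worth calling out: `if constexpr (requires { ... })` probes at compile time whether the runtime type exposes `ShouldLimitDynamicStorageBuffers()`, so backends that lack the method compile the template with the bookkeeping stripped out entirely. A minimal self-contained sketch of the idiom (type names are illustrative, not from the codebase):

```cpp
#include <cstdio>

struct LimitedRuntime {
    bool ShouldLimitDynamicStorageBuffers() const { return true; }
};
struct PlainRuntime {};  // no limiting API at all

template <class Runtime>
void Bind(Runtime& runtime) {
    // Compiles for both types: the branch only exists when the
    // expression inside requires{} is well-formed for Runtime.
    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
        if (runtime.ShouldLimitDynamicStorageBuffers()) {
            std::puts("limit path");
        }
    }
    std::puts("common path");
}

int main() {
    LimitedRuntime a;
    PlainRuntime b;
    Bind(a);  // limit path + common path
    Bind(b);  // common path only
}
```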
```diff
@@ -857,9 +905,23 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
     Buffer& buffer = slot_buffers[binding.buffer_id];
     TouchBuffer(buffer, binding.buffer_id);
-    const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
-                                 size <= channel_state->uniform_buffer_skip_cache_size &&
-                                 !memory_tracker.IsRegionGpuModified(device_addr, size);
+    const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
+    const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
+    const bool needs_alignment_stream = [&]() {
+        if constexpr (IS_OPENGL) {
+            return false;
+        } else {
+            if (!has_host_buffer) {
+                return false;
+            }
+            const u32 alignment = runtime.GetUniformBufferAlignment();
+            return alignment > 1 && (offset % alignment) != 0;
+        }
+    }();
+    const bool use_fast_buffer = needs_alignment_stream ||
+                                 (has_host_buffer &&
+                                  size <= channel_state->uniform_buffer_skip_cache_size &&
+                                  !memory_tracker.IsRegionGpuModified(device_addr, size));
     if (use_fast_buffer) {
         if constexpr (IS_OPENGL) {
             if (runtime.HasFastBufferSubData()) {
@@ -898,7 +960,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     if (!needs_bind) {
         return;
     }
-    const u32 offset = buffer.Offset(device_addr);
     if constexpr (IS_OPENGL) {
         // Mark the index as dirty if offset doesn't match
         const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
```
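The new `needs_alignment_stream` path exists because Vulkan requires uniform-buffer bind offsets to be multiples of `minUniformBufferOffsetAlignment` (commonly 16 to 256 bytes depending on the device). A guest UBO that lands at a misaligned offset inside a cached host buffer cannot be bound directly, so its contents are streamed into a freshly mapped, properly aligned buffer instead. A small sketch of the check in isolation:

```cpp
#include <cstdint>

// Sketch: decide whether a cached-buffer bind is legal, assuming `alignment`
// comes from VkPhysicalDeviceLimits::minUniformBufferOffsetAlignment.
bool NeedsAlignmentStream(std::uint32_t offset, std::uint32_t alignment) {
    return alignment > 1 && (offset % alignment) != 0;
}
// e.g. offset=0x1234, alignment=0x40 -> true: stream instead of binding in place.
```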
```diff
@@ -1015,9 +1076,30 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
     TouchBuffer(buffer, binding.buffer_id);
     const u32 size =
         (std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
+    const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
+    const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
+    const bool needs_alignment_stream = [&]() {
+        if constexpr (IS_OPENGL) {
+            return false;
+        } else {
+            if (!has_host_buffer) {
+                return false;
+            }
+            const u32 alignment = runtime.GetUniformBufferAlignment();
+            return alignment > 1 && (offset % alignment) != 0;
+        }
+    }();
+    if constexpr (!IS_OPENGL) {
+        if (needs_alignment_stream) {
+            const std::span<u8> span =
+                runtime.BindMappedUniformBuffer(0, binding_index, size);
+            device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
+            return;
+        }
+    }

     SynchronizeBuffer(buffer, binding.device_addr, size);

-    const u32 offset = buffer.Offset(binding.device_addr);
     buffer.MarkUsage(offset, size);
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
```
```diff
@@ -8,6 +8,7 @@

 #include <algorithm>
 #include <array>
+#include <bit>
 #include <functional>
 #include <memory>
 #include <mutex>
@@ -132,6 +133,9 @@ public:
     u32 enabled_compute_storage_buffers = 0;
     u32 written_compute_storage_buffers = 0;

+    u32 total_graphics_storage_buffers = 0;
+    u32 total_compute_storage_buffers = 0;
+
     std::array<u32, NUM_STAGES> enabled_texture_buffers{};
     std::array<u32, NUM_STAGES> written_texture_buffers{};
     std::array<u32, NUM_STAGES> image_texture_buffers{};
```
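`<bit>` is pulled in for `std::popcount`, which the unbind paths above use to subtract every currently enabled buffer from the running totals in a single step. A quick illustration:

```cpp
#include <bit>
#include <cstdint>

int main() {
    std::uint32_t enabled_mask = 0b1011;  // buffers 0, 1 and 3 bound
    std::uint32_t total = 7;              // cache-wide counter
    total -= static_cast<std::uint32_t>(std::popcount(enabled_mask));
    // total == 4: the three freed slots count against the limit no longer
}
```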
```diff
@@ -198,6 +198,10 @@ public:
         return device.CanReportMemoryUsage();
     }

+    u32 GetUniformBufferAlignment() const {
+        return static_cast<u32>(device.GetUniformBufferAlignment());
+    }
+
    u32 GetStorageBufferAlignment() const {
        return static_cast<u32>(device.GetShaderStorageBufferAlignment());
    }
```
```diff
@@ -333,6 +333,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
       staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
       quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
                       compute_pass_descriptor_queue) {
+    const VkDriverIdKHR driver_id = device.GetDriverID();
+    limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+                                    driver_id == VK_DRIVER_ID_MESA_TURNIP ||
+                                    driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
+    if (limit_dynamic_storage_buffers) {
+        max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
+    }
     if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
         // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
         uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
@@ -408,6 +415,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
     return device.CanReportMemoryUsage();
 }

+u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
+    return static_cast<u32>(device.GetUniformBufferAlignment());
+}
+
 u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
     return static_cast<u32>(device.GetStorageBufferAlignment());
 }
```
```diff
@@ -6,6 +6,8 @@

 #pragma once

+#include <limits>
+
 #include "video_core/buffer_cache/buffer_cache_base.h"
 #include "video_core/buffer_cache/memory_tracker_base.h"
 #include "video_core/buffer_cache/usage_tracker.h"
@@ -94,6 +96,8 @@ public:

     bool CanReportMemoryUsage() const;

+    u32 GetUniformBufferAlignment() const;
+
     u32 GetStorageBufferAlignment() const;

     [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
@@ -155,6 +159,14 @@ public:
         guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
     }

+    bool ShouldLimitDynamicStorageBuffers() const {
+        return limit_dynamic_storage_buffers;
+    }
+
+    u32 GetMaxDynamicStorageBuffers() const {
+        return max_dynamic_storage_buffers;
+    }
+
 private:
     void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
         guest_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -194,6 +206,9 @@ private:

     std::unique_ptr<Uint8Pass> uint8_pass;
     QuadIndexedPass quad_index_pass;

+    bool limit_dynamic_storage_buffers = false;
+    u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
+
 };

 struct BufferCacheParams {
```
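A side note on a style point visible throughout these hunks: calls such as `(std::min)(a, b)` and `(std::numeric_limits<u64>::max)()` are parenthesized so that function-like macros named `min`/`max` (infamously injected by `<windows.h>` unless `NOMINMAX` is defined) cannot expand over them. A tiny demonstration of the effect:

```cpp
#include <algorithm>
#include <limits>

// Simulate the offending macro that e.g. <windows.h> can inject.
#define max(a, b) (((a) > (b)) ? (a) : (b))

int main() {
    // unsigned m = std::numeric_limits<unsigned>::max();  // fails: `max(`
    //                                                     // triggers the macro
    unsigned m = (std::numeric_limits<unsigned>::max)();   // parentheses block expansion
    unsigned n = (std::max)(m, 0u);
    return static_cast<int>(n - m);  // 0
}
```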
```diff
@@ -269,8 +269,9 @@ size_t GetTotalPipelineWorkers() {
     const size_t max_core_threads =
         std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
 #ifdef ANDROID
-    // Leave at least a few cores free in android
-    constexpr size_t free_cores = 3ULL;
+    // Leave at least a few cores free on Android; reserve two instead of three so
+    // pipeline compilation can consume one more worker thread for testing.
+    constexpr size_t free_cores = 2ULL;
     if (max_core_threads <= free_cores) {
         return 1ULL;
     }
@@ -767,6 +768,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     }

     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+    const VkDriverIdKHR driver_id = device.GetDriverID();
+    const bool needs_shared_mem_clamp =
+        driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+        driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
+    const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
+    if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
+        LOG_WARNING(Render_Vulkan,
+                    "Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
+                    key.unique_hash,
+                    program.shared_memory_size / 1024,
+                    max_shared_memory / 1024);
+        program.shared_memory_size = max_shared_memory;
+    }
     const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
```
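To make the `free_cores` change concrete, assume an 8-core Android device (an illustrative figure): `hardware_concurrency()` returns 8, so

$$\text{max\_core\_threads} = \max(8, 2) - 1 = 7,$$

and, assuming the function returns `max_core_threads - free_cores` past the guard (the return statement sits outside this hunk), the worker pool grows from $7 - 3 = 4$ threads to $7 - 2 = 5$.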
```diff
@@ -1516,6 +1516,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
     return device.CanReportMemoryUsage();
 }

+std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
+    return device.GetSamplerHeapBudget();
+}
+
 void TextureCacheRuntime::TickFrame() {}

 Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
```
```diff
@@ -62,6 +62,8 @@ public:

     bool CanReportMemoryUsage() const;

+    std::optional<size_t> GetSamplerHeapBudget() const;
+
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                    const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
```
```diff
@@ -6,6 +6,8 @@

 #pragma once

+#include <limits>
 #include <optional>
+#include <unordered_set>
 #include <boost/container/small_vector.hpp>

```
```diff
@@ -1736,11 +1738,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
     }
     const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
     if (is_new) {
+        EnforceSamplerBudget();
         pair->second = slot_samplers.insert(runtime, config);
     }
     return pair->second;
 }

+template <class P>
+std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
+    if constexpr (requires { runtime.GetSamplerHeapBudget(); }) {
+        return runtime.GetSamplerHeapBudget();
+    } else {
+        return std::nullopt;
+    }
+}
+
+template <class P>
+void TextureCache<P>::EnforceSamplerBudget() {
+    const auto budget = QuerySamplerBudget();
+    if (!budget) {
+        return;
+    }
+    if (slot_samplers.size() < *budget) {
+        return;
+    }
+    if (!channel_state) {
+        return;
+    }
+    if (last_sampler_gc_frame == frame_tick) {
+        return;
+    }
+    last_sampler_gc_frame = frame_tick;
+    TrimInactiveSamplers(*budget);
+}
+
+template <class P>
+void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
+    if (channel_state->samplers.empty()) {
+        return;
+    }
+    static constexpr size_t SAMPLER_GC_SLACK = 1024;
+    auto mark_active = [](auto& set, SamplerId id) {
+        if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
+            return;
+        }
+        set.insert(id);
+    };
+    std::unordered_set<SamplerId> active;
+    active.reserve(channel_state->graphics_sampler_ids.size() +
+                   channel_state->compute_sampler_ids.size());
+    for (const SamplerId id : channel_state->graphics_sampler_ids) {
+        mark_active(active, id);
+    }
+    for (const SamplerId id : channel_state->compute_sampler_ids) {
+        mark_active(active, id);
+    }
+
+    size_t removed = 0;
+    auto& sampler_map = channel_state->samplers;
+    for (auto it = sampler_map.begin(); it != sampler_map.end();) {
+        const SamplerId sampler_id = it->second;
+        if (!sampler_id || sampler_id == CORRUPT_ID) {
+            it = sampler_map.erase(it);
+            continue;
+        }
+        if (active.find(sampler_id) != active.end()) {
+            ++it;
+            continue;
+        }
+        slot_samplers.erase(sampler_id);
+        it = sampler_map.erase(it);
+        ++removed;
+        if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
+            break;
+        }
+    }
+
+    if (removed != 0) {
+        LOG_WARNING(HW_GPU,
+                    "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
+                    budget, removed);
+    }
+}
+
 template <class P>
 ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
     const auto& regs = maxwell3d->regs;
```
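One detail worth noting in `FindSampler`: `try_emplace` performs the lookup and the (possible) insertion in a single pass, and its `bool` result tells the caller whether the key was new, so `EnforceSamplerBudget()` runs only when a sampler is actually about to be created. A generic sketch of the idiom (names are illustrative):

```cpp
#include <string>
#include <unordered_map>

std::unordered_map<std::string, int> cache;

int GetOrCreate(const std::string& key) {
    // One hash lookup: inserts a value-initialized entry only if absent.
    const auto [it, inserted] = cache.try_emplace(key);
    if (inserted) {
        it->second = static_cast<int>(key.size());  // stand-in for expensive creation
    }
    return it->second;
}
```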
```diff
@@ -429,6 +429,9 @@ private:

     void QueueAsyncDecode(Image& image, ImageId image_id);
     void TickAsyncDecode();
+    void EnforceSamplerBudget();
+    void TrimInactiveSamplers(size_t budget);
+    std::optional<size_t> QuerySamplerBudget() const;

     Runtime& runtime;

@@ -500,6 +503,7 @@ private:

     u64 modification_tick = 0;
     u64 frame_tick = 0;
+    u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();

     Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
     std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
```
```diff
@@ -579,6 +579,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
             has_broken_parallel_compiling = true;
         }
+        const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
+        if (sampler_limit > 0) {
+            constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
+            const size_t reserved = sampler_limit / 4U;
+            const size_t derived_budget =
+                (std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
+            sampler_heap_budget = derived_budget;
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm driver reports max {} samplers; reserving {} (25%) and "
+                        "allowing Eden to use {} (75%) to avoid heap exhaustion",
+                        sampler_limit, reserved, sampler_heap_budget);
+        }
     }

     if (extensions.sampler_filter_minmax && is_amd) {
@@ -1273,6 +1285,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
     }
 }

+std::optional<size_t> Device::GetSamplerHeapBudget() const {
+    if (sampler_heap_budget == 0) {
+        return std::nullopt;
+    }
+    return sampler_heap_budget;
+}
+
 u64 Device::GetDeviceMemoryUsage() const {
     VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
     budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
```
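For scale, the Vulkan spec requires `maxSamplerAllocationCount` to be at least 4000, and drivers commonly report exactly that minimum (an illustrative assumption, not a value from this diff). The derivation above then gives

$$\text{reserved} = \frac{4000}{4} = 1000, \qquad \text{budget} = \max(1024,\ 4000 - 1000) = 3000,$$

so the texture cache holds itself to 3000 live `VkSampler` objects and leaves a quarter of the allocation heap to the driver and other users.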
```diff
@@ -6,6 +6,7 @@

 #pragma once

+#include <optional>
 #include <set>
 #include <span>
 #include <string>
@@ -310,6 +311,16 @@ public:
         return properties.properties.limits.maxComputeSharedMemorySize;
     }

+    /// Returns the maximum number of dynamic storage buffer descriptors per set.
+    u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
+        return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
+    }
+
+    /// Returns the maximum number of dynamic uniform buffer descriptors per set.
+    u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
+        return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
+    }
+
     /// Returns float control properties of the device.
     const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
         return properties.float_controls;
@@ -631,6 +642,8 @@ public:
         return has_broken_parallel_compiling;
     }

+    std::optional<size_t> GetSamplerHeapBudget() const;
+
     /// Returns the vendor name reported from Vulkan.
     std::string_view GetVendorName() const {
         return properties.driver.driverName;
@@ -849,6 +862,7 @@ private:
     bool dynamic_state3_blending{};       ///< Has all blending features of dynamic_state3.
     bool dynamic_state3_enables{};        ///< Has all enables features of dynamic_state3.
     bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
+    size_t sampler_heap_budget{};         ///< Sampler budget for buggy drivers (0 = unlimited).
     u64 device_access_memory{};           ///< Total size of device local memory in bytes.
     u32 sets_per_pool{};                  ///< Sets per Description Pool
     NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
```
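The getters added above are thin wrappers over `VkPhysicalDeviceLimits`, which every Vulkan implementation fills in at device enumeration time. A minimal, self-contained sketch of reading the same limits directly (assumes a created `VkInstance` and at least one physical device):

```cpp
#include <vulkan/vulkan.h>
#include <cstdio>

// Query the limits that the wrappers above expose, given a physical device.
void PrintDescriptorLimits(VkPhysicalDevice gpu) {
    VkPhysicalDeviceProperties props{};
    vkGetPhysicalDeviceProperties(gpu, &props);
    const VkPhysicalDeviceLimits& limits = props.limits;
    std::printf("dynamic SSBOs per set: %u\n", limits.maxDescriptorSetStorageBuffersDynamic);
    std::printf("dynamic UBOs per set:  %u\n", limits.maxDescriptorSetUniformBuffersDynamic);
    std::printf("max sampler allocs:    %u\n", limits.maxSamplerAllocationCount);
}
```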