[vk, qcom] Samplers Budget Management
This commit is contained in:
committed by
Caio Oliveira
parent
19863048b4
commit
270fdb978d
@@ -1516,6 +1516,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
|
||||
return device.GetSamplerHeapBudget();
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::TickFrame() {}
|
||||
|
||||
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||
|
||||
@@ -62,6 +62,8 @@ public:
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
std::optional<size_t> GetSamplerHeapBudget() const;
|
||||
|
||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <unordered_set>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
@@ -1736,11 +1738,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
||||
}
|
||||
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
|
||||
if (is_new) {
|
||||
EnforceSamplerBudget();
|
||||
pair->second = slot_samplers.insert(runtime, config);
|
||||
}
|
||||
return pair->second;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
|
||||
if constexpr (requires { runtime.GetSamplerHeapBudget(); }) {
|
||||
return runtime.GetSamplerHeapBudget();
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::EnforceSamplerBudget() {
|
||||
const auto budget = QuerySamplerBudget();
|
||||
if (!budget) {
|
||||
return;
|
||||
}
|
||||
if (slot_samplers.size() < *budget) {
|
||||
return;
|
||||
}
|
||||
if (!channel_state) {
|
||||
return;
|
||||
}
|
||||
if (last_sampler_gc_frame == frame_tick) {
|
||||
return;
|
||||
}
|
||||
last_sampler_gc_frame = frame_tick;
|
||||
TrimInactiveSamplers(*budget);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
|
||||
if (channel_state->samplers.empty()) {
|
||||
return;
|
||||
}
|
||||
static constexpr size_t SAMPLER_GC_SLACK = 1024;
|
||||
auto mark_active = [](auto& set, SamplerId id) {
|
||||
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
|
||||
return;
|
||||
}
|
||||
set.insert(id);
|
||||
};
|
||||
std::unordered_set<SamplerId> active;
|
||||
active.reserve(channel_state->graphics_sampler_ids.size() +
|
||||
channel_state->compute_sampler_ids.size());
|
||||
for (const SamplerId id : channel_state->graphics_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
for (const SamplerId id : channel_state->compute_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
|
||||
size_t removed = 0;
|
||||
auto& sampler_map = channel_state->samplers;
|
||||
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
|
||||
const SamplerId sampler_id = it->second;
|
||||
if (!sampler_id || sampler_id == CORRUPT_ID) {
|
||||
it = sampler_map.erase(it);
|
||||
continue;
|
||||
}
|
||||
if (active.find(sampler_id) != active.end()) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
slot_samplers.erase(sampler_id);
|
||||
it = sampler_map.erase(it);
|
||||
++removed;
|
||||
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (removed != 0) {
|
||||
LOG_WARNING(HW_GPU,
|
||||
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
|
||||
budget, removed);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
|
||||
const auto& regs = maxwell3d->regs;
|
||||
|
||||
@@ -429,6 +429,9 @@ private:
|
||||
|
||||
void QueueAsyncDecode(Image& image, ImageId image_id);
|
||||
void TickAsyncDecode();
|
||||
void EnforceSamplerBudget();
|
||||
void TrimInactiveSamplers(size_t budget);
|
||||
std::optional<size_t> QuerySamplerBudget() const;
|
||||
|
||||
Runtime& runtime;
|
||||
|
||||
@@ -500,6 +503,7 @@ private:
|
||||
|
||||
u64 modification_tick = 0;
|
||||
u64 frame_tick = 0;
|
||||
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
|
||||
|
||||
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
||||
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
||||
|
||||
@@ -579,6 +579,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
|
||||
has_broken_parallel_compiling = true;
|
||||
}
|
||||
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
|
||||
if (sampler_limit > 0) {
|
||||
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
|
||||
const size_t derived_budget =
|
||||
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit / 4U);
|
||||
sampler_heap_budget = derived_budget;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm driver reports max {} samplers; clamping cache to {} (25%) to "
|
||||
"avoid heap exhaustion",
|
||||
sampler_limit, sampler_heap_budget);
|
||||
}
|
||||
}
|
||||
|
||||
if (extensions.sampler_filter_minmax && is_amd) {
|
||||
@@ -1273,6 +1284,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<size_t> Device::GetSamplerHeapBudget() const {
|
||||
if (sampler_heap_budget == 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return sampler_heap_budget;
|
||||
}
|
||||
|
||||
u64 Device::GetDeviceMemoryUsage() const {
|
||||
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
|
||||
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <span>
|
||||
#include <string>
|
||||
@@ -641,6 +642,8 @@ public:
|
||||
return has_broken_parallel_compiling;
|
||||
}
|
||||
|
||||
std::optional<size_t> GetSamplerHeapBudget() const;
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return properties.driver.driverName;
|
||||
@@ -859,6 +862,7 @@ private:
|
||||
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
|
||||
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
|
||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
|
||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||
u32 sets_per_pool{}; ///< Sets per Description Pool
|
||||
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
||||
|
||||
Reference in New Issue
Block a user