Compare commits

..

2 Commits

Author   SHA1         Message                                                                   Date
lizzie   6739f95c21   remove weird conditional                                                  2025-12-26 02:46:15 +00:00
lizzie   19fc47a7e8   [windows/arm] Experimental "Turnip" MESA support for Adreno 6xx laptops   2025-12-25 23:03:33 +00:00
14 changed files with 8 additions and 265 deletions

View File

@@ -177,8 +177,7 @@ option(YUZU_USE_BUNDLED_SIRIT "Download bundled sirit" ${BUNDLED_SIRIT_DEFAULT})
# Re-allow on FreeBSD once it's on mainline ports
cmake_dependent_option(ENABLE_LIBUSB "Enable the use of LibUSB" ON "WIN32 OR PLATFORM_LINUX OR APPLE" OFF)
cmake_dependent_option(ENABLE_OPENGL "Enable OpenGL" ON "NOT WIN32 OR NOT ARCHITECTURE_arm64" OFF)
mark_as_advanced(FORCE ENABLE_OPENGL)
option(ENABLE_OPENGL "Enable OpenGL" ON)
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
option(ENABLE_WIFI_SCAN "Enable WiFi scanning" OFF)

View File

@@ -433,12 +433,6 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
template <class P>
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_graphics_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
}
}
channel_state->enabled_storage_buffers[stage] = 0;
channel_state->written_storage_buffers[stage] = 0;
}
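
The guard deleted here uses a C++20 detection idiom: a requires-expression evaluates to a compile-time bool, so if constexpr can probe the runtime type for an optional hook without any traits machinery. A minimal sketch of the pattern, with illustrative backend types (LimitedRuntime and PlainRuntime are not names from this codebase):

#include <cstdio>

struct LimitedRuntime {
    bool ShouldLimitDynamicStorageBuffers() const { return true; }
};
struct PlainRuntime {}; // backend that never declared the hook

template <class Runtime>
void Unbind(Runtime& runtime) {
    if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
        // Compiled only for backends that declare the hook.
        if (runtime.ShouldLimitDynamicStorageBuffers()) {
            std::puts("decrement bookkeeping counters");
        }
    }
    std::puts("clear per-stage bitmasks");
}

int main() {
    LimitedRuntime vulkan;
    PlainRuntime opengl;
    Unbind(vulkan); // takes the guarded path
    Unbind(opengl); // guard is discarded at compile time
}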
@@ -446,26 +440,8 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
template <class P>
bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
u32 cbuf_offset, bool is_written) {
const bool already_enabled =
((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_graphics_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping graphics storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return false;
}
}
}
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_graphics_storage_buffers;
}
}
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
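
The deleted budget check works on plain bitmasks: bit N of enabled_storage_buffers[stage] marks SSBO slot N as bound, and std::popcount yields the live count. A simplified sketch of that logic — the real code kept a running total_graphics_storage_buffers counter instead of recomputing the popcount on each bind, and the cap value below is made up:

#include <bit>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr std::uint32_t kMaxDynamicStorageBuffers = 4; // illustrative cap

bool BindStorageBuffer(std::uint32_t& enabled_mask, std::size_t ssbo_index) {
    const bool already_enabled = ((enabled_mask >> ssbo_index) & 1U) != 0;
    const auto total = static_cast<std::uint32_t>(std::popcount(enabled_mask));
    if (!already_enabled && total >= kMaxDynamicStorageBuffers) {
        std::printf("skipping SSBO %zu: driver cap %u reached\n", ssbo_index,
                    kMaxDynamicStorageBuffers);
        return false;
    }
    enabled_mask |= 1U << ssbo_index;
    return true;
}

int main() {
    std::uint32_t mask = 0;
    for (std::size_t i = 0; i < 6; ++i) {
        BindStorageBuffer(mask, i); // the fifth and sixth binds are refused
    }
}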
@@ -496,12 +472,6 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_compute_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
}
}
channel_state->enabled_compute_storage_buffers = 0;
channel_state->written_compute_storage_buffers = 0;
channel_state->image_compute_texture_buffers = 0;
@@ -515,26 +485,8 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
ssbo_index);
return;
}
const bool already_enabled =
((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_compute_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping compute storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return;
}
}
}
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_compute_storage_buffers;
}
}
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
@@ -905,23 +857,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
const bool use_fast_buffer = needs_alignment_stream ||
(has_host_buffer &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size));
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size);
if (use_fast_buffer) {
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
@@ -960,6 +898,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if (!needs_bind) {
return;
}
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
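
The needs_alignment_stream path removed above existed because a uniform-buffer binding offset inside a pooled allocation must be a multiple of the device's minimum uniform-buffer offset alignment on Vulkan; misaligned data had to be re-streamed to a fresh slot instead of bound in place. The test itself, in isolation (the 64-byte alignment is just an example value):

#include <cstdint>
#include <cstdio>

bool NeedsAlignmentStream(std::uint32_t offset, std::uint32_t alignment) {
    return alignment > 1 && (offset % alignment) != 0;
}

int main() {
    const std::uint32_t alignment = 64; // e.g. minUniformBufferOffsetAlignment
    std::printf("%d\n", NeedsAlignmentStream(128, alignment)); // 0: bind in place
    std::printf("%d\n", NeedsAlignmentStream(100, alignment)); // 1: re-stream
}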
@@ -1076,30 +1015,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
TouchBuffer(buffer, binding.buffer_id);
const u32 size =
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
if constexpr (!IS_OPENGL) {
if (needs_alignment_stream) {
const std::span<u8> span =
runtime.BindMappedUniformBuffer(0, binding_index, size);
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
return;
}
}
SynchronizeBuffer(buffer, binding.device_addr, size);
const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);

View File

@@ -8,7 +8,6 @@
#include <algorithm>
#include <array>
#include <bit>
#include <functional>
#include <memory>
#include <mutex>
@@ -133,9 +132,6 @@ public:
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
u32 total_graphics_storage_buffers = 0;
u32 total_compute_storage_buffers = 0;
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{};

View File

@@ -198,10 +198,6 @@ public:
return device.CanReportMemoryUsage();
}
u32 GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment());
}
u32 GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
}

View File

@@ -333,13 +333,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue) {
const VkDriverIdKHR driver_id = device.GetDriverID();
limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (limit_dynamic_storage_buffers) {
max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
}
if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
// TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
@@ -415,10 +408,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment());
}
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment());
}
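
The constructor code deleted above keyed the workaround off the Vulkan driver ID. For reference, a sketch of how such a policy can be derived from stock Vulkan 1.2 queries, assuming a VkPhysicalDevice is already in hand (the StorageBufferPolicy struct is illustrative, not from this codebase):

#include <cstdint>
#include <vulkan/vulkan.h>

struct StorageBufferPolicy {
    bool limit = false;
    uint32_t max_dynamic = UINT32_MAX;
};

StorageBufferPolicy QueryPolicy(VkPhysicalDevice physical) {
    VkPhysicalDeviceDriverProperties driver{};
    driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
    VkPhysicalDeviceProperties2 props{};
    props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    props.pNext = &driver;
    vkGetPhysicalDeviceProperties2(physical, &props);

    StorageBufferPolicy policy;
    policy.limit = driver.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
                   driver.driverID == VK_DRIVER_ID_MESA_TURNIP ||
                   driver.driverID == VK_DRIVER_ID_ARM_PROPRIETARY;
    if (policy.limit) {
        // Cap dynamic SSBO descriptors at what the driver actually supports.
        policy.max_dynamic =
            props.properties.limits.maxDescriptorSetStorageBuffersDynamic;
    }
    return policy;
}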

View File

@@ -6,8 +6,6 @@
#pragma once
#include <limits>
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
@@ -96,8 +94,6 @@ public:
bool CanReportMemoryUsage() const;
u32 GetUniformBufferAlignment() const;
u32 GetStorageBufferAlignment() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
@@ -159,14 +155,6 @@ public:
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
bool ShouldLimitDynamicStorageBuffers() const {
return limit_dynamic_storage_buffers;
}
u32 GetMaxDynamicStorageBuffers() const {
return max_dynamic_storage_buffers;
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
guest_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -206,9 +194,6 @@ private:
std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
bool limit_dynamic_storage_buffers = false;
u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
};
struct BufferCacheParams {

View File

@@ -269,9 +269,8 @@ size_t GetTotalPipelineWorkers() {
const size_t max_core_threads =
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
// Leave at least a few cores free on Android; reserve two instead of three so
// pipeline compilation can consume one more worker thread for testing.
constexpr size_t free_cores = 2ULL;
// Leave at least a few cores free on Android
constexpr size_t free_cores = 3ULL;
if (max_core_threads <= free_cores) {
return 1ULL;
}
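
GetTotalPipelineWorkers derives its count from std::thread::hardware_concurrency, reserving one core for the host thread. A sketch of the Android branch as it reads after this change — the final subtraction is an assumption, since the hunk ends at the early return:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <thread>

std::size_t TotalPipelineWorkers(std::size_t free_cores) {
    const std::size_t max_core_threads =
        std::max<std::size_t>(std::thread::hardware_concurrency(), 2) - 1;
    if (max_core_threads <= free_cores) {
        return 1; // never drop below one worker
    }
    return max_core_threads - free_cores; // assumed from context
}

int main() {
    std::printf("workers: %zu\n", TotalPipelineWorkers(3));
}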
@@ -768,19 +767,6 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const VkDriverIdKHR driver_id = device.GetDriverID();
const bool needs_shared_mem_clamp =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
LOG_WARNING(Render_Vulkan,
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
key.unique_hash,
program.shared_memory_size / 1024,
max_shared_memory / 1024);
program.shared_memory_size = max_shared_memory;
}
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};
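
The clamp removed here guarded against compute shaders that declare more workgroup shared memory than the device advertises in VkPhysicalDeviceLimits::maxComputeSharedMemorySize (which is what GetMaxComputeSharedMemorySize returns, per the device header further down). The core of it as a standalone sketch, with logging reduced to printf:

#include <cstdint>
#include <cstdio>

std::uint32_t ClampSharedMemory(std::uint32_t requested, std::uint32_t device_max) {
    if (requested > device_max) {
        std::printf("clamping shared memory %uKB -> %uKB\n",
                    requested / 1024, device_max / 1024);
        return device_max;
    }
    return requested;
}

int main() {
    ClampSharedMemory(64 * 1024, 32 * 1024); // clamps on a 32KB-limit device
}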

View File

@@ -1516,10 +1516,6 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
return device.GetSamplerHeapBudget();
}
void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,

View File

@@ -62,8 +62,6 @@ public:
bool CanReportMemoryUsage() const;
std::optional<size_t> GetSamplerHeapBudget() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,

View File

@@ -6,8 +6,6 @@
#pragma once
#include <limits>
#include <optional>
#include <unordered_set>
#include <boost/container/small_vector.hpp>
@@ -1738,89 +1736,11 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
}
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
if (is_new) {
EnforceSamplerBudget();
pair->second = slot_samplers.insert(runtime, config);
}
return pair->second;
}
template <class P>
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
if constexpr (requires { runtime.GetSamplerHeapBudget(); }) {
return runtime.GetSamplerHeapBudget();
} else {
return std::nullopt;
}
}
template <class P>
void TextureCache<P>::EnforceSamplerBudget() {
const auto budget = QuerySamplerBudget();
if (!budget) {
return;
}
if (slot_samplers.size() < *budget) {
return;
}
if (!channel_state) {
return;
}
if (last_sampler_gc_frame == frame_tick) {
return;
}
last_sampler_gc_frame = frame_tick;
TrimInactiveSamplers(*budget);
}
template <class P>
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
if (channel_state->samplers.empty()) {
return;
}
static constexpr size_t SAMPLER_GC_SLACK = 1024;
auto mark_active = [](auto& set, SamplerId id) {
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
return;
}
set.insert(id);
};
std::unordered_set<SamplerId> active;
active.reserve(channel_state->graphics_sampler_ids.size() +
channel_state->compute_sampler_ids.size());
for (const SamplerId id : channel_state->graphics_sampler_ids) {
mark_active(active, id);
}
for (const SamplerId id : channel_state->compute_sampler_ids) {
mark_active(active, id);
}
size_t removed = 0;
auto& sampler_map = channel_state->samplers;
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
const SamplerId sampler_id = it->second;
if (!sampler_id || sampler_id == CORRUPT_ID) {
it = sampler_map.erase(it);
continue;
}
if (active.find(sampler_id) != active.end()) {
++it;
continue;
}
slot_samplers.erase(sampler_id);
it = sampler_map.erase(it);
++removed;
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
break;
}
}
if (removed != 0) {
LOG_WARNING(HW_GPU,
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
budget, removed);
}
}
template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
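
The deleted TrimInactiveSamplers is essentially a budget-driven sweep: sampler IDs still referenced by the bound graphics/compute sets are kept, everything else is erased until the pool has slack under the driver budget. A compact sketch of that sweep with toy types — SamplerId reduced to an int, and the budget/slack values illustrative:

#include <cstddef>
#include <cstdio>
#include <unordered_map>
#include <unordered_set>

using SamplerId = int;

void TrimInactive(std::unordered_map<int, SamplerId>& cache,
                  const std::unordered_set<SamplerId>& active,
                  std::size_t budget, std::size_t slack) {
    for (auto it = cache.begin(); it != cache.end();) {
        if (active.count(it->second) != 0) {
            ++it; // still referenced by a bound descriptor set: keep
            continue;
        }
        it = cache.erase(it);
        if (cache.size() + slack <= budget) {
            break; // enough headroom reclaimed
        }
    }
}

int main() {
    std::unordered_map<int, SamplerId> cache{{0, 10}, {1, 11}, {2, 12}};
    TrimInactive(cache, /*active=*/{11}, /*budget=*/2, /*slack=*/0);
    std::printf("remaining: %zu\n", cache.size()); // 2: the active entry survives
}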

View File

@@ -429,9 +429,6 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
void EnforceSamplerBudget();
void TrimInactiveSamplers(size_t budget);
std::optional<size_t> QuerySamplerBudget() const;
Runtime& runtime;
@@ -503,7 +500,6 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;

View File

@@ -579,18 +579,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
has_broken_parallel_compiling = true;
}
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
if (sampler_limit > 0) {
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
const size_t reserved = sampler_limit / 4U;
const size_t derived_budget =
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
sampler_heap_budget = derived_budget;
LOG_WARNING(Render_Vulkan,
"Qualcomm driver reports max {} samplers; reserving {} (25%) and "
"allowing Eden to use {} (75%) to avoid heap exhaustion",
sampler_limit, reserved, sampler_heap_budget);
}
}
if (extensions.sampler_filter_minmax && is_amd) {
@@ -1285,13 +1273,6 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
}
}
std::optional<size_t> Device::GetSamplerHeapBudget() const {
if (sampler_heap_budget == 0) {
return std::nullopt;
}
return sampler_heap_budget;
}
u64 Device::GetDeviceMemoryUsage() const {
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
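
The budget derivation removed from the Device constructor reserved a quarter of the driver-reported maxSamplerAllocationCount and floored the result at 1024. As a standalone sketch (the 4096 input is only an example value):

#include <algorithm>
#include <cstddef>
#include <cstdio>

std::size_t DeriveSamplerBudget(std::size_t sampler_limit) {
    if (sampler_limit == 0) {
        return 0; // 0 meant "unlimited" in the removed code
    }
    constexpr std::size_t kMinBudget = 1024;
    const std::size_t reserved = sampler_limit / 4; // keep 25% for the driver
    return std::max(kMinBudget, sampler_limit - reserved);
}

int main() {
    std::printf("budget: %zu\n", DeriveSamplerBudget(4096)); // 3072
}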

View File

@@ -6,7 +6,6 @@
#pragma once
#include <optional>
#include <set>
#include <span>
#include <string>
@@ -311,16 +310,6 @@ public:
return properties.properties.limits.maxComputeSharedMemorySize;
}
/// Returns the maximum number of dynamic storage buffer descriptors per set.
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
}
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
}
/// Returns float control properties of the device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
return properties.float_controls;
@@ -642,8 +631,6 @@ public:
return has_broken_parallel_compiling;
}
std::optional<size_t> GetSamplerHeapBudget() const;
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return properties.driver.driverName;
@@ -862,7 +849,6 @@ private:
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

View File

@@ -79,9 +79,6 @@ namespace {
#endif
if (enable_validation && AreExtensionsSupported(dld, *properties, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME}))
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
// VK_EXT_surface_maintenance1 is required for VK_EXT_swapchain_maintenance1
if (window_type != Core::Frontend::WindowSystemType::Headless && AreExtensionsSupported(dld, *properties, std::array{VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME}))
extensions.push_back(VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME);
}
return extensions;
}
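
AreExtensionsSupported is this codebase's own helper; the underlying check is a plain enumeration of instance extensions. A self-contained sketch of that availability test using only core Vulkan calls (the function name here is illustrative):

#include <cstdio>
#include <cstring>
#include <vector>
#include <vulkan/vulkan.h>

bool IsInstanceExtensionSupported(const char* name) {
    // Query the count first, then fetch the full list from the loader.
    uint32_t count = 0;
    vkEnumerateInstanceExtensionProperties(nullptr, &count, nullptr);
    std::vector<VkExtensionProperties> props(count);
    vkEnumerateInstanceExtensionProperties(nullptr, &count, props.data());
    for (const VkExtensionProperties& prop : props) {
        if (std::strcmp(prop.extensionName, name) == 0) {
            return true;
        }
    }
    return false;
}

int main() {
    std::printf("%d\n", IsInstanceExtensionSupported(
                            VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME));
}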