Compare commits
6 Commits
smartquery
...
3096/qcom/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0a6eb8fdd | ||
|
|
dec889adb1 | ||
|
|
b8b4fe9a45 | ||
|
|
270fdb978d | ||
|
|
19863048b4 | ||
|
|
fa0859814c |
@@ -96,8 +96,8 @@
|
||||
"package": "VVL",
|
||||
"repo": "KhronosGroup/Vulkan-ValidationLayers",
|
||||
"tag": "vulkan-sdk-%VERSION%",
|
||||
"git_version": "1.4.335.0",
|
||||
"git_version": "1.4.328.1",
|
||||
"artifact": "android-binaries-%VERSION%.zip",
|
||||
"hash": "48167c4a17736301bd08f9290f41830443e1f18cce8ad867fc6f289b49e18b40e93c9850b377951af82f51b5b6d7313aa6a884fc5df79f5ce3df82696c1c1244"
|
||||
"hash": "5ec895a453cb7c2f156830b9766953a0c2bd44dea99e6a3dac4160305041ccd3e87534b4ce0bd102392178d2a8eca48411856298f9395e60117cdfe89f72137e"
|
||||
}
|
||||
}
|
||||
|
||||
4
externals/cpmfile.json
vendored
4
externals/cpmfile.json
vendored
@@ -119,10 +119,10 @@
|
||||
"package": "VulkanUtilityLibraries",
|
||||
"repo": "scripts/VulkanUtilityHeaders",
|
||||
"tag": "%VERSION%",
|
||||
"git_version": "1.4.335",
|
||||
"git_version": "1.4.328",
|
||||
"artifact": "VulkanUtilityHeaders.tar.zst",
|
||||
"git_host": "git.crueter.xyz",
|
||||
"hash": "16dac0e6586702580c4279e4cd37ffe3cf909c93eb31b5069da7af36436d47b270a9cbaac953bb66c22ed12ed67ffa096688599267f307dfb62be1bc09f79833"
|
||||
"hash": "9922217b39faf73cd4fc1510f2fdba14a49aa5c0d77f9ee24ee0512cef16b234d0cabc83c1fec861fa5df1d43e7f086ca9b6501753899119f39c5ca530cb0dae"
|
||||
},
|
||||
"spirv-tools": {
|
||||
"package": "SPIRV-Tools",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
|
||||
@@ -15,6 +16,8 @@
|
||||
#include "video_core/guest_memory.h"
|
||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
#include "video_core/renderer_vulkan/line_loop_utils.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
@@ -353,14 +356,37 @@ void BufferCache<P>::UpdateComputeBuffers() {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (is_indexed) {
|
||||
BindHostIndexBuffer();
|
||||
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
|
||||
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
|
||||
draw_state.vertex_buffer.count);
|
||||
} else {
|
||||
if constexpr (!P::IS_OPENGL) {
|
||||
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
|
||||
if (draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
|
||||
polygon_mode == Maxwell::PolygonMode::Line && draw_state.vertex_buffer.count > 1) {
|
||||
const u32 vertex_count = draw_state.vertex_buffer.count;
|
||||
const u32 generated_count = vertex_count + 1;
|
||||
const bool use_u16 = vertex_count <= 0x10000;
|
||||
const u32 element_size = use_u16 ? sizeof(u16) : sizeof(u32);
|
||||
auto staging = runtime.UploadStagingBuffer(
|
||||
static_cast<size_t>(generated_count) * element_size);
|
||||
std::span<u8> dst_span{staging.mapped_span.data(),
|
||||
generated_count * static_cast<size_t>(element_size)};
|
||||
Vulkan::LineLoop::GenerateSequentialWithClosureRaw(dst_span, element_size);
|
||||
const auto synthetic_format = use_u16 ? Maxwell::IndexFormat::UnsignedShort
|
||||
: Maxwell::IndexFormat::UnsignedInt;
|
||||
runtime.BindIndexBuffer(draw_state.topology, synthetic_format,
|
||||
draw_state.vertex_buffer.first, generated_count,
|
||||
staging.buffer, static_cast<u32>(staging.offset),
|
||||
generated_count * element_size);
|
||||
}
|
||||
}
|
||||
if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
|
||||
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
|
||||
draw_state.vertex_buffer.count);
|
||||
}
|
||||
}
|
||||
}
|
||||
BindHostVertexBuffers();
|
||||
@@ -407,6 +433,12 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
|
||||
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
|
||||
if (runtime.ShouldLimitDynamicStorageBuffers()) {
|
||||
channel_state->total_graphics_storage_buffers -=
|
||||
static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
|
||||
}
|
||||
}
|
||||
channel_state->enabled_storage_buffers[stage] = 0;
|
||||
channel_state->written_storage_buffers[stage] = 0;
|
||||
}
|
||||
@@ -414,8 +446,26 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
|
||||
template <class P>
|
||||
bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
|
||||
u32 cbuf_offset, bool is_written) {
|
||||
const bool already_enabled =
|
||||
((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
|
||||
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
|
||||
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
|
||||
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
|
||||
if (channel_state->total_graphics_storage_buffers >= max_bindings) {
|
||||
LOG_WARNING(HW_GPU,
|
||||
"Skipping graphics storage buffer {} due to driver limit {}",
|
||||
ssbo_index, max_bindings);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
|
||||
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
|
||||
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
|
||||
++channel_state->total_graphics_storage_buffers;
|
||||
}
|
||||
}
|
||||
|
||||
const auto& cbufs = maxwell3d->state.shader_stages[stage];
|
||||
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
|
||||
@@ -446,6 +496,12 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindComputeStorageBuffers() {
|
||||
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
|
||||
if (runtime.ShouldLimitDynamicStorageBuffers()) {
|
||||
channel_state->total_compute_storage_buffers -=
|
||||
static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
|
||||
}
|
||||
}
|
||||
channel_state->enabled_compute_storage_buffers = 0;
|
||||
channel_state->written_compute_storage_buffers = 0;
|
||||
channel_state->image_compute_texture_buffers = 0;
|
||||
@@ -459,8 +515,26 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
|
||||
ssbo_index);
|
||||
return;
|
||||
}
|
||||
const bool already_enabled =
|
||||
((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
|
||||
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
|
||||
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
|
||||
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
|
||||
if (channel_state->total_compute_storage_buffers >= max_bindings) {
|
||||
LOG_WARNING(HW_GPU,
|
||||
"Skipping compute storage buffer {} due to driver limit {}",
|
||||
ssbo_index, max_bindings);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
|
||||
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
|
||||
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
|
||||
++channel_state->total_compute_storage_buffers;
|
||||
}
|
||||
}
|
||||
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
|
||||
@@ -689,6 +763,44 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
||||
const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
|
||||
const u32 size = channel_state->index_buffer.size;
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if constexpr (!P::IS_OPENGL) {
|
||||
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
|
||||
const bool polygon_line =
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
|
||||
polygon_mode == Maxwell::PolygonMode::Line;
|
||||
if (polygon_line && draw_state.index_buffer.count > 1) {
|
||||
const u32 element_size = draw_state.index_buffer.FormatSizeInBytes();
|
||||
const size_t src_bytes = static_cast<size_t>(draw_state.index_buffer.count) * element_size;
|
||||
const size_t total_bytes = src_bytes + element_size;
|
||||
auto staging = runtime.UploadStagingBuffer(total_bytes);
|
||||
std::span<u8> dst_span{staging.mapped_span.data(), total_bytes};
|
||||
std::span<const u8> src_span;
|
||||
if (!draw_state.inline_index_draw_indexes.empty()) {
|
||||
const u8* const src =
|
||||
draw_state.inline_index_draw_indexes.data() +
|
||||
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
|
||||
src_span = {src, src_bytes};
|
||||
} else if (const u8* const cpu_base =
|
||||
device_memory.GetPointer<u8>(channel_state->index_buffer.device_addr)) {
|
||||
const u8* const src = cpu_base +
|
||||
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
|
||||
src_span = {src, src_bytes};
|
||||
} else {
|
||||
const DAddr src_addr =
|
||||
channel_state->index_buffer.device_addr +
|
||||
static_cast<DAddr>(draw_state.index_buffer.first) * element_size;
|
||||
device_memory.ReadBlockUnsafe(src_addr, dst_span.data(), src_bytes);
|
||||
src_span = {dst_span.data(), src_bytes};
|
||||
}
|
||||
Vulkan::LineLoop::CopyWithClosureRaw(dst_span, src_span, element_size);
|
||||
buffer.MarkUsage(offset, size);
|
||||
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
|
||||
draw_state.index_buffer.first, draw_state.index_buffer.count + 1,
|
||||
staging.buffer, static_cast<u32>(staging.offset),
|
||||
static_cast<u32>(total_bytes));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
|
||||
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
auto upload_staging = runtime.UploadStagingBuffer(size);
|
||||
@@ -793,9 +905,23 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
||||
size <= channel_state->uniform_buffer_skip_cache_size &&
|
||||
!memory_tracker.IsRegionGpuModified(device_addr, size);
|
||||
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
|
||||
const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
|
||||
const bool needs_alignment_stream = [&]() {
|
||||
if constexpr (IS_OPENGL) {
|
||||
return false;
|
||||
} else {
|
||||
if (!has_host_buffer) {
|
||||
return false;
|
||||
}
|
||||
const u32 alignment = runtime.GetUniformBufferAlignment();
|
||||
return alignment > 1 && (offset % alignment) != 0;
|
||||
}
|
||||
}();
|
||||
const bool use_fast_buffer = needs_alignment_stream ||
|
||||
(has_host_buffer &&
|
||||
size <= channel_state->uniform_buffer_skip_cache_size &&
|
||||
!memory_tracker.IsRegionGpuModified(device_addr, size));
|
||||
if (use_fast_buffer) {
|
||||
if constexpr (IS_OPENGL) {
|
||||
if (runtime.HasFastBufferSubData()) {
|
||||
@@ -834,7 +960,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
if (!needs_bind) {
|
||||
return;
|
||||
}
|
||||
const u32 offset = buffer.Offset(device_addr);
|
||||
if constexpr (IS_OPENGL) {
|
||||
// Mark the index as dirty if offset doesn't match
|
||||
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
||||
@@ -951,9 +1076,30 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size =
|
||||
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
|
||||
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
|
||||
const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
|
||||
const bool needs_alignment_stream = [&]() {
|
||||
if constexpr (IS_OPENGL) {
|
||||
return false;
|
||||
} else {
|
||||
if (!has_host_buffer) {
|
||||
return false;
|
||||
}
|
||||
const u32 alignment = runtime.GetUniformBufferAlignment();
|
||||
return alignment > 1 && (offset % alignment) != 0;
|
||||
}
|
||||
}();
|
||||
if constexpr (!IS_OPENGL) {
|
||||
if (needs_alignment_stream) {
|
||||
const std::span<u8> span =
|
||||
runtime.BindMappedUniformBuffer(0, binding_index, size);
|
||||
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
SynchronizeBuffer(buffer, binding.device_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.device_addr);
|
||||
buffer.MarkUsage(offset, size);
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bit>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
@@ -132,6 +133,9 @@ public:
|
||||
u32 enabled_compute_storage_buffers = 0;
|
||||
u32 written_compute_storage_buffers = 0;
|
||||
|
||||
u32 total_graphics_storage_buffers = 0;
|
||||
u32 total_compute_storage_buffers = 0;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> image_texture_buffers{};
|
||||
|
||||
46
src/video_core/polygon_mode_utils.h
Normal file
46
src/video_core/polygon_mode_utils.h
Normal file
@@ -0,0 +1,46 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
inline Tegra::Engines::Maxwell3D::Regs::PolygonMode EffectivePolygonMode(
|
||||
const Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
const bool cull_enabled = regs.gl_cull_test_enabled != 0;
|
||||
const auto cull_face = regs.gl_cull_face;
|
||||
const bool cull_front = cull_enabled && (cull_face == Maxwell::CullFace::Front ||
|
||||
cull_face == Maxwell::CullFace::FrontAndBack);
|
||||
const bool cull_back = cull_enabled && (cull_face == Maxwell::CullFace::Back ||
|
||||
cull_face == Maxwell::CullFace::FrontAndBack);
|
||||
|
||||
const bool render_front = !cull_front;
|
||||
const bool render_back = !cull_back;
|
||||
|
||||
const auto front_mode = regs.polygon_mode_front;
|
||||
const auto back_mode = regs.polygon_mode_back;
|
||||
|
||||
if (render_front && render_back && front_mode != back_mode) {
|
||||
if (front_mode == Maxwell::PolygonMode::Line || back_mode == Maxwell::PolygonMode::Line) {
|
||||
return Maxwell::PolygonMode::Line;
|
||||
}
|
||||
if (front_mode == Maxwell::PolygonMode::Point || back_mode == Maxwell::PolygonMode::Point) {
|
||||
return Maxwell::PolygonMode::Point;
|
||||
}
|
||||
}
|
||||
|
||||
if (render_front) {
|
||||
return front_mode;
|
||||
}
|
||||
if (render_back) {
|
||||
return back_mode;
|
||||
}
|
||||
return front_mode;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "video_core/query_cache/query_cache_base.h"
|
||||
#include "video_core/query_cache/query_stream.h"
|
||||
#include "video_core/query_cache/types.h"
|
||||
namespace Vulkan { class Scheduler; }
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D;
|
||||
@@ -222,12 +222,6 @@ void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
|
||||
streamer->ResetCounter();
|
||||
}
|
||||
|
||||
// Called at frame start to batch vkCmdResetQueryPool outside render passes.
|
||||
template <typename Traits>
|
||||
void QueryCacheBase<Traits>::FramePrologueResets(Vulkan::Scheduler& scheduler) {
|
||||
impl->runtime.FramePrologueResets(scheduler);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void QueryCacheBase<Traits>::BindToChannel(s32 id) {
|
||||
VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id);
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
@@ -24,9 +21,7 @@
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
namespace Vulkan {
|
||||
class Scheduler;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
}
|
||||
@@ -58,7 +53,7 @@ public:
|
||||
RuntimeType& runtime_);
|
||||
|
||||
~QueryCacheBase();
|
||||
void FramePrologueResets(Vulkan::Scheduler& scheduler);
|
||||
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
IterateCache<true>(addr, size,
|
||||
[this](QueryLocation location) { InvalidateQuery(location); });
|
||||
|
||||
@@ -198,6 +198,10 @@ public:
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
u32 GetUniformBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetUniformBufferAlignment());
|
||||
}
|
||||
|
||||
u32 GetStorageBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "video_core/engines/draw_manager.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
@@ -65,7 +66,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
|
||||
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
|
||||
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
|
||||
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
|
||||
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
|
||||
polygon_mode.Assign(PackPolygonMode(VideoCore::EffectivePolygonMode(regs)));
|
||||
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
|
||||
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
|
||||
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
|
||||
|
||||
68
src/video_core/renderer_vulkan/line_loop_utils.h
Normal file
68
src/video_core/renderer_vulkan/line_loop_utils.h
Normal file
@@ -0,0 +1,68 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Vulkan::LineLoop {
|
||||
|
||||
inline void CopyWithClosureRaw(std::span<u8> dst, std::span<const u8> src, size_t element_size) {
|
||||
ASSERT_MSG(dst.size() == src.size() + element_size, "Invalid line loop copy sizes");
|
||||
if (src.empty()) {
|
||||
if (!dst.empty()) {
|
||||
std::fill(dst.begin(), dst.end(), u8{0});
|
||||
}
|
||||
return;
|
||||
}
|
||||
std::memcpy(dst.data(), src.data(), src.size());
|
||||
std::memcpy(dst.data() + src.size(), src.data(), element_size);
|
||||
}
|
||||
|
||||
inline void GenerateSequentialWithClosureRaw(std::span<u8> dst, size_t element_size,
|
||||
u64 start_value = 0) {
|
||||
if (dst.empty()) {
|
||||
return;
|
||||
}
|
||||
const size_t last = dst.size() - element_size;
|
||||
size_t offset = 0;
|
||||
u64 value = start_value;
|
||||
while (offset < last) {
|
||||
std::memcpy(dst.data() + offset, &value, element_size);
|
||||
offset += element_size;
|
||||
++value;
|
||||
}
|
||||
std::memcpy(dst.data() + offset, &start_value, element_size);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CopyWithClosure(std::span<T> dst, std::span<const T> src) {
|
||||
ASSERT_MSG(dst.size() == src.size() + 1, "Invalid destination size for line loop copy");
|
||||
if (src.empty()) {
|
||||
if (!dst.empty()) {
|
||||
dst.front() = {};
|
||||
}
|
||||
return;
|
||||
}
|
||||
std::copy(src.begin(), src.end(), dst.begin());
|
||||
dst.back() = src.front();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void GenerateSequentialWithClosure(std::span<T> dst, T start_value = {}) {
|
||||
if (dst.empty()) {
|
||||
return;
|
||||
}
|
||||
const size_t last = dst.size() - 1;
|
||||
for (size_t i = 0; i < last; ++i) {
|
||||
dst[i] = static_cast<T>(start_value + static_cast<T>(i));
|
||||
}
|
||||
dst.back() = start_value;
|
||||
}
|
||||
|
||||
} // namespace Vulkan::LineLoop
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -323,44 +326,9 @@ VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
|
||||
}
|
||||
|
||||
VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
|
||||
Maxwell::PrimitiveTopology topology) {
|
||||
switch (topology) {
|
||||
case Maxwell::PrimitiveTopology::Points:
|
||||
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
||||
case Maxwell::PrimitiveTopology::Lines:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineLoop:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::TriangleFan:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
case Maxwell::PrimitiveTopology::LinesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::LineStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::Quads:
|
||||
case Maxwell::PrimitiveTopology::QuadStrip:
|
||||
// TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
|
||||
// whenever it releases
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::Patches:
|
||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
case Maxwell::PrimitiveTopology::Polygon:
|
||||
LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a "
|
||||
"single body and not a bunch of triangles.");
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
|
||||
return {};
|
||||
Maxwell::PrimitiveTopology topology,
|
||||
Maxwell::PolygonMode polygon_mode) {
|
||||
return detail::PrimitiveTopologyNoDevice(topology, polygon_mode);
|
||||
}
|
||||
|
||||
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -15,6 +18,52 @@ namespace Vulkan::MaxwellToVK {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
|
||||
namespace detail {
|
||||
constexpr VkPrimitiveTopology PrimitiveTopologyNoDevice(Maxwell::PrimitiveTopology topology,
|
||||
Maxwell::PolygonMode polygon_mode) {
|
||||
switch (topology) {
|
||||
case Maxwell::PrimitiveTopology::Points:
|
||||
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
||||
case Maxwell::PrimitiveTopology::Lines:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineLoop:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::TriangleFan:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
case Maxwell::PrimitiveTopology::LinesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::LineStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
|
||||
case Maxwell::PrimitiveTopology::Quads:
|
||||
case Maxwell::PrimitiveTopology::QuadStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::Patches:
|
||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
case Maxwell::PrimitiveTopology::Polygon:
|
||||
switch (polygon_mode) {
|
||||
case Maxwell::PolygonMode::Fill:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
|
||||
case Maxwell::PolygonMode::Line:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PolygonMode::Point:
|
||||
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
VkFilter Filter(Tegra::Texture::TextureFilter filter);
|
||||
@@ -46,7 +95,8 @@ struct FormatInfo {
|
||||
|
||||
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
|
||||
|
||||
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
|
||||
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology,
|
||||
Maxwell::PolygonMode polygon_mode);
|
||||
|
||||
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
|
||||
Maxwell::VertexAttribute::Size size);
|
||||
|
||||
@@ -333,6 +333,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
|
||||
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
|
||||
quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
|
||||
compute_pass_descriptor_queue) {
|
||||
const VkDriverIdKHR driver_id = device.GetDriverID();
|
||||
limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
|
||||
if (limit_dynamic_storage_buffers) {
|
||||
max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
|
||||
}
|
||||
if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
|
||||
// TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
|
||||
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
|
||||
@@ -408,6 +415,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetUniformBufferAlignment());
|
||||
}
|
||||
|
||||
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetStorageBufferAlignment());
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache_base.h"
|
||||
#include "video_core/buffer_cache/memory_tracker_base.h"
|
||||
#include "video_core/buffer_cache/usage_tracker.h"
|
||||
@@ -94,6 +96,8 @@ public:
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
u32 GetUniformBufferAlignment() const;
|
||||
|
||||
u32 GetStorageBufferAlignment() const;
|
||||
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
@@ -155,6 +159,14 @@ public:
|
||||
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
|
||||
}
|
||||
|
||||
bool ShouldLimitDynamicStorageBuffers() const {
|
||||
return limit_dynamic_storage_buffers;
|
||||
}
|
||||
|
||||
u32 GetMaxDynamicStorageBuffers() const {
|
||||
return max_dynamic_storage_buffers;
|
||||
}
|
||||
|
||||
private:
|
||||
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
guest_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
@@ -194,6 +206,9 @@ private:
|
||||
|
||||
std::unique_ptr<Uint8Pass> uint8_pass;
|
||||
QuadIndexedPass quad_index_pass;
|
||||
|
||||
bool limit_dynamic_storage_buffers = false;
|
||||
u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
@@ -22,6 +23,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
@@ -614,7 +616,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
vertex_input_ci.pNext = &input_divisor_ci;
|
||||
}
|
||||
const bool has_tess_stages = spv_modules[1] || spv_modules[2];
|
||||
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
|
||||
const auto polygon_mode =
|
||||
FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode.Value());
|
||||
auto input_assembly_topology =
|
||||
MaxwellToVK::PrimitiveTopology(device, key.state.topology, polygon_mode);
|
||||
if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
|
||||
if (!has_tess_stages) {
|
||||
LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
|
||||
@@ -629,6 +634,33 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
}
|
||||
}
|
||||
if (key.state.topology == Maxwell::PrimitiveTopology::Polygon) {
|
||||
const auto polygon_mode_name = [polygon_mode]() -> std::string_view {
|
||||
switch (polygon_mode) {
|
||||
case Maxwell::PolygonMode::Fill:
|
||||
return "Fill";
|
||||
case Maxwell::PolygonMode::Line:
|
||||
return "Line";
|
||||
case Maxwell::PolygonMode::Point:
|
||||
return "Point";
|
||||
}
|
||||
return "Unknown";
|
||||
}();
|
||||
const auto vk_topology_name = [input_assembly_topology]() -> std::string_view {
|
||||
switch (input_assembly_topology) {
|
||||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||||
return "TriangleFan";
|
||||
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||||
return "LineStrip";
|
||||
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
||||
return "PointList";
|
||||
default:
|
||||
return "Unexpected";
|
||||
}
|
||||
}();
|
||||
LOG_DEBUG(Render_Vulkan, "Polygon primitive in {} mode mapped to {}", polygon_mode_name,
|
||||
vk_topology_name);
|
||||
}
|
||||
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
||||
@@ -269,8 +269,9 @@ size_t GetTotalPipelineWorkers() {
|
||||
const size_t max_core_threads =
|
||||
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
|
||||
#ifdef ANDROID
|
||||
// Leave at least a few cores free in android
|
||||
constexpr size_t free_cores = 3ULL;
|
||||
// Leave at least a few cores free on Android; reserve two instead of three so
|
||||
// pipeline compilation can consume one more worker thread for testing.
|
||||
constexpr size_t free_cores = 2ULL;
|
||||
if (max_core_threads <= free_cores) {
|
||||
return 1ULL;
|
||||
}
|
||||
@@ -767,6 +768,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||
}
|
||||
|
||||
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||
const VkDriverIdKHR driver_id = device.GetDriverID();
|
||||
const bool needs_shared_mem_clamp =
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
|
||||
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
|
||||
if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
|
||||
key.unique_hash,
|
||||
program.shared_memory_size / 1024,
|
||||
max_shared_memory / 1024);
|
||||
program.shared_memory_size = max_shared_memory;
|
||||
}
|
||||
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
|
||||
device.SaveShader(code);
|
||||
vk::ShaderModule spv_module{BuildShader(device, code)};
|
||||
|
||||
@@ -156,7 +156,7 @@ public:
|
||||
|
||||
ReserveHostQuery();
|
||||
|
||||
/* Ensure outside render pass
|
||||
// Ensure outside render pass
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
// Reset query pool outside render pass
|
||||
@@ -167,7 +167,7 @@ public:
|
||||
|
||||
// Manually restart the render pass (required for vkCmdClearAttachments, etc.)
|
||||
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
|
||||
*/
|
||||
|
||||
// Begin query inside the newly started render pass
|
||||
scheduler.Record([query_pool = current_query_pool,
|
||||
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
||||
@@ -402,13 +402,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
VkQueryPool GetOrCreateCurrentPoolForPrologue() {
|
||||
if (!current_bank || current_bank->IsClosed()) {
|
||||
ReserveBank();
|
||||
}
|
||||
return current_query_pool;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Func>
|
||||
void ApplyBankOp(VideoCommon::HostQueryBase* query, Func&& func) {
|
||||
@@ -1312,19 +1305,6 @@ void QueryCacheRuntime::ResumeHostConditionalRendering() {
|
||||
impl->is_hcr_running = true;
|
||||
}
|
||||
|
||||
void QueryCacheRuntime::FramePrologueResets(Scheduler& scheduler) {
|
||||
// Reset the occlusion queries we plan to use this frame in one go.
|
||||
// Ensure this is recorded OUTSIDE any render pass.
|
||||
const VkQueryPool pool = impl->sample_streamer.GetOrCreateCurrentPoolForPrologue();
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([pool](vk::CommandBuffer cmdbuf) {
|
||||
// Reset the whole bank so subsequent BeginQuery calls don't need per-slot resets.
|
||||
cmdbuf.ResetQueryPool(pool,
|
||||
/*first*/ 0,
|
||||
/*count*/ SamplesQueryBank::BANK_SIZE);
|
||||
});
|
||||
}
|
||||
|
||||
void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object,
|
||||
bool is_equal) {
|
||||
{
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
@@ -23,7 +20,7 @@ namespace Vulkan {
|
||||
class Device;
|
||||
class Scheduler;
|
||||
class StagingBufferPool;
|
||||
class Scheduler;
|
||||
|
||||
struct QueryCacheRuntimeImpl;
|
||||
|
||||
class QueryCacheRuntime {
|
||||
@@ -36,7 +33,7 @@ public:
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
||||
DescriptorPool& descriptor_pool, TextureCache& texture_cache_);
|
||||
~QueryCacheRuntime();
|
||||
void FramePrologueResets(Scheduler& scheduler);
|
||||
|
||||
template <typename SyncValuesType>
|
||||
void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr);
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
#include "video_core/polygon_mode_utils.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
@@ -148,7 +149,8 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3
|
||||
return scissor;
|
||||
}
|
||||
|
||||
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {
|
||||
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed,
|
||||
Maxwell::PolygonMode polygon_mode) {
|
||||
DrawParams params{
|
||||
.base_instance = draw_state.base_instance,
|
||||
.num_instances = num_instances,
|
||||
@@ -168,6 +170,21 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
|
||||
params.base_vertex = 0;
|
||||
params.is_indexed = true;
|
||||
}
|
||||
const bool polygon_line =
|
||||
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
|
||||
polygon_mode == Maxwell::PolygonMode::Line;
|
||||
if (polygon_line) {
|
||||
if (params.is_indexed) {
|
||||
if (draw_state.index_buffer.count > 1) {
|
||||
params.num_vertices = draw_state.index_buffer.count + 1;
|
||||
}
|
||||
} else if (draw_state.vertex_buffer.count > 1) {
|
||||
params.num_vertices = draw_state.vertex_buffer.count + 1;
|
||||
params.is_indexed = true;
|
||||
params.first_index = 0;
|
||||
params.base_vertex = draw_state.vertex_buffer.first;
|
||||
}
|
||||
}
|
||||
return params;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
@@ -233,7 +250,8 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const u32 num_instances{instance_count};
|
||||
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
|
||||
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
|
||||
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed, polygon_mode)};
|
||||
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (draw_params.is_indexed) {
|
||||
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
|
||||
|
||||
@@ -257,11 +257,6 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
||||
|
||||
void Scheduler::AllocateNewContext() {
|
||||
// Enable counters once again. These are disabled when a command buffer is finished.
|
||||
// Record per frame query resets outside any render pass, before the first draw.
|
||||
if (query_cache) {
|
||||
query_cache->FramePrologueResets(*this);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Scheduler::InvalidateState() {
|
||||
|
||||
@@ -1516,6 +1516,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
|
||||
return device.GetSamplerHeapBudget();
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::TickFrame() {}
|
||||
|
||||
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||
|
||||
@@ -62,6 +62,8 @@ public:
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
std::optional<size_t> GetSamplerHeapBudget() const;
|
||||
|
||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <unordered_set>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
@@ -1736,11 +1738,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
||||
}
|
||||
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
|
||||
if (is_new) {
|
||||
EnforceSamplerBudget();
|
||||
pair->second = slot_samplers.insert(runtime, config);
|
||||
}
|
||||
return pair->second;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
|
||||
if constexpr (requires { runtime.GetSamplerHeapBudget(); }) {
|
||||
return runtime.GetSamplerHeapBudget();
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::EnforceSamplerBudget() {
|
||||
const auto budget = QuerySamplerBudget();
|
||||
if (!budget) {
|
||||
return;
|
||||
}
|
||||
if (slot_samplers.size() < *budget) {
|
||||
return;
|
||||
}
|
||||
if (!channel_state) {
|
||||
return;
|
||||
}
|
||||
if (last_sampler_gc_frame == frame_tick) {
|
||||
return;
|
||||
}
|
||||
last_sampler_gc_frame = frame_tick;
|
||||
TrimInactiveSamplers(*budget);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
|
||||
if (channel_state->samplers.empty()) {
|
||||
return;
|
||||
}
|
||||
static constexpr size_t SAMPLER_GC_SLACK = 1024;
|
||||
auto mark_active = [](auto& set, SamplerId id) {
|
||||
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
|
||||
return;
|
||||
}
|
||||
set.insert(id);
|
||||
};
|
||||
std::unordered_set<SamplerId> active;
|
||||
active.reserve(channel_state->graphics_sampler_ids.size() +
|
||||
channel_state->compute_sampler_ids.size());
|
||||
for (const SamplerId id : channel_state->graphics_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
for (const SamplerId id : channel_state->compute_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
|
||||
size_t removed = 0;
|
||||
auto& sampler_map = channel_state->samplers;
|
||||
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
|
||||
const SamplerId sampler_id = it->second;
|
||||
if (!sampler_id || sampler_id == CORRUPT_ID) {
|
||||
it = sampler_map.erase(it);
|
||||
continue;
|
||||
}
|
||||
if (active.find(sampler_id) != active.end()) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
slot_samplers.erase(sampler_id);
|
||||
it = sampler_map.erase(it);
|
||||
++removed;
|
||||
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (removed != 0) {
|
||||
LOG_WARNING(HW_GPU,
|
||||
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
|
||||
budget, removed);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
|
||||
const auto& regs = maxwell3d->regs;
|
||||
|
||||
@@ -429,6 +429,9 @@ private:
|
||||
|
||||
void QueueAsyncDecode(Image& image, ImageId image_id);
|
||||
void TickAsyncDecode();
|
||||
void EnforceSamplerBudget();
|
||||
void TrimInactiveSamplers(size_t budget);
|
||||
std::optional<size_t> QuerySamplerBudget() const;
|
||||
|
||||
Runtime& runtime;
|
||||
|
||||
@@ -500,6 +503,7 @@ private:
|
||||
|
||||
u64 modification_tick = 0;
|
||||
u64 frame_tick = 0;
|
||||
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
|
||||
|
||||
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
||||
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
||||
|
||||
@@ -579,6 +579,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
|
||||
has_broken_parallel_compiling = true;
|
||||
}
|
||||
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
|
||||
if (sampler_limit > 0) {
|
||||
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
|
||||
const size_t reserved = sampler_limit / 4U;
|
||||
const size_t derived_budget =
|
||||
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
|
||||
sampler_heap_budget = derived_budget;
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Qualcomm driver reports max {} samplers; reserving {} (25%) and "
|
||||
"allowing Eden to use {} (75%) to avoid heap exhaustion",
|
||||
sampler_limit, reserved, sampler_heap_budget);
|
||||
}
|
||||
}
|
||||
|
||||
if (extensions.sampler_filter_minmax && is_amd) {
|
||||
@@ -1273,6 +1285,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<size_t> Device::GetSamplerHeapBudget() const {
|
||||
if (sampler_heap_budget == 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return sampler_heap_budget;
|
||||
}
|
||||
|
||||
u64 Device::GetDeviceMemoryUsage() const {
|
||||
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
|
||||
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <span>
|
||||
#include <string>
|
||||
@@ -310,6 +311,16 @@ public:
|
||||
return properties.properties.limits.maxComputeSharedMemorySize;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic storage buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
|
||||
}
|
||||
|
||||
/// Returns float control properties of the device.
|
||||
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
|
||||
return properties.float_controls;
|
||||
@@ -631,6 +642,8 @@ public:
|
||||
return has_broken_parallel_compiling;
|
||||
}
|
||||
|
||||
std::optional<size_t> GetSamplerHeapBudget() const;
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return properties.driver.driverName;
|
||||
@@ -849,6 +862,7 @@ private:
|
||||
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
|
||||
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
|
||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
|
||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||
u32 sets_per_pool{}; ///< Sets per Description Pool
|
||||
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
||||
|
||||
@@ -14,7 +14,7 @@ try {
|
||||
Exit 1
|
||||
}
|
||||
|
||||
$VulkanSDKVer = "1.4.335.0"
|
||||
$VulkanSDKVer = "1.4.328.1"
|
||||
$VULKAN_SDK = "C:/VulkanSDK/$VulkanSDKVer"
|
||||
$ExeFile = "vulkansdk-windows-X64-$VulkanSDKVer.exe"
|
||||
$Uri = "https://sdk.lunarg.com/sdk/download/$VulkanSDKVer/windows/$ExeFile"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
: "${VULKAN_SDK_VER:=1.4.335.0}"
|
||||
: "${VULKAN_SDK_VER:=1.4.328.1}"
|
||||
: "${VULKAN_ROOT:=C:/VulkanSDK/$VULKAN_SDK_VER}"
|
||||
EXE_FILE="vulkansdk-windows-X64-$VULKAN_SDK_VER.exe"
|
||||
URI="https://sdk.lunarg.com/sdk/download/$VULKAN_SDK_VER/windows/$EXE_FILE"
|
||||
|
||||
Reference in New Issue
Block a user