[gl, vk] Access Tracking Synchronization
This commit is contained in:
committed by
Caio Oliveira
parent
bfcce2609b
commit
a8cbd910d8
@@ -130,6 +130,17 @@ public:
|
||||
ResetStorageBit(id.index);
|
||||
}
|
||||
|
||||
/// Reports whether `id` names a slot whose bit is set in the storage bitset.
/// A null id, or an id whose word lies beyond the bitset, is reported as absent.
[[nodiscard]] bool Contains(SlotId id) const noexcept {
    if (!id) {
        return false;
    }
    // Each bitset word tracks 64 slots.
    const size_t word_index = id.index / 64;
    if (word_index >= stored_bitset.size()) {
        return false;
    }
    const u64 bit_mask = u64{1} << (id.index % 64);
    return (stored_bitset[word_index] & bit_mask) != 0;
}
|
||||
|
||||
[[nodiscard]] Iterator begin() noexcept {
|
||||
const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
|
||||
if (it == stored_bitset.end()) {
|
||||
|
||||
@@ -91,6 +91,10 @@ public:
|
||||
uncommitted_operations.clear();
|
||||
}
|
||||
QueueFence(new_fence);
|
||||
if (!new_fence->IsStubbed()) {
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick());
|
||||
}
|
||||
fences.push(std::move(new_fence));
|
||||
if (should_flush) {
|
||||
rasterizer.FlushCommands();
|
||||
@@ -179,7 +183,7 @@ private:
|
||||
return;
|
||||
}
|
||||
}
|
||||
PopAsyncFlushes();
|
||||
PopAsyncFlushes(current_fence->WaitTick());
|
||||
auto operations = std::move(pending_operations.front());
|
||||
pending_operations.pop_front();
|
||||
for (auto& operation : operations) {
|
||||
@@ -214,7 +218,7 @@ private:
|
||||
if (!current_fence->IsStubbed()) {
|
||||
WaitFence(current_fence);
|
||||
}
|
||||
PopAsyncFlushes();
|
||||
PopAsyncFlushes(current_fence->WaitTick());
|
||||
for (auto& operation : current_operations) {
|
||||
operation();
|
||||
}
|
||||
@@ -237,10 +241,11 @@ private:
|
||||
query_cache.HasUncommittedFlushes();
|
||||
}
|
||||
|
||||
void PopAsyncFlushes() {
|
||||
void PopAsyncFlushes(u64 completed_tick) {
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.PopAsyncFlushes();
|
||||
texture_cache.CompleteGpuAccesses(completed_tick);
|
||||
buffer_cache.PopAsyncFlushes();
|
||||
}
|
||||
query_cache.PopAsyncFlushes();
|
||||
|
||||
@@ -143,6 +143,8 @@ public:
|
||||
|
||||
/// Intentionally empty: this runtime performs no per-frame work here.
void TickFrame() {}
|
||||
|
||||
/// Intentionally empty: this runtime ignores the tick and never blocks.
void WaitForGpuTick(u64) {}
|
||||
|
||||
/// Returns a mutable reference to the `state_tracker` member.
StateTracker& GetStateTracker() {
|
||||
return state_tracker;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,10 @@ public:
|
||||
|
||||
void Wait();
|
||||
|
||||
/// Returns the value currently stored in `wait_tick`.
[[nodiscard]] u64 WaitTick() const noexcept {
|
||||
return wait_tick;
|
||||
}
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
u64 wait_tick = 0;
|
||||
|
||||
@@ -910,6 +910,13 @@ void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
|
||||
staging_buffer_pool.FreeDeferred(ref);
|
||||
}
|
||||
|
||||
/// Waits on the scheduler for `tick`. A tick of zero is ignored and returns immediately.
void TextureCacheRuntime::WaitForGpuTick(u64 tick) {
    if (tick != 0) {
        scheduler.Wait(tick);
    }
}
|
||||
|
||||
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
|
||||
if (VideoCore::Surface::GetFormatType(dst.info.format) ==
|
||||
VideoCore::Surface::SurfaceType::DepthStencil &&
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
@@ -59,6 +60,8 @@ public:
|
||||
|
||||
void TickFrame();
|
||||
|
||||
void WaitForGpuTick(u64 tick);
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
@@ -177,6 +180,30 @@ public:
|
||||
return (this->*current_image).UsageFlags();
|
||||
}
|
||||
|
||||
/// Records `tick` as a pending GPU read by forwarding to TrackPendingReadTick.
void TrackGpuReadTick(u64 tick) noexcept {
|
||||
TrackPendingReadTick(tick);
|
||||
}
|
||||
|
||||
/// Records `tick` as a pending GPU write by forwarding to TrackPendingWriteTick.
void TrackGpuWriteTick(u64 tick) noexcept {
|
||||
TrackPendingWriteTick(tick);
|
||||
}
|
||||
|
||||
/// Forwards `completed_tick` to ClearPendingReadTick.
void CompleteGpuReadTick(u64 completed_tick) noexcept {
|
||||
ClearPendingReadTick(completed_tick);
|
||||
}
|
||||
|
||||
/// Forwards `completed_tick` to ClearPendingWriteTick.
void CompleteGpuWriteTick(u64 completed_tick) noexcept {
|
||||
ClearPendingWriteTick(completed_tick);
|
||||
}
|
||||
|
||||
/// Returns whatever PendingReadTick() reports for this image.
[[nodiscard]] std::optional<u64> PendingGpuReadTick() const noexcept {
|
||||
return PendingReadTick();
|
||||
}
|
||||
|
||||
/// Returns whatever PendingWriteTick() reports for this image.
[[nodiscard]] std::optional<u64> PendingGpuWriteTick() const noexcept {
|
||||
return PendingWriteTick();
|
||||
}
|
||||
|
||||
/// Returns true when the image was already initialized, and marks it as initialized
|
||||
[[nodiscard]] bool ExchangeInitialization() noexcept {
|
||||
return std::exchange(initialized, true);
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
@@ -58,6 +62,50 @@ struct ImageBase {
|
||||
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
|
||||
explicit ImageBase(const NullImageParams&);
|
||||
|
||||
/// Stores `tick` as the pending read tick, keeping the larger value when one is already stored.
void TrackPendingReadTick(u64 tick) noexcept {
    const u64 latest = pending_read_tick.has_value() ? std::max(*pending_read_tick, tick) : tick;
    pending_read_tick = latest;
}
|
||||
|
||||
/// Stores `tick` as the pending write tick, keeping the larger value when one is already stored.
void TrackPendingWriteTick(u64 tick) noexcept {
    const u64 latest = pending_write_tick.has_value() ? std::max(*pending_write_tick, tick) : tick;
    pending_write_tick = latest;
}
|
||||
|
||||
/// Drops the pending read tick once `completed_tick` has reached or passed it.
/// Does nothing when no read tick is stored or the stored tick is still ahead.
void ClearPendingReadTick(u64 completed_tick) noexcept {
    if (!pending_read_tick.has_value()) {
        return;
    }
    if (*pending_read_tick <= completed_tick) {
        pending_read_tick.reset();
    }
}
|
||||
|
||||
/// Drops the pending write tick once `completed_tick` has reached or passed it.
/// Does nothing when no write tick is stored or the stored tick is still ahead.
void ClearPendingWriteTick(u64 completed_tick) noexcept {
    if (!pending_write_tick.has_value()) {
        return;
    }
    if (*pending_write_tick <= completed_tick) {
        pending_write_tick.reset();
    }
}
|
||||
|
||||
[[nodiscard]] bool HasPendingReadTick() const noexcept {
|
||||
return pending_read_tick.has_value();
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasPendingWriteTick() const noexcept {
|
||||
return pending_write_tick.has_value();
|
||||
}
|
||||
|
||||
/// Returns a copy of the stored pending read tick; empty when none is recorded.
[[nodiscard]] std::optional<u64> PendingReadTick() const noexcept {
|
||||
return pending_read_tick;
|
||||
}
|
||||
|
||||
/// Returns a copy of the stored pending write tick; empty when none is recorded.
[[nodiscard]] std::optional<u64> PendingWriteTick() const noexcept {
|
||||
return pending_write_tick;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
|
||||
|
||||
[[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
|
||||
@@ -115,6 +163,9 @@ struct ImageBase {
|
||||
std::vector<AliasedImage> aliased_images;
|
||||
std::vector<ImageId> overlapping_images;
|
||||
ImageMapId map_view_id{};
|
||||
|
||||
std::optional<u64> pending_read_tick;
|
||||
std::optional<u64> pending_write_tick;
|
||||
};
|
||||
|
||||
struct ImageMapView {
|
||||
|
||||
@@ -540,6 +540,7 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
|
||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||
return;
|
||||
}
|
||||
EnsureImageReady(image, ImageAccessType::Write);
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, image_id);
|
||||
@@ -550,11 +551,12 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
|
||||
template <class P>
|
||||
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
|
||||
boost::container::small_vector<ImageId, 16> images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
|
||||
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
|
||||
if (!image.IsSafeDownload()) {
|
||||
return;
|
||||
}
|
||||
image.flags &= ~ImageFlagBits::GpuModified;
|
||||
EnsureImageReady(this->slot_images[image_id], ImageAccessType::Read);
|
||||
images.push_back(image_id);
|
||||
});
|
||||
if (images.empty()) {
|
||||
@@ -606,6 +608,7 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
|
||||
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
EnsureImageReady(image, ImageAccessType::Write);
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, id);
|
||||
}
|
||||
@@ -621,6 +624,7 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
|
||||
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
EnsureImageReady(image, ImageAccessType::Write);
|
||||
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
@@ -2423,6 +2427,8 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
template <class P>
|
||||
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
|
||||
Image& image = slot_images[image_id];
|
||||
EnsureImageReady(image, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
|
||||
TrackGpuImageAccess(image_id, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
|
||||
runtime.TransitionImageLayout(image);
|
||||
if (invalidate) {
|
||||
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
|
||||
@@ -2439,6 +2445,85 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
|
||||
lru_cache.Touch(image.lru_index, frame_tick);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TrackGpuImageAccess(ImageId image_id, ImageAccessType access) {
|
||||
staged_gpu_accesses.push_back({image_id, access});
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::CommitPendingGpuAccesses(u64 fence_value) {
|
||||
if (staged_gpu_accesses.empty()) {
|
||||
return;
|
||||
}
|
||||
if (fence_value == 0) {
|
||||
return;
|
||||
}
|
||||
auto& batch = committed_gpu_accesses.emplace_back(std::move(staged_gpu_accesses));
|
||||
staged_gpu_accesses.clear();
|
||||
committed_gpu_ticks.push_back(fence_value);
|
||||
for (const PendingImageAccess& access : batch) {
|
||||
ImageBase& image = slot_images[access.image_id];
|
||||
if (access.access == ImageAccessType::Read) {
|
||||
image.TrackPendingReadTick(fence_value);
|
||||
} else {
|
||||
image.TrackPendingWriteTick(fence_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::CompleteGpuAccesses(u64 completed_fence) {
|
||||
if (completed_fence == 0) {
|
||||
return;
|
||||
}
|
||||
while (!committed_gpu_ticks.empty() && committed_gpu_ticks.front() <= completed_fence) {
|
||||
auto accesses = std::move(committed_gpu_accesses.front());
|
||||
committed_gpu_accesses.pop_front();
|
||||
committed_gpu_ticks.pop_front();
|
||||
for (const PendingImageAccess& access : accesses) {
|
||||
if (!slot_images.Contains(access.image_id)) {
|
||||
continue;
|
||||
}
|
||||
ImageBase& image = slot_images[access.image_id];
|
||||
if (access.access == ImageAccessType::Read) {
|
||||
image.ClearPendingReadTick(completed_fence);
|
||||
} else {
|
||||
image.ClearPendingWriteTick(completed_fence);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::EnsureImageReady(ImageBase& image, ImageAccessType access) {
|
||||
auto wait_tick = [this](std::optional<u64> tick) -> std::optional<u64> {
|
||||
if (!tick) {
|
||||
return std::nullopt;
|
||||
}
|
||||
runtime.WaitForGpuTick(*tick);
|
||||
return tick;
|
||||
};
|
||||
|
||||
if (access == ImageAccessType::Write) {
|
||||
if (const auto tick = image.PendingReadTick()) {
|
||||
if (const auto waited = wait_tick(tick)) {
|
||||
image.ClearPendingReadTick(*waited);
|
||||
}
|
||||
}
|
||||
if (const auto tick = image.PendingWriteTick()) {
|
||||
if (const auto waited = wait_tick(tick)) {
|
||||
image.ClearPendingWriteTick(*waited);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (const auto tick = image.PendingWriteTick()) {
|
||||
if (const auto waited = wait_tick(tick)) {
|
||||
image.ClearPendingWriteTick(*waited);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
|
||||
bool invalidate) {
|
||||
@@ -2456,6 +2541,8 @@ template <class P>
|
||||
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
|
||||
Image& dst = slot_images[dst_id];
|
||||
Image& src = slot_images[src_id];
|
||||
EnsureImageReady(dst, ImageAccessType::Write);
|
||||
EnsureImageReady(src, ImageAccessType::Read);
|
||||
const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
|
||||
if (is_rescaled) {
|
||||
ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
|
||||
@@ -2472,20 +2559,30 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
|
||||
}
|
||||
}
|
||||
}
|
||||
const auto TrackCopyAccesses = [this, dst_id, src_id]() {
|
||||
TrackGpuImageAccess(dst_id, ImageAccessType::Write);
|
||||
TrackGpuImageAccess(src_id, ImageAccessType::Read);
|
||||
};
|
||||
const auto dst_format_type = GetFormatType(dst.info.format);
|
||||
const auto src_format_type = GetFormatType(src.info.format);
|
||||
if (src_format_type == dst_format_type) {
|
||||
if constexpr (HAS_EMULATED_COPIES) {
|
||||
if (!runtime.CanImageBeCopied(dst, src)) {
|
||||
return runtime.EmulateCopyImage(dst, src, copies);
|
||||
runtime.EmulateCopyImage(dst, src, copies);
|
||||
TrackCopyAccesses();
|
||||
return;
|
||||
}
|
||||
}
|
||||
return runtime.CopyImage(dst, src, copies);
|
||||
runtime.CopyImage(dst, src, copies);
|
||||
TrackCopyAccesses();
|
||||
return;
|
||||
}
|
||||
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
|
||||
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
|
||||
if (runtime.ShouldReinterpret(dst, src)) {
|
||||
return runtime.ReinterpretImage(dst, src, copies);
|
||||
runtime.ReinterpretImage(dst, src, copies);
|
||||
TrackCopyAccesses();
|
||||
return;
|
||||
}
|
||||
for (const ImageCopy& copy : copies) {
|
||||
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
|
||||
@@ -2538,6 +2635,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
|
||||
|
||||
runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
|
||||
}
|
||||
TrackCopyAccesses();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
||||
@@ -263,6 +263,15 @@ public:
|
||||
/// Prepare an image to be used
|
||||
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
|
||||
|
||||
/// Track that an image participates in upcoming GPU work with the given access type
|
||||
void TrackGpuImageAccess(ImageId image_id, ImageAccessType access);
|
||||
|
||||
/// Notify the cache that tracked GPU work has been submitted with the specified fence value
|
||||
void CommitPendingGpuAccesses(u64 fence_value);
|
||||
|
||||
/// Notify the cache that a fence value has completed so tracked accesses can be released
|
||||
void CompleteGpuAccesses(u64 completed_fence);
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
|
||||
private:
|
||||
@@ -413,6 +422,8 @@ private:
|
||||
/// Execute copies from one image to the other, even if they are incompatible
|
||||
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
|
||||
|
||||
/// Wait through the runtime for pending GPU ticks on the image that conflict with the access
void EnsureImageReady(ImageBase& image, ImageAccessType access);
|
||||
|
||||
/// Bind an image view as render target, downloading resources preemptively if needed
|
||||
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
|
||||
|
||||
@@ -470,6 +481,11 @@ private:
|
||||
Common::SlotId object_id;
|
||||
};
|
||||
|
||||
/// One recorded GPU access: which image was touched and whether it was read or written.
struct PendingImageAccess {
|
||||
// Id of the accessed image in slot_images
ImageId image_id;
|
||||
// Kind of access (read or write) performed on that image
ImageAccessType access;
|
||||
};
|
||||
|
||||
Common::SlotVector<Image> slot_images;
|
||||
Common::SlotVector<ImageMapView> slot_map_views;
|
||||
Common::SlotVector<ImageView> slot_image_views;
|
||||
@@ -485,6 +501,9 @@ private:
|
||||
std::vector<AsyncBuffer> uncommitted_async_buffers;
|
||||
std::deque<std::vector<AsyncBuffer>> async_buffers;
|
||||
std::deque<AsyncBuffer> async_buffers_death_ring;
|
||||
std::vector<PendingImageAccess> staged_gpu_accesses;
|
||||
std::deque<std::vector<PendingImageAccess>> committed_gpu_accesses;
|
||||
std::deque<u64> committed_gpu_ticks;
|
||||
|
||||
struct LRUItemParams {
|
||||
using ObjectType = ImageId;
|
||||
|
||||
@@ -155,4 +155,9 @@ struct SwizzleParameters {
|
||||
s32 level;
|
||||
};
|
||||
|
||||
/// Kind of access performed on an image, used when tracking pending GPU work.
enum class ImageAccessType : u8 {
|
||||
// The image is read
Read,
|
||||
// The image is written
Write,
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
Reference in New Issue
Block a user