Compare commits

...

5 Commits

Author SHA1 Message Date
xbzk
ce4877b424 skip-1st/2nd-dispatches workaround extended to Linux; vk_staging_buffer_pool.cpp clamp moved to buffer_cache.h 2025-12-26 05:08:12 +01:00
xbzk
6f4ade37e1 maxwell_dma: multisized components support added (untested) 2025-12-26 05:08:12 +01:00
xbzk
09f06a9a41 MCI boot fix (android): skipping problematic initial pair of dispatches 2025-12-26 05:08:12 +01:00
xbzk
a701ea274f Minimal stopgaps for MCI to boot
- Clamp staging buffer size to 2 GiB to prevent Vulkan allocation failures
- Add size validation in MappedUploadMemory to avoid buffer overruns
(both stopgaps are sketched right after the MappedUploadMemory hunk below)
2025-12-26 05:08:12 +01:00
xbzk
cfae726289 [video_core] nvn descriptor layout fix (#3206)
Yuzu presumes this:
// The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
But in MCI I've discovered that there are no sizes: both registers are GPU addresses (hence the 2.8 GB "allocation", which was actually an address). See the sketch after this commit message.

The method could be much simpler, but for safety I've routed both the old and new worlds.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3206
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Reviewed-by: Lizzie <lizzie@eden-emu.dev>
Co-authored-by: xbzk <xbzk@eden-emu.dev>
Co-committed-by: xbzk <xbzk@eden-emu.dev>
2025-12-26 04:54:14 +01:00
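
Below is a minimal standalone sketch of the detection this commit describes, assuming only that Switch GPU virtual addresses carry non-zero upper 32 bits. LooksLikeSize and the sample constants are illustrative stand-ins for the gpu_memory->Read<u64> probe in the real change:

#include <cstdint>
#include <cstdio>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Old-world NVN cbuf layout: [ssbo_addr][size]      -> qword at +8 has zero upper bits.
// MCI layout:                [ssbo_addr][gpu_addr]  -> qword at +8 is a full GPU address.
static bool LooksLikeSize(u64 qword_after_addr) {
    const u32 upper_32 = static_cast<u32>(qword_after_addr >> 32);
    return upper_32 == 0; // GPU addresses carry non-zero upper bits
}

int main() {
    // A plausible size field: upper bits are zero, so treat it as a size.
    std::printf("%d\n", LooksLikeSize(0x0000000000001000ULL)); // prints 1
    // A plausible GPU address: non-zero upper bits, so fall back to the mapped
    // layout size instead of misreading the low half as a ~2.8 GB "size".
    std::printf("%d\n", LooksLikeSize(0x00000004b0000000ULL)); // prints 0
}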
3 changed files with 37 additions and 15 deletions

src/video_core/buffer_cache/buffer_cache.h

@@ -1508,7 +1508,10 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS) {
-        auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+        constexpr u64 MAX_STAGING_SIZE = 2_GiB;
+        auto upload_staging = runtime.UploadStagingBuffer((std::min)(total_size_bytes, MAX_STAGING_SIZE));
+        if (upload_staging.mapped_span.size() < total_size_bytes) return;
+        //auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {
             u8* const src_pointer = staging_pointer.data() + copy.src_offset;
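
For context, here is a compilable sketch of the two stopgaps in the hunk above, with the BufferCache plumbing replaced by stand-ins; StagingBufferRef, UploadStagingBuffer, and the vector backing are assumptions for illustration, not the emulator's real types:

#include <algorithm>
#include <cstdint>
#include <span>
#include <vector>

using u8 = std::uint8_t;
using u64 = std::uint64_t;

constexpr u64 operator""_GiB(unsigned long long n) { return n << 30; }

struct StagingBufferRef {
    std::span<u8> mapped_span; // may be smaller than requested once clamped
};

// Stand-in for the runtime's staging allocator; real Vulkan allocations can
// fail outright for very large requests, which is what the clamp avoids.
static StagingBufferRef UploadStagingBuffer(u64 size) {
    static std::vector<u8> backing;
    backing.resize(static_cast<std::size_t>(size));
    return {std::span<u8>{backing}};
}

static bool MappedUpload(u64 total_size_bytes) {
    constexpr u64 MAX_STAGING_SIZE = 2_GiB;
    // Stopgap 1: never request more than 2 GiB of staging memory.
    auto staging = UploadStagingBuffer((std::min)(total_size_bytes, MAX_STAGING_SIZE));
    // Stopgap 2: if the mapped span cannot hold the whole upload, bail out
    // instead of writing past its end.
    if (staging.mapped_span.size() < total_size_bytes) {
        return false; // upload skipped
    }
    return true; // safe to copy into staging.mapped_span
}

int main() {
    return MappedUpload(64) ? 0 : 1; // a 3 GiB request would be skipped instead
}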
@@ -1705,21 +1708,26 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
         return NULL_BINDING;
     }
+    // xbzk: New size logic. Fixes MCI.
+    // If the * comment below ever proves wrong, the 'if' block may be removed.
     const auto size = [&]() {
         const bool is_nvn_cbuf = cbuf_index == 0;
-        // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
         if (is_nvn_cbuf) {
-            const u32 ssbo_size = gpu_memory->Read<u32>(ssbo_addr + 8);
-            if (ssbo_size != 0) {
-                return ssbo_size;
+            // * The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
+            const u64 next_qword = gpu_memory->Read<u64>(ssbo_addr + 8);
+            const u32 upper_32 = static_cast<u32>(next_qword >> 32);
+            // Hardware-based detection: GPU addresses have non-zero upper bits
+            if (upper_32 == 0) {
+                // This is a size field, not a GPU address
+                return static_cast<u32>(next_qword); // Return lower_32
             }
         }
-        // Other titles (notably Doom Eternal) may use STG/LDG on buffer addresses in custom defined
-        // cbufs, which do not store the sizes adjacent to the addresses, so use the fully
-        // mapped buffer size for now.
+        // Fall through: either not NVN cbuf (Doom Eternal & +), or NVN but ssbo_addr+8 is a GPU address (MCI)
         const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
+        // Cap at 8MB to prevent allocator overflow from misinterpreted addresses
         return (std::min)(memory_layout_size, static_cast<u32>(8_MiB));
     }();
     // Alignment only applies to the offset of the buffer
     const u32 alignment = runtime.GetStorageBufferAlignment();
     const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);

src/video_core/engines/maxwell_dma.cpp

@@ -92,19 +92,25 @@ void MaxwellDMA::Launch() {
             }
         }
     } else {
-        // TODO: allow multisized components.
+        // TODO: xbzk: multisized components support.
+        // validate this widely!
+        // shipped in PR 3164.
         auto& accelerate = rasterizer->AccessAccelerateDMA();
         const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
         if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
-            ASSERT(regs.remap_const.component_size_minus_one == 3);
+            const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
             accelerate.BufferClear(regs.offset_out, regs.line_length_in,
                                    regs.remap_const.remap_consta_value);
-            read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
-            std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
-            std::ranges::fill(span, regs.remap_const.remap_consta_value);
+            read_buffer.resize_destructive(regs.line_length_in * remap_components_size);
+            for (u32 i = 0; i < regs.line_length_in; ++i) {
+                for (u32 j = 0; j < remap_components_size; ++j) {
+                    read_buffer[i * remap_components_size + j] =
+                        (regs.remap_const.remap_consta_value >> (j * 8)) & 0xFF;
+                }
+            }
             memory_manager.WriteBlockUnsafe(regs.offset_out,
-                                            reinterpret_cast<u8*>(read_buffer.data()),
-                                            regs.line_length_in * sizeof(u32));
+                                            read_buffer.data(),
+                                            regs.line_length_in * remap_components_size);
        } else {
            memory_manager.FlushCaching();
            const auto convert_linear_2_blocklinear_addr = [](u64 address) {
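
A standalone sketch of the new fill loop above: the remap constant is expanded into components of 1 to 4 bytes each, written out little-endian byte by byte, where the removed code hard-assumed 4-byte components. FillComponents and the sample values are illustrative:

#include <cstdint>
#include <cstdio>
#include <vector>

using u8 = std::uint8_t;
using u32 = std::uint32_t;

// Mirrors the double loop in the diff: component byte j gets byte j of the constant.
static std::vector<u8> FillComponents(u32 consta_value, u32 component_size, u32 line_length) {
    std::vector<u8> out(line_length * component_size);
    for (u32 i = 0; i < line_length; ++i) {
        for (u32 j = 0; j < component_size; ++j) {
            out[i * component_size + j] = static_cast<u8>((consta_value >> (j * 8)) & 0xFF);
        }
    }
    return out;
}

int main() {
    // Three 2-byte components of 0xAABB -> BB AA BB AA BB AA
    // (the removed path could only produce 4-byte patterns).
    for (u8 byte : FillComponents(0xAABB, 2, 3)) {
        std::printf("%02X ", byte);
    }
    std::printf("\n");
}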

src/video_core/renderer_vulkan/vk_rasterizer.cpp

@@ -479,6 +479,14 @@ void RasterizerVulkan::Clear(u32 layer_count) {
 }

 void RasterizerVulkan::DispatchCompute() {
+#if defined(ANDROID) || defined(__linux__)
+    static u32 dispatch_count = 0;
+    if (dispatch_count < 2) {
+        dispatch_count++;
+        return;
+    }
+#endif
     FlushWork();
     gpu_memory->FlushCaching();
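
A minimal sketch of the skip pattern above. One caveat worth noting: the function-local static makes the counter process-wide and it never resets, so only the first two dispatches after emulator start are swallowed. ShouldSkipDispatch is an illustrative stand-in for the inline guard:

#include <cstdint>
#include <cstdio>

static bool ShouldSkipDispatch() {
    static std::uint32_t dispatch_count = 0;
    if (dispatch_count < 2) {
        ++dispatch_count;
        return true; // drop the problematic initial dispatches
    }
    return false;
}

int main() {
    for (int i = 0; i < 4; ++i) {
        std::printf("dispatch %d: %s\n", i, ShouldSkipDispatch() ? "skipped" : "executed");
    }
    // dispatch 0: skipped, dispatch 1: skipped, dispatch 2 and on: executed
}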