Compare commits

2 Commits

Author   SHA1         Message                                                       Date
lizzie   518b806473   oops                                                          2025-12-26 06:42:20 +01:00
lizzie   8f6e23bba0   [host_memory] avoid doing syscall for a simple memset() op   2025-12-26 06:42:20 +01:00
4 changed files with 9 additions and 51 deletions

@@ -219,11 +219,6 @@ public:
         }
     }

-    bool ClearBackingRegion(size_t physical_offset, size_t length) {
-        // TODO: This does not seem to be possible on Windows.
-        return false;
-    }
-
     void EnableDirectMappedAddress() {
         // TODO
         UNREACHABLE();
@@ -617,24 +612,6 @@ public:
         ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
     }

-    bool ClearBackingRegion(size_t physical_offset, size_t length) {
-#ifdef __linux__
-        // Only incur syscall cost IF memset would be slower (theshold = 16MiB)
-        // TODO(lizzie): Smarter way to dynamically get this threshold (broadwell != raptor lake) for example
-        if (length >= 2097152UL * 8) {
-            // Set MADV_REMOVE on backing map to destroy it instantly.
-            // This also deletes the area from the backing file.
-            int ret = madvise(backing_base + physical_offset, length, MADV_REMOVE);
-            ASSERT_MSG(ret == 0, "madvise failed: {}", strerror(errno));
-            return true;
-        } else {
-            return false;
-        }
-#else
-        return false;
-#endif
-    }
-
     void EnableDirectMappedAddress() {
         virtual_base = nullptr;
     }
@@ -762,9 +739,7 @@ void HostMemory::Protect(size_t virtual_offset, size_t length, MemoryPermission
 }

 void HostMemory::ClearBackingRegion(size_t physical_offset, size_t length, u32 fill_value) {
-    if (!impl || fill_value != 0 || !impl->ClearBackingRegion(physical_offset, length)) {
-        std::memset(backing_base + physical_offset, fill_value, length);
-    }
+    std::memset(backing_base + physical_offset, fill_value, length);
 }

 void HostMemory::EnableDirectMappedAddress() {
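Note on the host_memory change: the deleted Linux fast path replaced a large memset with a single madvise(MADV_REMOVE) call, which punches a hole in the backing file (MADV_REMOVE requires a tmpfs/shmem-backed mapping) so the kernel reclaims the pages and later reads fault in fresh zeros. Per the commit message, the syscall was judged not worth its cost, so the wrapper now memsets unconditionally. A minimal sketch of the removed pattern, with illustrative names rather than the project's API, assuming a page-aligned shared mapping over a memfd/tmpfs file:

#include <cstddef>
#include <cstring>
#ifdef __linux__
#include <sys/mman.h>
#endif

// Zero a region of a file-backed mapping. For large ranges on Linux, punch a
// hole with MADV_REMOVE so the kernel drops the pages outright; otherwise
// (or on failure) fall back to writing zeros. The threshold mirrors the
// deleted code; offset and length are assumed page-aligned.
void ClearRegion(unsigned char* backing_base, size_t offset, size_t length) {
#ifdef __linux__
    constexpr size_t kThreshold = 16 * 1024 * 1024; // 16 MiB
    if (length >= kThreshold &&
        madvise(backing_base + offset, length, MADV_REMOVE) == 0) {
        return; // range now reads back as zeros without touching the pages
    }
#endif
    std::memset(backing_base + offset, 0, length);
}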

@@ -1508,10 +1508,7 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS) {
-        constexpr u64 MAX_STAGING_SIZE = 2_GiB;
-        auto upload_staging = runtime.UploadStagingBuffer((std::min)(total_size_bytes, MAX_STAGING_SIZE));
-        if (upload_staging.mapped_span.size() < total_size_bytes) return;
-        //auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+        auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {
             u8* const src_pointer = staging_pointer.data() + copy.src_offset;
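Note on the buffer cache change: the removed lines clamped the staging request to 2 GiB and then returned early whenever the mapped span came back smaller than the upload, so any upload past the clamp was silently dropped rather than split or completed. The fix restores sizing the staging buffer to the full request. A reduced, self-contained sketch of the dropped-upload hazard (toy allocator with illustrative names; the real runtime.UploadStagingBuffer draws from a Vulkan staging pool):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <span>
#include <vector>

// Toy staging allocator that, like the removed clamp, may hand back less
// memory than was requested.
struct Staging {
    std::vector<uint8_t> storage;
    std::span<uint8_t> mapped_span;
};

Staging UploadStagingBuffer(uint64_t size, uint64_t cap) {
    Staging s;
    s.storage.resize(static_cast<size_t>(std::min(size, cap)));
    s.mapped_span = std::span<uint8_t>(s.storage);
    return s;
}

bool Upload(uint64_t total_size_bytes, uint64_t cap) {
    auto staging = UploadStagingBuffer(total_size_bytes, cap);
    if (staging.mapped_span.size() < total_size_bytes) {
        return false; // the removed code returned here: copies silently skipped
    }
    // ... perform the copies through staging.mapped_span ...
    return true;
}

int main() {
    std::cout << Upload(1024, 4096) << '\n'; // 1: fits, upload proceeds
    std::cout << Upload(8192, 4096) << '\n'; // 0: clamped short, upload dropped
}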

@@ -92,25 +92,19 @@ void MaxwellDMA::Launch() {
             }
         }
     } else {
-        // TODO: xbzk: multisized components support.
-        // validadte this widely!
-        // shipped in PR 3164.
+        // TODO: allow multisized components.
         auto& accelerate = rasterizer->AccessAccelerateDMA();
         const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
         if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
-            const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+            ASSERT(regs.remap_const.component_size_minus_one == 3);
             accelerate.BufferClear(regs.offset_out, regs.line_length_in,
                                    regs.remap_const.remap_consta_value);
-            read_buffer.resize_destructive(regs.line_length_in * remap_components_size);
-            for (u32 i = 0; i < regs.line_length_in; ++i) {
-                for (u32 j = 0; j < remap_components_size; ++j) {
-                    read_buffer[i * remap_components_size + j] =
-                        (regs.remap_const.remap_consta_value >> (j * 8)) & 0xFF;
-                }
-            }
+            read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
+            std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
+            std::ranges::fill(span, regs.remap_const.remap_consta_value);
             memory_manager.WriteBlockUnsafe(regs.offset_out,
-                                            read_buffer.data(),
-                                            regs.line_length_in * remap_components_size);
+                                            reinterpret_cast<u8*>(read_buffer.data()),
+                                            regs.line_length_in * sizeof(u32));
         } else {
             memory_manager.FlushCaching();
             const auto convert_linear_2_blocklinear_addr = [](u64 address) {
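Note on the MaxwellDMA change: the remap-constant clear used to expand the 32-bit constant into the scratch buffer one byte at a time; it now views the buffer as u32 words and fills it with std::ranges::fill, while the added ASSERT makes the 4-byte component-size assumption explicit (the remaining TODO keeps multisized components on the books). The two formulations write identical bytes on a little-endian host, which this standalone check demonstrates (demo code, not from the project):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

int main() {
    constexpr uint32_t kFill = 0xDEADBEEF; // stand-in for remap_consta_value
    constexpr size_t kCount = 8;           // stand-in for regs.line_length_in

    // Old approach: emit the constant byte by byte, little-endian.
    std::vector<uint8_t> a(kCount * sizeof(uint32_t));
    for (size_t i = 0; i < kCount; ++i) {
        for (size_t j = 0; j < sizeof(uint32_t); ++j) {
            a[i * sizeof(uint32_t) + j] = (kFill >> (j * 8)) & 0xFF;
        }
    }

    // New approach: reinterpret the buffer as u32 words and fill in one pass.
    // Equivalent only on little-endian hosts and for 4-byte components,
    // which is exactly what the added ASSERT pins down.
    std::vector<uint8_t> b(kCount * sizeof(uint32_t));
    std::span<uint32_t> words(reinterpret_cast<uint32_t*>(b.data()), kCount);
    std::ranges::fill(words, kFill);

    assert(std::memcmp(a.data(), b.data(), a.size()) == 0);
    return 0;
}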

@@ -479,14 +479,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
 }

 void RasterizerVulkan::DispatchCompute() {
-#if defined(ANDROID) || defined(__linux__)
-    static u32 dispatch_count = 0;
-    if (dispatch_count < 2) {
-        dispatch_count++;
-        return;
-    }
-#endif
-
     FlushWork();
     gpu_memory->FlushCaching();
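Note on the Vulkan rasterizer change: the deleted block turned the first two DispatchCompute calls on Android and Linux into no-ops via a function-local static counter. The shape of the removed workaround, reduced to its skeleton (illustrative, not project code):

#include <cstdint>

void DispatchCompute() {
    // Function-local static: initialized once and never reset, so the skip
    // applies once per process lifetime, not per frame or per emulation
    // session.
    static uint32_t dispatch_count = 0;
    if (dispatch_count < 2) {
        ++dispatch_count;
        return; // a real compute dispatch is silently discarded
    }
    // ... FlushWork(), gpu_memory->FlushCaching(), actual dispatch ...
}

Removing the guard restores unconditional dispatch.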