Compare commits

..

3 Commits

Author SHA1 Message Date
lizzie
2d95d80443 fix2 2025-11-19 03:42:25 +00:00
lizzie
c55fa8cd70 no wait thats stupid icache invalidation 2025-11-19 03:41:34 +00:00
lizzie
d8b60c5d45 [nce] try fixing slowdown on various games 2025-11-19 03:38:35 +00:00
3 changed files with 7 additions and 19 deletions

View File

@@ -390,14 +390,15 @@ const std::size_t CACHE_PAGE_SIZE = 4096;
void ArmNce::ClearInstructionCache() {
#ifdef __aarch64__
// Ensure all previous memory operations complete
asm volatile("dmb ish" ::: "memory");
asm volatile("dsb ish" ::: "memory");
asm volatile("isb" ::: "memory");
asm volatile(
"\tdmb ish\n"
"\tdsb ish\n"
"\tisb\n" ::: "memory");
#endif
}
void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
this->ClearInstructionCache();
ClearInstructionCache();
}
} // namespace Core

View File

@@ -183,17 +183,16 @@ bool InterpreterVisitor::Ordered(size_t size, bool L, bool o0, Reg Rn, Reg Rt) {
u64 address = (Rn == Reg::SP) ? this->GetSp() : this->GetReg(Rn);
switch (memop) {
case MemOp::Store: {
std::atomic_thread_fence(std::memory_order_seq_cst);
std::atomic_thread_fence(std::memory_order_release);
u64 value = this->GetReg(Rt);
m_memory.WriteBlock(address, &value, dbytes);
std::atomic_thread_fence(std::memory_order_seq_cst);
break;
}
case MemOp::Load: {
u64 value = 0;
m_memory.ReadBlock(address, &value, dbytes);
std::atomic_thread_fence(std::memory_order_acquire);
this->SetReg(Rt, value);
std::atomic_thread_fence(std::memory_order_seq_cst);
break;
}
default:

View File

@@ -677,7 +677,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
ConvertLegacyToGeneric(program, runtime_info);
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
device.SaveShader(code);
modules[stage_index] = BuildShader(device, code);
@@ -772,17 +771,6 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
// Adreno have lower shared memory limits (32KB)
// Clamp shared memory usage to device maximum to avoid validation errors
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
if (program.shared_memory_size > max_shared_memory) {
LOG_WARNING(Render_Vulkan,
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
key.unique_hash, program.shared_memory_size / 1024, max_shared_memory / 1024);
program.shared_memory_size = max_shared_memory;
}
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};