From 2faa4947e9cf22574d76cc271431905e08b15724 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 10 Dec 2025 09:13:26 +0000 Subject: [PATCH] Use ctr like gcc does Signed-off-by: lizzie --- externals/powah/powah_emit.hpp | 7 ++++--- src/dynarmic/docs/PowerPC.md | 2 ++ src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h | 2 ++ .../src/dynarmic/backend/ppc64/a32_interface.cpp | 1 + src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h | 2 ++ .../src/dynarmic/backend/ppc64/a64_interface.cpp | 1 + src/dynarmic/src/dynarmic/backend/ppc64/abi.h | 3 +-- .../src/dynarmic/backend/ppc64/emit_ppc64.cpp | 14 ++++++++++---- .../src/dynarmic/backend/ppc64/stack_layout.h | 1 + 9 files changed, 24 insertions(+), 9 deletions(-) diff --git a/externals/powah/powah_emit.hpp b/externals/powah/powah_emit.hpp index 0f10199a77..4b6f1d72e6 100644 --- a/externals/powah/powah_emit.hpp +++ b/externals/powah/powah_emit.hpp @@ -368,10 +368,11 @@ struct Context { void MFLR(GPR const rt) { MFSPR(powah::GPR{8}, rt, powah::GPR{0}); } void MTLR(GPR const rt) { MTSPR(powah::GPR{8}, rt, powah::GPR{0}); } + void BLR() { base[offset++] = 0x4e800020; } //BCLR(R0, CR0, R0); - void BLR() { - base[offset++] = 0x4e800020; //BCLR(R0, CR0, R0); - } + void MFCTR(GPR const rt) { MFSPR(powah::GPR{9}, rt, powah::GPR{0}); } + void MTCTR(GPR const rt) { MTSPR(powah::GPR{9}, rt, powah::GPR{0}); } + void BCTRL() { base[offset++] = 0x4e800421; } //BCCTRL(R0, CR0, R0); // TODO: PowerPC 11 stuff void ISEL(GPR const rd, GPR const ra, GPR const rb, uint32_t d) { diff --git a/src/dynarmic/docs/PowerPC.md b/src/dynarmic/docs/PowerPC.md index a89c49b13e..d625bb30fa 100644 --- a/src/dynarmic/docs/PowerPC.md +++ b/src/dynarmic/docs/PowerPC.md @@ -4,3 +4,5 @@ The ppc64 backend currently only supports the little endian variant, with big en - Flag handling: Flags are emulated via software, while there may be some funny tricks with the CTR, I'd rather not bother - plus it's widely known that those instructions are not nice on real metal - 
so I would rather take the i-cache cost. - 128-bit atomics: No 128-bit atomic support is provided, this may cause wrong or erroneous execution in some contexts. + +To handle endianness differences, all 16/32/64-bit loads and stores to the "emulated memory" are byteswapped beforehand. diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h b/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h index 2fb0268614..19816d4c93 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h @@ -23,6 +23,8 @@ struct A32JitState { u32 cpsr_nzcv = 0; u32 fpscr = 0; u8 check_bit = 0; + void* run_fn = nullptr; + IR::LocationDescriptor GetLocationDescriptor() const { return IR::LocationDescriptor{regs[15] | (u64(upper_location_descriptor) << 32)}; } diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp index a2d443ad9b..e7269582c3 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp @@ -74,6 +74,7 @@ struct A32Core final { auto const loc = thread_ctx.GetLocationDescriptor(); auto const entry = process.GetOrEmit(loc); using CodeFn = HaltReason (*)(A32AddressSpace*, A32JitState*, volatile u32*, void*); + thread_ctx.run_fn = (void*)&A32Core::Run; return (CodeFn(entry))(&process, &thread_ctx, halt_reason, reinterpret_cast(&A32Core::Run)); } }; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h b/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h index 5cd10c726f..0d5fc6eace 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h @@ -29,6 +29,8 @@ struct A64JitState { u32 fpsr = 0; volatile u32 halt_reason = 0; u8 check_bit = 0; + void* run_fn = nullptr; + IR::LocationDescriptor GetLocationDescriptor() const { const u64 fpcr_u64 = u64(fpcr & A64::LocationDescriptor::fpcr_mask) << 
A64::LocationDescriptor::fpcr_shift; const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp index 063f74cc68..f07b8ef1be 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp @@ -76,6 +76,7 @@ struct A64Core final { const auto loc = thread_ctx.GetLocationDescriptor(); const auto entry = process.GetOrEmit(loc); using CodeFn = HaltReason (*)(A64AddressSpace*, A64JitState*, volatile u32*, void*); + thread_ctx.run_fn = (void*)&A64Core::Run; return (CodeFn(entry))(&process, &thread_ctx, halt_reason, reinterpret_cast(&A64Core::Run)); } }; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/abi.h b/src/dynarmic/src/dynarmic/backend/ppc64/abi.h index 3d4b2c70bd..0f7e059110 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/abi.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/abi.h @@ -8,11 +8,10 @@ namespace Dynarmic::Backend::PPC64 { -// Jit fn signature => (AXXAddressSpace& process, AXXJitState& thread_ctx, volatile u32* halt_reason, void* link_fn) +// Jit fn signature => (AXXAddressSpace& process, AXXJitState& thread_ctx, volatile u32* halt_reason) constexpr powah::GPR RPROCESS = powah::R3; constexpr powah::GPR RJIT = powah::R4; constexpr powah::GPR RHALTREASON = powah::R5; -constexpr powah::GPR RLINKFN = powah::R6; constexpr powah::GPR RNZCV = powah::R31; constexpr powah::GPR ABI_PARAM1 = powah::R3; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp index 09d9e15d82..14be3d3aaa 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp @@ -21,6 +21,7 @@ #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" +#include "stack_layout.h" namespace 
Dynarmic::Backend::PPC64 { @@ -155,23 +156,28 @@ void EmitTerminal(powah::Context& code, EmitContext& ctx, IR::Term::ReturnToDisp } void EmitTerminal(powah::Context& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool) { + auto const tmp_lr = ctx.reg_alloc.ScratchGpr(); auto const tmp = ctx.reg_alloc.ScratchGpr(); - code.LI(tmp, terminal.next.Value()); + code.LD(tmp_lr, PPC64::RJIT, offsetof(StackLayout, lr)); + code.MFCTR(tmp_lr); if (ctx.emit_conf.a64_variant) { + code.LI(tmp, terminal.next.Value()); code.STD(tmp, PPC64::RJIT, offsetof(A64JitState, pc)); - code.MFLR(PPC64::RLINKFN); + code.LD(tmp, PPC64::RJIT, offsetof(A64JitState, run_fn)); + code.MTCTR(tmp); for (u32 i = 0; i < 4; ++i) code.STD(powah::GPR{3 + i}, powah::R1, -((GPR_ORDER.size() + i) * 8)); code.ADDIS(powah::R1, powah::R1, -sizeof(StackLayout)); - code.BLR(); + code.BCTRL(); code.ADDI(powah::R1, powah::R1, sizeof(StackLayout)); for (u32 i = 0; i < 4; ++i) code.LD(powah::GPR{3 + i}, powah::R1, -((GPR_ORDER.size() + i) * 8)); - code.MTLR(PPC64::RLINKFN); } else { + code.LI(tmp, terminal.next.Value()); code.STW(tmp, PPC64::RJIT, offsetof(A32JitState, regs) + sizeof(u32) * 15); ASSERT(false && "unimp"); } + code.MTCTR(tmp_lr); } void EmitTerminal(powah::Context& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool) { diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h b/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h index 9b83d971d6..91aac3d942 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h @@ -15,6 +15,7 @@ struct alignas(16) StackLayout { std::array regs; std::array spill; u64 check_bit; + void* lr; }; static_assert(sizeof(StackLayout) % 16 == 0);