From 283218bc2967fa2dbf3d0a53cc58940b0fa2f424 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 11 Jan 2026 18:37:29 +0100 Subject: [PATCH 1/2] x64Emitter: Reduce padding in OpArg By moving members of the OpArg struct around, we can cut down on how much padding the struct needs. Now it has a size of 16 bytes, small enough for function calls to pass it in two registers instead of on the stack. --- Source/Core/Common/x64Emitter.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 71dcea30f9..6953770986 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -113,8 +113,8 @@ struct OpArg // dummy op arg, used for storage constexpr OpArg() = default; constexpr OpArg(u64 offset_, int scale_, X64Reg rm_reg = RAX, X64Reg scaled_reg = RAX) - : scale{static_cast<u8>(scale_)}, offsetOrBaseReg{static_cast<u16>(rm_reg)}, - indexReg{static_cast<u16>(scaled_reg)}, offset{offset_} + : offset{offset_}, offsetOrBaseReg{static_cast<u16>(rm_reg)}, + indexReg{static_cast<u16>(scaled_reg)}, scale{static_cast<u8>(scale_)} { } constexpr bool operator==(const OpArg& b) const @@ -234,11 +234,11 @@ private: void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits); void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const; - u8 scale = 0; + u64 offset = 0; // Also used to store immediates. u16 offsetOrBaseReg = 0; u16 indexReg = 0; - u64 offset = 0; // Also used to store immediates. u16 operandReg = 0; + u8 scale = 0; }; template From 3ea366119f47a9cce613002667d3bfd4f71cee42 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 11 Jan 2026 19:05:05 +0100 Subject: [PATCH 2/2] Jit64: Make TrampolineInfo smaller Combined with the previous commit, this brings the TrampolineInfo struct down to 48 bytes. This matters, because Jit64 has a big std::unordered_map where it stores many megabytes of TrampolineInfo entries. 
The key saving comes from shrinking the len member from u32 to u16. It should be safe to even turn it into a u8, but going that far brings no additional savings due to how the padding works out. --- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 4 +-- .../Core/PowerPC/Jit64Common/TrampolineInfo.h | 30 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 58b34698ad..0c186d0972 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -355,7 +355,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, { NOP(padding); } - info.len = static_cast<u32>(GetCodePtr() - info.start); + info.len = static_cast<u16>(GetCodePtr() - info.start); js.fastmemLoadStore = mov.address; return; @@ -527,7 +527,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces { NOP(padding); } - info.len = static_cast<u32>(GetCodePtr() - info.start); + info.len = static_cast<u16>(GetCodePtr() - info.start); js.fastmemLoadStore = mov.address; diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h index bdf1a1badd..d76d3f724f 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h @@ -16,21 +16,7 @@ struct TrampolineInfo final u8* start = nullptr; // The start + len = end of the store operation (points to the next instruction) - u32 len = 0; - - // The PPC PC for the current load/store block - u32 pc = 0; - - // Saved because we need these to make the ABI call in the trampoline - BitSet32 registersInUse{}; - - // The MOV operation - Gen::X64Reg nonAtomicSwapStoreSrc{}; - - // src/dest for load/store - s32 offset = 0; - Gen::X64Reg op_reg{}; - Gen::OpArg op_arg{}; + u16 len = 0; // Original 
SafeLoadXXX/SafeStoreXXX flags u8 flags = 0; @@ -46,4 +32,18 @@ struct TrampolineInfo final // Set to true if we added the offset to the address and need to undo it bool offsetAddedToAddress : 1 = false; + + // The PPC PC for the current load/store block + u32 pc = 0; + + // Saved because we need these to make the ABI call in the trampoline + BitSet32 registersInUse{}; + + // The MOV operation + Gen::X64Reg nonAtomicSwapStoreSrc{}; + + // src/dest for load/store + s32 offset = 0; + Gen::X64Reg op_reg{}; + Gen::OpArg op_arg{}; };