Compare commits
2 Commits
liz-dynarm
...
pintocputh
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
112b14b564 | ||
|
|
754883db97 |
@@ -5,9 +5,11 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "common/error.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/thread.h"
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach.h>
|
||||
@@ -18,6 +20,8 @@
|
||||
#include "common/string_util.h"
|
||||
#else
|
||||
#if defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__)
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/_cpuset.h>
|
||||
#include <pthread_np.h>
|
||||
#endif
|
||||
#include <pthread.h>
|
||||
@@ -28,7 +32,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#define cpu_set_t cpuset_t
|
||||
# define cpu_set_t cpuset_t
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
@@ -77,22 +81,14 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
|
||||
#endif
|
||||
}
|
||||
|
||||
void SetCurrentThreadName(const char* name) {
|
||||
#ifdef _MSC_VER
|
||||
|
||||
// Sets the debugger-visible name of the current thread.
|
||||
void SetCurrentThreadName(const char* name) {
|
||||
static auto pf = (decltype(&SetThreadDescription))(void*)GetProcAddress(GetModuleHandle(TEXT("KernelBase.dll")), "SetThreadDescription");
|
||||
if (pf)
|
||||
// Sets the debugger-visible name of the current thread.
|
||||
if (auto pf = (decltype(&SetThreadDescription))(void*)GetProcAddress(GetModuleHandle(TEXT("KernelBase.dll")), "SetThreadDescription"); pf)
|
||||
pf(GetCurrentThread(), UTF8ToUTF16W(name).data()); // Windows 10+
|
||||
}
|
||||
|
||||
#else // !_MSC_VER, so must be POSIX threads
|
||||
|
||||
// MinGW with the POSIX threading model does not support pthread_setname_np
|
||||
void SetCurrentThreadName(const char* name) {
|
||||
// See for reference
|
||||
// https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75
|
||||
#ifdef __APPLE__
|
||||
else
|
||||
; // No-op
|
||||
#elif defined(__APPLE__)
|
||||
pthread_setname_np(name);
|
||||
#elif defined(__HAIKU__)
|
||||
rename_thread(find_thread(NULL), name);
|
||||
@@ -112,13 +108,33 @@ void SetCurrentThreadName(const char* name) {
|
||||
pthread_setname_np(pthread_self(), buf);
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
// mingw stub
|
||||
// MinGW with the POSIX threading model does not support pthread_setname_np
|
||||
// See for reference
|
||||
// https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75
|
||||
(void)name;
|
||||
#else
|
||||
pthread_setname_np(pthread_self(), name);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Restricts the calling thread to the single host logical CPU with index `core_id`.
///
/// @param core_id Zero-based CPU index; asserted to be below 4.
///
/// Nothing is done when `core_id` is not below std::thread::hardware_concurrency()
/// (which includes the case where that call reports 0), or on platforms other than
/// Linux, FreeBSD and Windows. The result of the underlying OS call is not checked.
///
/// NOTE(review): despite the name, nothing in this function distinguishes performance
/// cores from efficiency cores; the CPU is selected purely by index - confirm intent.
void PinCurrentThreadToPerformanceCore(size_t core_id) {
    ASSERT(core_id < 4);

    // If we set a flag for a CPU that doesn't exist, the thread may not be allowed to
    // run in ANY processor! Bail out before touching the affinity in that case.
    const auto host_cpu_count = std::thread::hardware_concurrency();
    if (core_id >= host_cpu_count) {
        return;
    }

#if defined(__linux__) || defined(__FreeBSD__)
    // Build an affinity set that contains only the requested CPU.
    cpu_set_t affinity;
    CPU_ZERO(&affinity);
    CPU_SET(core_id, &affinity);
    pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
#elif defined(_WIN32)
    // Single-bit mask selecting the requested CPU.
    DWORD affinity_mask = 1UL << core_id;
    SetThreadAffinityMask(GetCurrentThread(), affinity_mask);
#else
    // No pin functionality implemented
#endif
}
|
||||
|
||||
} // namespace Common
|
||||
|
||||
@@ -106,7 +106,7 @@ enum class ThreadPriority : u32 {
|
||||
};
|
||||
|
||||
void SetCurrentThreadPriority(ThreadPriority new_priority);
|
||||
|
||||
void SetCurrentThreadName(const char* name);
|
||||
void PinCurrentThreadToPerformanceCore(size_t core_id);
|
||||
|
||||
} // namespace Common
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "common/fiber.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/thread.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/cpu_manager.h"
|
||||
@@ -25,11 +26,8 @@ CpuManager::~CpuManager() = default;
|
||||
void CpuManager::Initialize() {
|
||||
num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
|
||||
gpu_barrier = std::make_unique<Common::Barrier>(num_cores + 1);
|
||||
|
||||
for (std::size_t core = 0; core < num_cores; core++) {
|
||||
core_data[core].host_thread =
|
||||
std::jthread([this, core](std::stop_token token) { RunThread(token, core); });
|
||||
}
|
||||
for (std::size_t core = 0; core < num_cores; core++)
|
||||
core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); });
|
||||
}
|
||||
|
||||
void CpuManager::Shutdown() {
|
||||
@@ -188,14 +186,10 @@ void CpuManager::ShutdownThread() {
|
||||
void CpuManager::RunThread(std::stop_token token, std::size_t core) {
|
||||
/// Initialization
|
||||
system.RegisterCoreThread(core);
|
||||
std::string name;
|
||||
if (is_multicore) {
|
||||
name = "CPUCore_" + std::to_string(core);
|
||||
} else {
|
||||
name = "CPUThread";
|
||||
}
|
||||
std::string name = is_multicore ? ("CPUCore_" + std::to_string(core)) : std::string{"CPUThread"};
|
||||
Common::SetCurrentThreadName(name.c_str());
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
|
||||
Common::PinCurrentThreadToPerformanceCore(core);
|
||||
auto& data = core_data[core];
|
||||
data.host_context = Common::Fiber::ThreadToFiber();
|
||||
|
||||
|
||||
@@ -194,8 +194,6 @@ HostFeature GetHostFeatures() {
|
||||
features |= HostFeature::LZCNT;
|
||||
if (cpu_info.has(Cpu::tGFNI))
|
||||
features |= HostFeature::GFNI;
|
||||
if (cpu_info.has(Cpu::tWAITPKG))
|
||||
features |= HostFeature::WAITPKG;
|
||||
|
||||
if (cpu_info.has(Cpu::tBMI2)) {
|
||||
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
|
||||
|
||||
@@ -420,11 +420,10 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
||||
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
|
||||
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
|
||||
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
|
||||
|
||||
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
|
||||
EmitExclusiveLock(code, conf, tmp, eax);
|
||||
|
||||
SharedLabel end = GenSharedLabel();
|
||||
|
||||
|
||||
@@ -346,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p
|
||||
}
|
||||
|
||||
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
|
||||
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
|
||||
EmitSpinLockLock(code, pointer, tmp);
|
||||
}
|
||||
|
||||
template<typename UserConfig>
|
||||
|
||||
@@ -35,10 +35,9 @@ enum class HostFeature : u64 {
|
||||
BMI2 = 1ULL << 19,
|
||||
LZCNT = 1ULL << 20,
|
||||
GFNI = 1ULL << 21,
|
||||
WAITPKG = 1ULL << 22,
|
||||
|
||||
// Zen-based BMI2
|
||||
FastBMI2 = 1ULL << 23,
|
||||
FastBMI2 = 1ULL << 22,
|
||||
|
||||
// Orthographic AVX512 features on 128 and 256 vectors
|
||||
AVX512_Ortho = AVX512F | AVX512VL,
|
||||
|
||||
@@ -22,46 +22,17 @@ static const auto default_cg_mode = nullptr; //Allow RWE
|
||||
|
||||
namespace Dynarmic {
|
||||
|
||||
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
|
||||
// TODO: this is because we lack regalloc - so better to be safe :(
|
||||
if (waitpkg) {
|
||||
code.push(Xbyak::util::eax);
|
||||
code.push(Xbyak::util::ebx);
|
||||
code.push(Xbyak::util::edx);
|
||||
}
|
||||
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
|
||||
Xbyak::Label start, loop;
|
||||
|
||||
code.jmp(start, code.T_NEAR);
|
||||
code.L(loop);
|
||||
if (waitpkg) {
|
||||
// TODO: This clobbers EAX and EDX - did we tell the regalloc?
|
||||
// ARM ptr for address-monitoring
|
||||
code.umonitor(ptr);
|
||||
// tmp.bit[0] = 0: C0.2 | Slow Wakeup | Better Savings
|
||||
// tmp.bit[0] = 1: C0.1 | Fast Wakeup | Lesser Savings
|
||||
// edx:eax is implicitly used as a 64-bit deadline timestamp
|
||||
// Use the maximum so that we use the operating system's maximum
|
||||
// allowed wait time within the IA32_UMWAIT_CONTROL register
|
||||
// Enter power state designated by tmp and wait for a write to lock_ptr
|
||||
code.mov(Xbyak::util::eax, 0xFFFFFFFF);
|
||||
code.mov(Xbyak::util::edx, Xbyak::util::eax);
|
||||
// TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it...
|
||||
code.mov(Xbyak::util::ebx, 1);
|
||||
code.umwait(Xbyak::util::ebx);
|
||||
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
|
||||
// CF == 0 if we exited the wait for any other reason
|
||||
} else {
|
||||
code.pause();
|
||||
}
|
||||
code.pause();
|
||||
code.L(start);
|
||||
code.mov(tmp, 1);
|
||||
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
|
||||
code.test(tmp, tmp);
|
||||
code.jnz(loop, code.T_NEAR);
|
||||
if (waitpkg) {
|
||||
code.pop(Xbyak::util::edx);
|
||||
code.pop(Xbyak::util::ebx);
|
||||
code.pop(Xbyak::util::eax);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
|
||||
@@ -89,7 +60,7 @@ void SpinLockImpl::Initialize() noexcept {
|
||||
Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1);
|
||||
code.align();
|
||||
lock = code.getCurr<void (*)(volatile int*)>();
|
||||
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
|
||||
EmitSpinLockLock(code, ABI_PARAM1, code.eax);
|
||||
code.ret();
|
||||
code.align();
|
||||
unlock = code.getCurr<void (*)(volatile int*)>();
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
@@ -12,7 +9,7 @@
|
||||
|
||||
namespace Dynarmic {
|
||||
|
||||
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
|
||||
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
|
||||
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
|
||||
|
||||
} // namespace Dynarmic
|
||||
|
||||
Reference in New Issue
Block a user