Compare commits

..

3 Commits

Author | SHA1 | Message | Date
lizzie | bc4d7559d5 | update license | 2025-12-18 01:09:05 +01:00
lizzie | ac5671b154 | fx | 2025-12-18 01:09:05 +01:00
lizzie | 98f02f0ebb | [core/hle] use boost::container::static_vector<> for std::set<> of dummy threads that is usually small enough | 2025-12-18 01:09:05 +01:00
9 changed files with 77 additions and 128 deletions

View File

@@ -1,7 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
#include <ranges>
#include "common/assert.h"
#include "core/core.h"
@@ -17,57 +21,61 @@ GlobalSchedulerContext::GlobalSchedulerContext(KernelCore& kernel)
GlobalSchedulerContext::~GlobalSchedulerContext() = default;
void GlobalSchedulerContext::AddThread(KThread* thread) {
/// @brief Adds a new thread to the scheduler
void GlobalSchedulerContext::AddThread(KThread* thread) noexcept {
std::scoped_lock lock{m_global_list_guard};
m_thread_list.push_back(thread);
}
void GlobalSchedulerContext::RemoveThread(KThread* thread) {
/// @brief Removes a thread from the scheduler
void GlobalSchedulerContext::RemoveThread(KThread* thread) noexcept {
std::scoped_lock lock{m_global_list_guard};
std::erase(m_thread_list, thread);
m_thread_list.erase(std::ranges::find(m_thread_list, thread));
}
void GlobalSchedulerContext::PreemptThreads() {
/// @brief Rotates the scheduling queues of threads at a preemption priority
/// and then does some core rebalancing. Preemption priorities can be found
/// in the array 'preemption_priorities'.
/// @note This operation happens every 10ms.
void GlobalSchedulerContext::PreemptThreads() noexcept {
// The priority levels at which the global scheduler preempts threads every 10 ms. They are
// ordered from Core 0 to Core 3.
static constexpr std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities{
static constexpr std::array<u32, Core::Hardware::NUM_CPU_CORES> per_core{
59,
59,
59,
63,
};
ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel));
for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
const u32 priority = preemption_priorities[core_id];
KScheduler::RotateScheduledQueue(m_kernel, core_id, priority);
}
for (u32 core_id = 0; core_id < per_core.size(); core_id++)
KScheduler::RotateScheduledQueue(m_kernel, core_id, per_core[core_id]);
}
bool GlobalSchedulerContext::IsLocked() const {
/// @brief Returns true if the global scheduler lock is acquired
bool GlobalSchedulerContext::IsLocked() const noexcept {
return m_scheduler_lock.IsLockedByCurrentThread();
}
void GlobalSchedulerContext::RegisterDummyThreadForWakeup(KThread* thread) {
void GlobalSchedulerContext::RegisterDummyThreadForWakeup(KThread* thread) noexcept {
ASSERT(this->IsLocked());
m_woken_dummy_threads.insert(thread);
m_woken_dummy_threads.push_back(thread);
}
void GlobalSchedulerContext::UnregisterDummyThreadForWakeup(KThread* thread) {
void GlobalSchedulerContext::UnregisterDummyThreadForWakeup(KThread* thread) noexcept {
ASSERT(this->IsLocked());
m_woken_dummy_threads.erase(thread);
}
void GlobalSchedulerContext::WakeupWaitingDummyThreads() {
ASSERT(this->IsLocked());
for (auto* thread : m_woken_dummy_threads) {
thread->DummyThreadEndWait();
if(auto it = std::ranges::find(m_woken_dummy_threads, thread); it != m_woken_dummy_threads.end()) {
*it = m_woken_dummy_threads.back();
m_woken_dummy_threads.pop_back();
}
}
m_woken_dummy_threads.clear();
void GlobalSchedulerContext::WakeupWaitingDummyThreads() noexcept {
ASSERT(this->IsLocked());
if (m_woken_dummy_threads.size() > 0) {
for (auto* thread : m_woken_dummy_threads)
thread->DummyThreadEndWait();
m_woken_dummy_threads.clear();
}
}
} // namespace Kernel
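For reference, the container change above swaps std::set insert/erase for push_back plus a find-and-swap-with-back removal. Below is a minimal, self-contained sketch of that unordered-erase idiom on a boost::container::small_vector; the Thread type and inline capacity are illustrative, not the emulator's.

```cpp
// Hedged sketch of the swap-with-back ("unordered erase") idiom used in
// UnregisterDummyThreadForWakeup above. Names and sizes are illustrative.
#include <algorithm>
#include <boost/container/small_vector.hpp>

struct Thread {};  // stand-in for KThread

using WakeupList = boost::container::small_vector<Thread*, 8>;

// Remove 'value' in O(1) without preserving order: overwrite it with the last
// element and pop the back, instead of shifting the tail like vector::erase.
inline void UnorderedErase(WakeupList& list, Thread* value) {
    if (auto it = std::ranges::find(list, value); it != list.end()) {
        *it = list.back();
        list.pop_back();
    }
}
```

Compared with std::set, this keeps the handful of pending dummy threads contiguous and avoids per-element node allocations, at the cost of element order, which the wakeup loop does not need.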

View File

@@ -1,11 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <set>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "core/hardware_properties.h"
@@ -31,59 +33,42 @@ class GlobalSchedulerContext final {
friend class KScheduler;
public:
static constexpr size_t MAX_THREADS = 256;
using LockType = KAbstractSchedulerLock<KScheduler>;
using ThreadList = boost::container::small_vector<KThread*, MAX_THREADS>;
explicit GlobalSchedulerContext(KernelCore& kernel);
~GlobalSchedulerContext();
/// Adds a new thread to the scheduler
void AddThread(KThread* thread);
/// Removes a thread from the scheduler
void RemoveThread(KThread* thread);
/// Returns a list of all threads managed by the scheduler
/// @brief Returns a list of all threads managed by the scheduler
/// This is only safe to iterate while holding the scheduler lock
const std::vector<KThread*>& GetThreadList() const {
ThreadList const& GetThreadList() const noexcept {
return m_thread_list;
}
/**
* Rotates the scheduling queues of threads at a preemption priority and then does
* some core rebalancing. Preemption priorities can be found in the array
* 'preemption_priorities'.
*
* @note This operation happens every 10ms.
*/
void PreemptThreads();
/// Returns true if the global scheduler lock is acquired
bool IsLocked() const;
void UnregisterDummyThreadForWakeup(KThread* thread);
void RegisterDummyThreadForWakeup(KThread* thread);
void WakeupWaitingDummyThreads();
LockType& SchedulerLock() {
LockType& SchedulerLock() noexcept {
return m_scheduler_lock;
}
void AddThread(KThread* thread) noexcept;
void RemoveThread(KThread* thread) noexcept;
void PreemptThreads() noexcept;
bool IsLocked() const noexcept;
void UnregisterDummyThreadForWakeup(KThread* thread) noexcept;
void RegisterDummyThreadForWakeup(KThread* thread) noexcept;
void WakeupWaitingDummyThreads() noexcept;
private:
friend class KScopedSchedulerLock;
friend class KScopedSchedulerLockAndSleep;
KernelCore& m_kernel;
std::atomic_bool m_scheduler_update_needed{};
KSchedulerPriorityQueue m_priority_queue;
LockType m_scheduler_lock;
/// Lists dummy threads pending wakeup on lock release
std::set<KThread*> m_woken_dummy_threads;
/// Lists all thread ids that aren't deleted/etc.
std::vector<KThread*> m_thread_list;
std::mutex m_global_list_guard;
/// Lists dummy threads pending wakeup on lock release
ThreadList m_woken_dummy_threads;
/// Lists all thread ids that aren't deleted/etc.
ThreadList m_thread_list;
};
} // namespace Kernel
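For context on the ThreadList alias introduced in this header: boost::container::small_vector<T, N> keeps up to N elements in storage embedded in the object and only touches the heap once the size exceeds N. A short hedged sketch (the capacity of 4 here is purely illustrative):

```cpp
#include <cassert>
#include <boost/container/small_vector.hpp>

int main() {
    // The first 4 elements live in the inline buffer; growing past it may allocate.
    boost::container::small_vector<int, 4> v;
    for (int i = 0; i < 4; ++i)
        v.push_back(i);           // fits in the embedded storage, no allocation
    assert(v.capacity() >= 4);
    v.push_back(4);               // exceeds the inline capacity, spills to the heap
    assert(v.size() == 5);
    return 0;
}
```

With MAX_THREADS = 256 as the inline capacity, each list reserves room for 256 pointers (2 KiB on a 64-bit target) inside the GlobalSchedulerContext itself, trading a larger object for allocation-free growth in the common case.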

View File

@@ -527,35 +527,27 @@ void KScheduler::ClearPreviousThread(KernelCore& kernel, KThread* thread) {
void KScheduler::OnThreadStateChanged(KernelCore& kernel, KThread* thread, ThreadState old_state) {
ASSERT(IsSchedulerLockedByCurrentThread(kernel));
// Check if the state has changed, because if it hasn't there's nothing to do.
const ThreadState cur_state = thread->GetRawState();
if (cur_state == old_state) {
return;
}
// Update the priority queues.
if (old_state == ThreadState::Runnable) {
// If we were previously runnable, then we're not runnable now, and we should remove.
GetPriorityQueue(kernel).Remove(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
if (thread->IsDummyThread()) {
if (const ThreadState cur_state = thread->GetRawState(); cur_state != old_state) {
// Update the priority queues.
if (old_state == ThreadState::Runnable) {
// If we were previously runnable, then we're not runnable now, and we should remove.
GetPriorityQueue(kernel).Remove(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
// HACK: if this is a dummy thread, it should no longer wake up when the
// scheduler lock is released.
kernel.GlobalSchedulerContext().UnregisterDummyThreadForWakeup(thread);
}
} else if (cur_state == ThreadState::Runnable) {
// If we're now runnable, then we weren't previously, and we should add.
GetPriorityQueue(kernel).PushBack(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
if (thread->IsDummyThread()) {
if (thread->IsDummyThread())
kernel.GlobalSchedulerContext().UnregisterDummyThreadForWakeup(thread);
} else if (cur_state == ThreadState::Runnable) {
// If we're now runnable, then we weren't previously, and we should add.
GetPriorityQueue(kernel).PushBack(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
// HACK: if this is a dummy thread, it should wake up when the scheduler
// lock is released.
kernel.GlobalSchedulerContext().RegisterDummyThreadForWakeup(thread);
if (thread->IsDummyThread())
kernel.GlobalSchedulerContext().RegisterDummyThreadForWakeup(thread);
}
}
}
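The HACK comments in this hunk describe a deferral protocol: while the scheduler lock is held, dummy (host) threads that become runnable are only recorded, and DummyThreadEndWait runs once the lock is released. A minimal sketch of that register-then-flush shape, with hypothetical names rather than the kernel's real types:

```cpp
// Hedged sketch of "register now, wake on scheduler-lock release".
// DummyThread, EndWait and WakeupRegistry are stand-ins, not the emulator's API.
#include <boost/container/small_vector.hpp>

struct DummyThread {
    void EndWait() { /* resume the blocked host thread */ }
};

class WakeupRegistry {
public:
    // Called while the scheduler lock is held: only remember the thread.
    void Register(DummyThread* t) { m_pending.push_back(t); }

    // Called as the scheduler lock is released: wake everything recorded, then reset.
    void Flush() {
        for (auto* t : m_pending)
            t->EndWait();
        m_pending.clear();
    }

private:
    boost::container::small_vector<DummyThread*, 8> m_pending;
};
```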

View File

@@ -194,8 +194,6 @@ HostFeature GetHostFeatures() {
features |= HostFeature::LZCNT;
if (cpu_info.has(Cpu::tGFNI))
features |= HostFeature::GFNI;
if (cpu_info.has(Cpu::tWAITPKG))
features |= HostFeature::WAITPKG;
if (cpu_info.has(Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
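The hunk above removes the WAITPKG probe; every remaining check in GetHostFeatures follows the same Xbyak::util::Cpu query pattern. A small standalone sketch of that CPUID wrapper (the features printed here are chosen for illustration):

```cpp
#include <cstdio>
#include <xbyak/xbyak_util.h>

int main() {
    // Xbyak's CPUID helper: one has() query per feature bit of interest.
    const Xbyak::util::Cpu cpu;
    std::printf("BMI2:  %d\n", static_cast<int>(cpu.has(Xbyak::util::Cpu::tBMI2)));
    std::printf("LZCNT: %d\n", static_cast<int>(cpu.has(Xbyak::util::Cpu::tLZCNT)));
    std::printf("GFNI:  %d\n", static_cast<int>(cpu.has(Xbyak::util::Cpu::tGFNI)));
    return 0;
}
```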

View File

@@ -420,11 +420,10 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
EmitExclusiveLock(code, conf, tmp, eax);
SharedLabel end = GenSharedLabel();
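The wrapped_fn lookup above indexes pre-generated fallback code by the tuple (ordered, bitsize, vaddr register index, value register index). The container's definition is not part of this hunk; the sketch below shows one plausible shape for such a tuple-keyed table, using std::map purely for illustration.

```cpp
#include <cstddef>
#include <map>
#include <tuple>

using FallbackFn = void (*)();  // placeholder signature, not dynarmic's real one

// Key: (ordered, bitsize, vaddr register index, value register index).
using FallbackKey = std::tuple<bool, std::size_t, int, int>;

std::map<FallbackKey, FallbackFn> exclusive_write_fallbacks;  // hypothetical definition

FallbackFn LookupFallback(bool ordered, std::size_t bitsize, int vaddr_idx, int value_idx) {
    // std::tuple provides lexicographic ordering, so it works as a map key out of the box.
    // Note: operator[] default-inserts a null entry when the key is missing.
    return exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr_idx, value_idx)];
}
```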

View File

@@ -346,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p
}
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
EmitSpinLockLock(code, pointer, tmp);
}
template<typename UserConfig>
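In EmitExclusiveLock above, the monitor's lock-word address is converted to a 64-bit integer with std::bit_cast so it can be emitted as an immediate mov operand. A tiny C++20 sketch of that conversion, assuming a 64-bit target and a hypothetical lock word:

```cpp
#include <bit>
#include <cstdint>

volatile int g_lock_word = 0;  // hypothetical stand-in for the exclusive monitor's lock word

int main() {
    // Reinterpret the pointer's bits as u64 (pointers are 8 bytes on a 64-bit target),
    // which is what the emitter needs to encode the address as an immediate.
    const std::uint64_t address = std::bit_cast<std::uint64_t>(&g_lock_word);
    return address != 0 ? 0 : 1;
}
```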

View File

@@ -35,10 +35,9 @@ enum class HostFeature : u64 {
BMI2 = 1ULL << 19,
LZCNT = 1ULL << 20,
GFNI = 1ULL << 21,
WAITPKG = 1ULL << 22,
// Zen-based BMI2
FastBMI2 = 1ULL << 23,
FastBMI2 = 1ULL << 22,
// Orthographic AVX512 features on 128 and 256 vectors
AVX512_Ortho = AVX512F | AVX512VL,
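After WAITPKG is removed, the remaining single-feature bits are repacked to stay contiguous, and composites such as AVX512_Ortho are just ORs of those bits (legal inside the enum because the underlying type is fixed to u64). A minimal sketch of the same bitflag style with made-up feature names:

```cpp
#include <cstdint>

// Hedged sketch of a u64 feature bitset in the style of HostFeature; names are invented.
enum class Feature : std::uint64_t {
    A = 1ULL << 0,
    B = 1ULL << 1,
    C = 1ULL << 2,
    // With a fixed underlying type, enumerators can be combined before the closing brace.
    BAndC = B | C,
};

constexpr Feature operator|(Feature lhs, Feature rhs) {
    return static_cast<Feature>(static_cast<std::uint64_t>(lhs) |
                                static_cast<std::uint64_t>(rhs));
}

constexpr bool HasAll(Feature set, Feature mask) {
    return (static_cast<std::uint64_t>(set) & static_cast<std::uint64_t>(mask)) ==
           static_cast<std::uint64_t>(mask);
}
```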

View File

@@ -22,46 +22,17 @@ static const auto default_cg_mode = nullptr; //Allow RWE
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
// TODO: this is because we lack regalloc - so better to be safe :(
if (waitpkg) {
code.push(Xbyak::util::eax);
code.push(Xbyak::util::ebx);
code.push(Xbyak::util::edx);
}
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR);
code.L(loop);
if (waitpkg) {
// TODO: This clobbers EAX and EDX; did we tell the regalloc?
// Arm the address monitor on ptr
code.umonitor(ptr);
// tmp.bit[0] = 0: C0.1 | Slow Wakeup | Better Savings
// tmp.bit[0] = 1: C0.2 | Fast Wakeup | Lesser Savings
// edx:eax is implicitly used as a 64-bit deadline timestamp
// Use the maximum so that we use the operating system's maximum
// allowed wait time within the IA32_UMWAIT_CONTROL register
// Enter power state designated by tmp and wait for a write to lock_ptr
code.mov(Xbyak::util::eax, 0xFFFFFFFF);
code.mov(Xbyak::util::edx, Xbyak::util::eax);
// TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it...
code.mov(Xbyak::util::ebx, 1);
code.umwait(Xbyak::util::ebx);
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
// CF == 0 if we exited the wait for any other reason
} else {
code.pause();
}
code.pause();
code.L(start);
code.mov(tmp, 1);
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR);
if (waitpkg) {
code.pop(Xbyak::util::edx);
code.pop(Xbyak::util::ebx);
code.pop(Xbyak::util::eax);
}
}
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
@@ -89,7 +60,7 @@ void SpinLockImpl::Initialize() noexcept {
Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1);
code.align();
lock = code.getCurr<void (*)(volatile int*)>();
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
EmitSpinLockLock(code, ABI_PARAM1, code.eax);
code.ret();
code.align();
unlock = code.getCurr<void (*)(volatile int*)>();
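What remains after this change is a plain test-and-set spin loop: write 1 into the lock word with xchg and, if the previous value was non-zero, pause and retry. The hedged sketch below is the logical equivalent in portable C++ with std::atomic; the emulator itself emits this sequence at runtime with Xbyak.

```cpp
#include <atomic>
#include <immintrin.h>  // _mm_pause

// Logical equivalent of the emitted loop: tmp = 1; xchg [ptr], tmp; retry while tmp != 0.
struct SpinLock {
    std::atomic<int> word{0};

    void Lock() {
        // exchange() compiles to xchg on x86-64, which is implicitly locked; this matches
        // the generated code, where the explicit lock prefix is left commented out.
        while (word.exchange(1, std::memory_order_acquire) != 0) {
            _mm_pause();  // the retained 'pause' hint while spinning
        }
    }

    void Unlock() {
        word.store(0, std::memory_order_release);
    }
};
```

The deleted branch instead armed an address monitor with umonitor and slept in umwait until the lock word was written (or the IA32_UMWAIT_CONTROL timeout fired), which saved power but clobbered EAX, EBX and EDX around the wait.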

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -12,7 +9,7 @@
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
} // namespace Dynarmic