Compare commits

..

3 Commits

Author | SHA1 | Message | Date
lizzie | bc4d7559d5 | update license | 2025-12-18 01:09:05 +01:00
lizzie | ac5671b154 | fx | 2025-12-18 01:09:05 +01:00
lizzie | 98f02f0ebb | [core/hle] use boost::container::static_vector<> for std::set<> of dummy threads that is usually small enough | 2025-12-18 01:09:05 +01:00
9 changed files with 77 additions and 128 deletions

View File

@@ -1,7 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
#include <ranges>
#include "common/assert.h"
#include "core/core.h"
@@ -17,57 +21,61 @@ GlobalSchedulerContext::GlobalSchedulerContext(KernelCore& kernel)
GlobalSchedulerContext::~GlobalSchedulerContext() = default;
void GlobalSchedulerContext::AddThread(KThread* thread) {
/// @brief Adds a new thread to the scheduler
void GlobalSchedulerContext::AddThread(KThread* thread) noexcept {
std::scoped_lock lock{m_global_list_guard};
m_thread_list.push_back(thread);
}
void GlobalSchedulerContext::RemoveThread(KThread* thread) {
/// @brief Removes a thread from the scheduler
void GlobalSchedulerContext::RemoveThread(KThread* thread) noexcept {
std::scoped_lock lock{m_global_list_guard};
std::erase(m_thread_list, thread);
m_thread_list.erase(std::ranges::find(m_thread_list, thread));
}
void GlobalSchedulerContext::PreemptThreads() {
/// @brief Rotates the scheduling queues of threads at a preemption priority
/// and then does some core rebalancing. Preemption priorities can be found
/// in the array 'preemption_priorities'.
/// @note This operation happens every 10ms.
void GlobalSchedulerContext::PreemptThreads() noexcept {
// The priority levels at which the global scheduler preempts threads every 10 ms. They are
// ordered from Core 0 to Core 3.
static constexpr std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities{
static constexpr std::array<u32, Core::Hardware::NUM_CPU_CORES> per_core{
59,
59,
59,
63,
};
ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel));
for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
const u32 priority = preemption_priorities[core_id];
KScheduler::RotateScheduledQueue(m_kernel, core_id, priority);
}
for (u32 core_id = 0; core_id < per_core.size(); core_id++)
KScheduler::RotateScheduledQueue(m_kernel, core_id, per_core[core_id]);
}
bool GlobalSchedulerContext::IsLocked() const {
/// @brief Returns true if the global scheduler lock is acquired
bool GlobalSchedulerContext::IsLocked() const noexcept {
return m_scheduler_lock.IsLockedByCurrentThread();
}
void GlobalSchedulerContext::RegisterDummyThreadForWakeup(KThread* thread) {
void GlobalSchedulerContext::RegisterDummyThreadForWakeup(KThread* thread) noexcept {
ASSERT(this->IsLocked());
m_woken_dummy_threads.insert(thread);
m_woken_dummy_threads.push_back(thread);
}
void GlobalSchedulerContext::UnregisterDummyThreadForWakeup(KThread* thread) {
void GlobalSchedulerContext::UnregisterDummyThreadForWakeup(KThread* thread) noexcept {
ASSERT(this->IsLocked());
m_woken_dummy_threads.erase(thread);
}
void GlobalSchedulerContext::WakeupWaitingDummyThreads() {
ASSERT(this->IsLocked());
for (auto* thread : m_woken_dummy_threads) {
thread->DummyThreadEndWait();
if(auto it = std::ranges::find(m_woken_dummy_threads, thread); it != m_woken_dummy_threads.end()) {
*it = m_woken_dummy_threads.back();
m_woken_dummy_threads.pop_back();
}
}
m_woken_dummy_threads.clear();
void GlobalSchedulerContext::WakeupWaitingDummyThreads() noexcept {
ASSERT(this->IsLocked());
if (m_woken_dummy_threads.size() > 0) {
for (auto* thread : m_woken_dummy_threads)
thread->DummyThreadEndWait();
m_woken_dummy_threads.clear();
}
}
} // namespace Kernel
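For reference, the container change above swaps std::set insert/erase for push_back plus a find-and-swap-with-back removal. Below is a minimal, self-contained sketch of that unordered-erase idiom on a boost::container::small_vector; the Thread type and inline capacity are illustrative, not the emulator's.

```cpp
// Hedged sketch of the swap-with-back ("unordered erase") idiom used in
// UnregisterDummyThreadForWakeup above. Names and sizes are illustrative.
#include <algorithm>
#include <boost/container/small_vector.hpp>

struct Thread {};  // stand-in for KThread

using WakeupList = boost::container::small_vector<Thread*, 8>;

// Remove 'value' in O(1) without preserving order: overwrite it with the last
// element and pop the back, instead of shifting the tail like vector::erase.
inline void UnorderedErase(WakeupList& list, Thread* value) {
    if (auto it = std::ranges::find(list, value); it != list.end()) {
        *it = list.back();
        list.pop_back();
    }
}
```

Compared with std::set, this keeps the handful of pending dummy threads contiguous and avoids per-element node allocations, at the cost of element order, which the wakeup loop does not need.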

View File

@@ -1,11 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <set>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "core/hardware_properties.h"
@@ -31,59 +33,42 @@ class GlobalSchedulerContext final {
friend class KScheduler;
public:
static constexpr size_t MAX_THREADS = 256;
using LockType = KAbstractSchedulerLock<KScheduler>;
using ThreadList = boost::container::small_vector<KThread*, MAX_THREADS>;
explicit GlobalSchedulerContext(KernelCore& kernel);
~GlobalSchedulerContext();
/// Adds a new thread to the scheduler
void AddThread(KThread* thread);
/// Removes a thread from the scheduler
void RemoveThread(KThread* thread);
/// Returns a list of all threads managed by the scheduler
/// @brief Returns a list of all threads managed by the scheduler
/// This is only safe to iterate while holding the scheduler lock
const std::vector<KThread*>& GetThreadList() const {
ThreadList const& GetThreadList() const noexcept {
return m_thread_list;
}
/**
* Rotates the scheduling queues of threads at a preemption priority and then does
* some core rebalancing. Preemption priorities can be found in the array
* 'preemption_priorities'.
*
* @note This operation happens every 10ms.
*/
void PreemptThreads();
/// Returns true if the global scheduler lock is acquired
bool IsLocked() const;
void UnregisterDummyThreadForWakeup(KThread* thread);
void RegisterDummyThreadForWakeup(KThread* thread);
void WakeupWaitingDummyThreads();
LockType& SchedulerLock() {
LockType& SchedulerLock() noexcept {
return m_scheduler_lock;
}
void AddThread(KThread* thread) noexcept;
void RemoveThread(KThread* thread) noexcept;
void PreemptThreads() noexcept;
bool IsLocked() const noexcept;
void UnregisterDummyThreadForWakeup(KThread* thread) noexcept;
void RegisterDummyThreadForWakeup(KThread* thread) noexcept;
void WakeupWaitingDummyThreads() noexcept;
private:
friend class KScopedSchedulerLock;
friend class KScopedSchedulerLockAndSleep;
KernelCore& m_kernel;
std::atomic_bool m_scheduler_update_needed{};
KSchedulerPriorityQueue m_priority_queue;
LockType m_scheduler_lock;
/// Lists dummy threads pending wakeup on lock release
std::set<KThread*> m_woken_dummy_threads;
/// Lists all thread ids that aren't deleted/etc.
std::vector<KThread*> m_thread_list;
std::mutex m_global_list_guard;
/// Lists dummy threads pending wakeup on lock release
ThreadList m_woken_dummy_threads;
/// Lists all thread ids that aren't deleted/etc.
ThreadList m_thread_list;
};
} // namespace Kernel
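For context on the ThreadList alias introduced in this header: boost::container::small_vector<T, N> keeps up to N elements in storage embedded in the object and only touches the heap once the size exceeds N. A short hedged sketch (the capacity of 4 here is purely illustrative):

```cpp
#include <cassert>
#include <boost/container/small_vector.hpp>

int main() {
    // The first 4 elements live in the inline buffer; growing past it may allocate.
    boost::container::small_vector<int, 4> v;
    for (int i = 0; i < 4; ++i)
        v.push_back(i);           // fits in the embedded storage, no allocation
    assert(v.capacity() >= 4);
    v.push_back(4);               // exceeds the inline capacity, spills to the heap
    assert(v.size() == 5);
    return 0;
}
```

With MAX_THREADS = 256 as the inline capacity, each list reserves room for 256 pointers (2 KiB on a 64-bit target) inside the GlobalSchedulerContext itself, trading a larger object for allocation-free growth in the common case.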

View File

@@ -527,35 +527,27 @@ void KScheduler::ClearPreviousThread(KernelCore& kernel, KThread* thread) {
void KScheduler::OnThreadStateChanged(KernelCore& kernel, KThread* thread, ThreadState old_state) {
ASSERT(IsSchedulerLockedByCurrentThread(kernel));
// Check if the state has changed, because if it hasn't there's nothing to do.
const ThreadState cur_state = thread->GetRawState();
if (cur_state == old_state) {
return;
}
// Update the priority queues.
if (old_state == ThreadState::Runnable) {
// If we were previously runnable, then we're not runnable now, and we should remove.
GetPriorityQueue(kernel).Remove(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
if (thread->IsDummyThread()) {
if (const ThreadState cur_state = thread->GetRawState(); cur_state != old_state) {
// Update the priority queues.
if (old_state == ThreadState::Runnable) {
// If we were previously runnable, then we're not runnable now, and we should remove.
GetPriorityQueue(kernel).Remove(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
// HACK: if this is a dummy thread, it should no longer wake up when the
// scheduler lock is released.
kernel.GlobalSchedulerContext().UnregisterDummyThreadForWakeup(thread);
}
} else if (cur_state == ThreadState::Runnable) {
// If we're now runnable, then we weren't previously, and we should add.
GetPriorityQueue(kernel).PushBack(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
if (thread->IsDummyThread()) {
if (thread->IsDummyThread())
kernel.GlobalSchedulerContext().UnregisterDummyThreadForWakeup(thread);
} else if (cur_state == ThreadState::Runnable) {
// If we're now runnable, then we weren't previously, and we should add.
GetPriorityQueue(kernel).PushBack(thread);
IncrementScheduledCount(thread);
SetSchedulerUpdateNeeded(kernel);
// HACK: if this is a dummy thread, it should wake up when the scheduler
// lock is released.
kernel.GlobalSchedulerContext().RegisterDummyThreadForWakeup(thread);
if (thread->IsDummyThread())
kernel.GlobalSchedulerContext().RegisterDummyThreadForWakeup(thread);
}
}
}
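The HACK comments in this hunk describe a deferral protocol: while the scheduler lock is held, dummy (host) threads that become runnable are only recorded, and DummyThreadEndWait runs once the lock is released. A minimal sketch of that register-then-flush shape, with hypothetical names rather than the kernel's real types:

```cpp
// Hedged sketch of "register now, wake on scheduler-lock release".
// DummyThread, EndWait and WakeupRegistry are stand-ins, not the emulator's API.
#include <boost/container/small_vector.hpp>

struct DummyThread {
    void EndWait() { /* resume the blocked host thread */ }
};

class WakeupRegistry {
public:
    // Called while the scheduler lock is held: only remember the thread.
    void Register(DummyThread* t) { m_pending.push_back(t); }

    // Called as the scheduler lock is released: wake everything recorded, then reset.
    void Flush() {
        for (auto* t : m_pending)
            t->EndWait();
        m_pending.clear();
    }

private:
    boost::container::small_vector<DummyThread*, 8> m_pending;
};
```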

View File

@@ -194,8 +194,6 @@ HostFeature GetHostFeatures() {
features |= HostFeature::LZCNT;
if (cpu_info.has(Cpu::tGFNI))
features |= HostFeature::GFNI;
if (cpu_info.has(Cpu::tWAITPKG))
features |= HostFeature::WAITPKG;
if (cpu_info.has(Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
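The hunk above removes the WAITPKG probe; every remaining check in GetHostFeatures follows the same Xbyak::util::Cpu query pattern. A small standalone sketch of that CPUID wrapper (the features printed here are chosen for illustration):

```cpp
#include <cstdio>
#include <xbyak/xbyak_util.h>

int main() {
    // Xbyak's CPUID helper: one has() query per feature bit of interest.
    const Xbyak::util::Cpu cpu;
    std::printf("BMI2:  %d\n", static_cast<int>(cpu.has(Xbyak::util::Cpu::tBMI2)));
    std::printf("LZCNT: %d\n", static_cast<int>(cpu.has(Xbyak::util::Cpu::tLZCNT)));
    std::printf("GFNI:  %d\n", static_cast<int>(cpu.has(Xbyak::util::Cpu::tGFNI)));
    return 0;
}
```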

View File

@@ -420,11 +420,10 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
EmitExclusiveLock(code, conf, tmp, eax);
SharedLabel end = GenSharedLabel();
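The wrapped_fn lookup above indexes pre-generated fallback code by the tuple (ordered, bitsize, vaddr register index, value register index). The container's definition is not part of this hunk; the sketch below shows one plausible shape for such a tuple-keyed table, using std::map purely for illustration.

```cpp
#include <cstddef>
#include <map>
#include <tuple>

using FallbackFn = void (*)();  // placeholder signature, not dynarmic's real one

// Key: (ordered, bitsize, vaddr register index, value register index).
using FallbackKey = std::tuple<bool, std::size_t, int, int>;

std::map<FallbackKey, FallbackFn> exclusive_write_fallbacks;  // hypothetical definition

FallbackFn LookupFallback(bool ordered, std::size_t bitsize, int vaddr_idx, int value_idx) {
    // std::tuple provides lexicographic ordering, so it works as a map key out of the box.
    // Note: operator[] default-inserts a null entry when the key is missing.
    return exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr_idx, value_idx)];
}
```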

View File

@@ -346,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p
}
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
EmitSpinLockLock(code, pointer, tmp);
}
template<typename UserConfig>
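In EmitExclusiveLock above, the monitor's lock-word address is converted to a 64-bit integer with std::bit_cast so it can be emitted as an immediate mov operand. A tiny C++20 sketch of that conversion, assuming a 64-bit target and a hypothetical lock word:

```cpp
#include <bit>
#include <cstdint>

volatile int g_lock_word = 0;  // hypothetical stand-in for the exclusive monitor's lock word

int main() {
    // Reinterpret the pointer's bits as u64 (pointers are 8 bytes on a 64-bit target),
    // which is what the emitter needs to encode the address as an immediate.
    const std::uint64_t address = std::bit_cast<std::uint64_t>(&g_lock_word);
    return address != 0 ? 0 : 1;
}
```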

View File

@@ -35,10 +35,9 @@ enum class HostFeature : u64 {
BMI2 = 1ULL << 19,
LZCNT = 1ULL << 20,
GFNI = 1ULL << 21,
WAITPKG = 1ULL << 22,
// Zen-based BMI2
FastBMI2 = 1ULL << 23,
FastBMI2 = 1ULL << 22,
// Orthographic AVX512 features on 128 and 256 vectors
AVX512_Ortho = AVX512F | AVX512VL,
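After WAITPKG is removed, the remaining single-feature bits are repacked to stay contiguous, and composites such as AVX512_Ortho are just ORs of those bits (legal inside the enum because the underlying type is fixed to u64). A minimal sketch of the same bitflag style with made-up feature names:

```cpp
#include <cstdint>

// Hedged sketch of a u64 feature bitset in the style of HostFeature; names are invented.
enum class Feature : std::uint64_t {
    A = 1ULL << 0,
    B = 1ULL << 1,
    C = 1ULL << 2,
    // With a fixed underlying type, enumerators can be combined before the closing brace.
    BAndC = B | C,
};

constexpr Feature operator|(Feature lhs, Feature rhs) {
    return static_cast<Feature>(static_cast<std::uint64_t>(lhs) |
                                static_cast<std::uint64_t>(rhs));
}

constexpr bool HasAll(Feature set, Feature mask) {
    return (static_cast<std::uint64_t>(set) & static_cast<std::uint64_t>(mask)) ==
           static_cast<std::uint64_t>(mask);
}
```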

View File

@@ -22,46 +22,17 @@ static const auto default_cg_mode = nullptr; //Allow RWE
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
// TODO: this is because we lack regalloc - so better to be safe :(
if (waitpkg) {
code.push(Xbyak::util::eax);
code.push(Xbyak::util::ebx);
code.push(Xbyak::util::edx);
}
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR);
code.L(loop);
if (waitpkg) {
// TODO: This clobbers EAX and EDX; did we tell the regalloc?
// Arm the address monitor on ptr
code.umonitor(ptr);
// tmp.bit[0] = 0: C0.1 | Slow Wakeup | Better Savings
// tmp.bit[0] = 1: C0.2 | Fast Wakeup | Lesser Savings
// edx:eax is implicitly used as a 64-bit deadline timestamp
// Use the maximum so that we use the operating system's maximum
// allowed wait time within the IA32_UMWAIT_CONTROL register
// Enter power state designated by tmp and wait for a write to lock_ptr
code.mov(Xbyak::util::eax, 0xFFFFFFFF);
code.mov(Xbyak::util::edx, Xbyak::util::eax);
// TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it...
code.mov(Xbyak::util::ebx, 1);
code.umwait(Xbyak::util::ebx);
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
// CF == 0 if we exited the wait for any other reason
} else {
code.pause();
}
code.pause();
code.L(start);
code.mov(tmp, 1);
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR);
if (waitpkg) {
code.pop(Xbyak::util::edx);
code.pop(Xbyak::util::ebx);
code.pop(Xbyak::util::eax);
}
}
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
@@ -89,7 +60,7 @@ void SpinLockImpl::Initialize() noexcept {
Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1);
code.align();
lock = code.getCurr<void (*)(volatile int*)>();
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
EmitSpinLockLock(code, ABI_PARAM1, code.eax);
code.ret();
code.align();
unlock = code.getCurr<void (*)(volatile int*)>();
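What remains after this change is a plain test-and-set spin loop: write 1 into the lock word with xchg and, if the previous value was non-zero, pause and retry. The hedged sketch below is the logical equivalent in portable C++ with std::atomic; the emulator itself emits this sequence at runtime with Xbyak.

```cpp
#include <atomic>
#include <immintrin.h>  // _mm_pause

// Logical equivalent of the emitted loop: tmp = 1; xchg [ptr], tmp; retry while tmp != 0.
struct SpinLock {
    std::atomic<int> word{0};

    void Lock() {
        // exchange() compiles to xchg on x86-64, which is implicitly locked; this matches
        // the generated code, where the explicit lock prefix is left commented out.
        while (word.exchange(1, std::memory_order_acquire) != 0) {
            _mm_pause();  // the retained 'pause' hint while spinning
        }
    }

    void Unlock() {
        word.store(0, std::memory_order_release);
    }
};
```

The deleted branch instead armed an address monitor with umonitor and slept in umwait until the lock word was written (or the IA32_UMWAIT_CONTROL timeout fired), which saved power but clobbered EAX, EBX and EDX around the wait.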

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -12,7 +9,7 @@
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
} // namespace Dynarmic