Compare commits


6 Commits

Author SHA1 Message Date
lizzie
295e2df7a9 etc fixes 2025-12-10 03:44:44 +00:00
lizzie
93d7ba9a70 stuff 2025-12-09 11:43:20 +00:00
lizzie
4dd8c0ca9e wifi scanner 2025-12-09 11:22:25 +00:00
lizzie
63da545b28 [core/internal_network] cleanup network interface code a bit
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-12-09 11:25:40 +01:00
lizzie
77d83b008a [dynarmic] avoid IsInmediate() comical call recursion (#3145)
Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3145
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
2025-12-09 05:28:01 +01:00
lizzie
69a84ee0a6 [dynarmic] remove reg_alloc from all arguments on x86 emitter (#3150)
From my tests this decreases JIT latency twofold, though it may be placebo.
Saving reg_alloc while having it readily available is certainly a very interesting choice... after all, saving it onto %rdi is way cheaper, isn't it? :)
Please test for any performance regressions; I got +20 FPS on Rain World (unlocked) from this change alone

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3150
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
2025-12-09 03:53:58 +01:00
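
The refactor in 69a84ee0a6 (and its companion 77d83b008a) follows one pattern throughout the diffs below: helpers such as Argument no longer store a RegAlloc& back-reference; callers pass the allocator or code buffer explicitly at each call site (ScratchGpr(code), DefineValue(code, inst, ...), IsInGpr(ctx.reg_alloc)). A minimal compilable sketch of that pattern, with schematic names that are not dynarmic's real declarations:

#include <cassert>

struct RegAlloc {
    bool InGpr(int slot) const { return slot >= 0; } // stand-in location query
};

struct Argument {
    int slot = -1;
    bool immediate = false;
    bool IsImmediate() const { return immediate; }
    // After the change, the allocator arrives as a parameter instead of living
    // in the struct, so sizeof(Argument) shrinks and no ctor plumbing is needed.
    bool IsInGpr(const RegAlloc& ra) const { return !IsImmediate() && ra.InGpr(slot); }
};

int main() {
    RegAlloc ra;
    Argument arg{.slot = 3};
    assert(arg.IsInGpr(ra)); // the caller threads `ra` through, as the emitters now do
}

The trade-off is noisier call sites in exchange for a smaller, trivially copyable hot struct and one fewer stored reference per argument.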
31 changed files with 2501 additions and 2569 deletions

View File

@@ -1166,8 +1166,11 @@ void Java_org_yuzu_yuzu_1emu_NativeLibrary_initializeEmptyUserDirectory(JNIEnv*
void Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerInit(JNIEnv* env, jobject obj) {
// for some reason the full user directory isn't initialized on Android, so we need to create it
const auto play_time_dir = Common::FS::GetEdenPath(Common::FS::EdenPath::PlayTimeDir);
if (!Common::FS::IsDir(play_time_dir) && !Common::FS::CreateDir(play_time_dir))
LOG_WARNING(Frontend, "Failed to create play time directory");
if (!Common::FS::IsDir(play_time_dir)) {
if (!Common::FS::CreateDir(play_time_dir)) {
LOG_WARNING(Frontend, "Failed to create play time directory");
}
}
play_time_manager = std::make_unique<PlayTime::PlayTimeManager>();
}
@@ -1180,16 +1183,13 @@ void Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerStart(JNIEnv* env, job
}
void Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerStop(JNIEnv* env, jobject obj) {
if (play_time_manager)
play_time_manager->Stop();
play_time_manager->Stop();
}
jlong Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerGetPlayTime(JNIEnv* env, jobject obj, jstring jprogramId) {
if (play_time_manager) {
u64 program_id = EmulationSession::GetProgramId(env, jprogramId);
return play_time_manager->GetPlayTime(program_id);
}
return 0UL;
jlong Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerGetPlayTime(JNIEnv* env, jobject obj,
jstring jprogramId) {
u64 program_id = EmulationSession::GetProgramId(env, jprogramId);
return play_time_manager->GetPlayTime(program_id);
}
jlong Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerGetCurrentTitleId(JNIEnv* env,
@@ -1199,17 +1199,17 @@ jlong Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerGetCurrentTitleId(JNI
void Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerResetProgramPlayTime(JNIEnv* env, jobject obj,
jstring jprogramId) {
u64 program_id = EmulationSession::GetProgramId(env, jprogramId);
if (play_time_manager) {
u64 program_id = EmulationSession::GetProgramId(env, jprogramId);
play_time_manager->ResetProgramPlayTime(program_id);
}
}
void Java_org_yuzu_yuzu_1emu_NativeLibrary_playTimeManagerSetPlayTime(JNIEnv* env, jobject obj,
jstring jprogramId, jlong playTimeSeconds) {
u64 program_id = EmulationSession::GetProgramId(env, jprogramId);
if (play_time_manager) {
u64 program_id = EmulationSession::GetProgramId(env, jprogramId);
play_time_manager->SetPlayTime(program_id, u64(playTimeSeconds));
play_time_manager->SetPlayTime(program_id, static_cast<u64>(playTimeSeconds));
}
}

View File

@@ -1160,8 +1160,12 @@ add_library(core STATIC
if (ENABLE_WIFI_SCAN)
# find_package(libiw REQUIRED)
target_compile_definitions(core PRIVATE ENABLE_WIFI_SCAN)
target_link_libraries(core PRIVATE iw)
target_sources(core PRIVATE internal_network/wifi_scanner.cpp)
if (PLATFORM_LINUX)
target_link_libraries(core PRIVATE iw)
endif()
else()
target_sources(core PRIVATE internal_network/wifi_scanner_dummy.cpp)
endif()
if (WIN32)

View File

@@ -9,6 +9,30 @@
#include <ranges>
#include <bit>
#ifdef _WIN32
#include <iphlpapi.h>
#elif defined(__linux__) || defined(__ANDROID__)
#include <cerrno>
#include <ifaddrs.h>
#include <net/if.h>
#elif defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/sysctl.h>
#include <net/if_dl.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <arpa/inet.h>
#include <netdb.h>
#endif
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/settings.h"
@@ -16,14 +40,6 @@
#include "core/internal_network/emu_net_state.h"
#include "core/internal_network/network_interface.h"
#ifdef _WIN32
#include <iphlpapi.h>
#else
#include <cerrno>
#include <ifaddrs.h>
#include <net/if.h>
#endif
namespace Network {
#ifdef _WIN32
@@ -71,22 +87,12 @@ std::vector<Network::NetworkInterface> GetAvailableNetworkInterfaces() {
gw = reinterpret_cast<sockaddr_in*>(a->FirstGatewayAddress->Address.lpSockaddr)
->sin_addr;
HostAdapterKind kind = HostAdapterKind::Ethernet;
switch (a->IfType) {
case IF_TYPE_IEEE80211: // 802.11 Wi-Fi
kind = HostAdapterKind::Wifi;
break;
default:
kind = HostAdapterKind::Ethernet;
break;
}
result.emplace_back(Network::NetworkInterface{
.name = Common::UTF16ToUTF8(std::wstring{a->FriendlyName}),
.ip_address = ip,
.subnet_mask = mask,
.gateway = gw,
.kind = kind
.kind = (a->IfType == IF_TYPE_IEEE80211 ? HostAdapterKind::Wifi : HostAdapterKind::Ethernet)
});
}
@@ -96,158 +102,146 @@ std::vector<Network::NetworkInterface> GetAvailableNetworkInterfaces() {
#else
std::vector<Network::NetworkInterface> GetAvailableNetworkInterfaces() {
#if defined(__ANDROID__) || defined(__linux__)
struct ifaddrs* ifaddr = nullptr;
if (getifaddrs(&ifaddr) != 0) {
LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}",
std::strerror(errno));
LOG_ERROR(Network, "getifaddrs: {}", std::strerror(errno));
return {};
}
std::vector<Network::NetworkInterface> result;
for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) {
continue;
}
if (ifa->ifa_addr->sa_family != AF_INET) {
continue;
}
if ((ifa->ifa_flags & IFF_UP) == 0 || (ifa->ifa_flags & IFF_LOOPBACK) != 0) {
continue;
}
#ifdef ANDROID
// On Android, we can't reliably get gateway info from /proc/net/route
// Just use 0 as the gateway address
result.emplace_back(Network::NetworkInterface{
.name{ifa->ifa_name},
.ip_address{std::bit_cast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
.subnet_mask{std::bit_cast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
.gateway{in_addr{.s_addr = 0}}
});
// TODO: This is still horrible, it was worse before (somehow)
struct RoutingEntry {
std::string iface_name;
u32 dest;
u32 gateway;
u32 flags;
};
std::vector<RoutingEntry> routes{};
#ifdef __ANDROID__
// Even though Android is Linux-based, we can't reliably obtain routing information there :(
#else
u32 gateway{};
std::ifstream file{"/proc/net/route"};
if (!file.is_open()) {
LOG_ERROR(Network, "Failed to open \"/proc/net/route\"");
// Solaris defines s_addr as a macro, can't use special C++ shenanigans here
in_addr gateway_0;
gateway_0.s_addr = gateway;
result.emplace_back(Network::NetworkInterface{
.name = ifa->ifa_name,
.ip_address = std::bit_cast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr,
.subnet_mask = std::bit_cast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr,
.gateway = gateway_0
});
continue;
}
// ignore header
file.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');
bool gateway_found = false;
if (std::ifstream file("/proc/net/route"); file.is_open()) {
file.ignore((std::numeric_limits<std::streamsize>::max)(), '\n'); // ignore header
for (std::string line; std::getline(file, line);) {
std::istringstream iss{line};
std::string iface_name;
iss >> iface_name;
if (iface_name != ifa->ifa_name) {
continue;
}
iss >> std::hex;
u32 dest{};
iss >> dest;
if (dest != 0) {
// not the default route
continue;
}
iss >> gateway;
u16 flags{};
iss >> flags;
// flag RTF_GATEWAY (defined in <linux/route.h>)
if ((flags & 0x2) == 0) {
continue;
}
gateway_found = true;
break;
RoutingEntry info{};
iss >> info.iface_name >> std::hex
>> info.dest >> info.gateway >> info.flags;
routes.emplace_back(info);
}
if (!gateway_found) {
gateway = 0;
}
in_addr gateway_0;
gateway_0.s_addr = gateway;
result.emplace_back(Network::NetworkInterface{
} else {
LOG_WARNING(Network, "\"/proc/net/route\" not found - using gateway 0");
}
#endif
std::vector<Network::NetworkInterface> ifaces;
for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr /* Have a netmask and address */
|| ifa->ifa_addr->sa_family != AF_INET /* Must be of kind AF_INET */
|| (ifa->ifa_flags & IFF_UP) == 0 || (ifa->ifa_flags & IFF_LOOPBACK) != 0) /* Not loopback */
continue;
// Just use 0 as the gateway address if not found OR routes are empty :)
auto const it = std::ranges::find_if(routes, [&ifa](auto const& e) {
return e.iface_name == ifa->ifa_name
&& e.dest == 0 // destination 0.0.0.0 means this is the default route
&& (e.flags & 0x02) != 0; // flag RTF_GATEWAY (defined in <linux/route.h>)
});
in_addr gw; // Solaris defines s_addr as a macro, can't use special C++ shenanigans here
gw.s_addr = it != routes.end() ? it->gateway : 0;
ifaces.emplace_back(Network::NetworkInterface{
.name = ifa->ifa_name,
.ip_address = std::bit_cast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr,
.subnet_mask = std::bit_cast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr,
.gateway = gateway_0
.gateway = gw
});
#endif // ANDROID
}
freeifaddrs(ifaddr);
return ifaces;
#elif defined(__FreeBSD__)
std::vector<Network::NetworkInterface> ifaces;
int fd = ::socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
if (fd < 0) {
LOG_ERROR(Network, "socket: {}", std::strerror(errno));
return {};
}
freeifaddrs(ifaddr);
return result;
size_t bufsz = 0;
int mib[6] = {
CTL_NET, PF_ROUTE, 0,
AF_UNSPEC, NET_RT_IFLIST, 0
};
if (::sysctl(mib, sizeof(mib) / sizeof(mib[0]), nullptr, &bufsz, nullptr, 0) < 0) {
LOG_ERROR(Network, "sysctl.1: {}", std::strerror(errno));
::close(fd);
return {};
}
std::vector<char> buf(bufsz);
if (::sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf.data(), &bufsz, nullptr, 0) < 0) {
LOG_ERROR(Network, "sysctl.2: {}", std::strerror(errno));
::close(fd);
return {};
}
struct rt_msghdr const *rtm = NULL;
for (char *next = buf.data(); next < buf.data() + bufsz; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr const *)next;
if (rtm->rtm_type == RTM_IFINFO) {
struct if_msghdr const* ifm = (struct if_msghdr const *)rtm;
size_t msglen = rtm->rtm_msglen - sizeof(*ifm);
char const* p = (char const*)(ifm + 1);
Network::NetworkInterface iface{};
for (size_t i = 0; i < RTAX_MAX; i++)
if ((ifm->ifm_addrs & (1 << i)) != 0) {
struct sockaddr const* sa = reinterpret_cast<struct sockaddr const*>(p);
if (msglen == 0 || msglen < SA_SIZE(sa))
break;
if (i == RTA_NETMASK && sa->sa_family == AF_LINK) {
size_t namelen = 0;
struct sockaddr_dl const* sdl = reinterpret_cast<struct sockaddr_dl const*>(sa);
::link_ntoa_r(sdl, nullptr, &namelen);
iface.name = std::string(namelen, ' ');
::link_ntoa_r(sdl, iface.name.data(), &namelen);
std::memcpy(&iface.ip_address, sa, sizeof(struct sockaddr_in));
}
msglen -= SA_SIZE(sa);
p += SA_SIZE(sa);
}
ifaces.push_back(iface);
}
}
::close(fd);
return ifaces;
#else
return {};
#endif
}
#endif // _WIN32
std::optional<Network::NetworkInterface> GetSelectedNetworkInterface() {
const auto& selected_network_interface = Settings::values.network_interface.GetValue();
const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
if (network_interfaces.empty()) {
LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces");
return std::nullopt;
}
#ifdef __ANDROID__
if (selected_network_interface.empty()) {
return network_interfaces[0];
}
#endif
const auto res =
std::ranges::find_if(network_interfaces, [&selected_network_interface](const auto& iface) {
return iface.name == selected_network_interface;
});
if (res == network_interfaces.end()) {
auto const& sel_if = Settings::values.network_interface.GetValue();
if (auto const ifaces = Network::GetAvailableNetworkInterfaces(); ifaces.size() > 0) {
if (sel_if.empty())
return ifaces[0];
if (auto const res = std::ranges::find_if(ifaces, [&sel_if](const auto& iface) {
return iface.name == sel_if;
}); res != ifaces.end())
return *res;
// Only print the error once to avoid log spam
static bool print_error = true;
if (print_error) {
LOG_ERROR(Network, "Couldn't find selected interface \"{}\"",
selected_network_interface);
LOG_WARNING(Network, "Couldn't find interface \"{}\"", sel_if);
print_error = false;
}
return std::nullopt;
}
return *res;
LOG_WARNING(Network, "No interfaces");
return std::nullopt;
}
void SelectFirstNetworkInterface() {
const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
if (network_interfaces.empty()) {
return;
}
Settings::values.network_interface.SetValue(network_interfaces[0].name);
if (auto const ifaces = Network::GetAvailableNetworkInterfaces(); ifaces.size() > 0)
Settings::values.network_interface.SetValue(ifaces[0].name);
}
} // namespace Network
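
For reference, the gateway lookup that the Linux branch above performs can be exercised on its own. A standalone sketch, assuming the standard /proc/net/route layout (Iface, then Destination, Gateway, and Flags as hex columns); this is an illustration, not the Eden code itself:

#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

int main() {
    std::ifstream file{"/proc/net/route"};
    if (!file.is_open())
        return 1;
    std::string line;
    std::getline(file, line); // skip the header row
    while (std::getline(file, line)) {
        std::istringstream iss{line};
        std::string iface;
        std::uint32_t dest{}, gateway{}, flags{};
        iss >> iface >> std::hex >> dest >> gateway >> flags;
        // dest == 0 selects the default route; 0x2 is RTF_GATEWAY (<linux/route.h>).
        // gateway is a little-endian IPv4 address, matching in_addr::s_addr.
        if (dest == 0 && (flags & 0x2) != 0)
            std::cout << iface << " default gateway (LE hex): " << std::hex << gateway << '\n';
    }
}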

View File

@@ -6,11 +6,6 @@
#include <thread>
#include <vector>
#include "common/logging/log.h"
#include "core/internal_network/wifi_scanner.h"
using namespace std::chrono_literals;
#ifdef _WIN32
#define NOMINMAX
#include <windows.h>
@@ -18,16 +13,30 @@ using namespace std::chrono_literals;
#ifdef _MSC_VER
#pragma comment(lib, "wlanapi.lib")
#endif
#elif defined(__linux__) && !defined(__ANDROID__)
#include <iwlib.h>
#elif defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net80211/ieee80211_ioctl.h>
#endif
#include "common/logging/log.h"
#include "core/internal_network/network_interface.h"
#include "core/internal_network/wifi_scanner.h"
using namespace std::chrono_literals;
namespace Network {
#ifdef ENABLE_WIFI_SCAN
#ifdef _WIN32
static u8 QualityToPercent(DWORD q) {
return static_cast<u8>(q);
return u8(q);
}
static std::vector<Network::ScanData> ScanWifiWin(std::chrono::milliseconds deadline) {
std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
std::vector<Network::ScanData> out;
HANDLE hClient{};
@@ -85,38 +94,16 @@ static std::vector<Network::ScanData> ScanWifiWin(std::chrono::milliseconds dead
WlanCloseHandle(hClient, nullptr);
return out;
}
#endif /* _WIN32 */
#if defined(__linux__) && !defined(_WIN32) && !defined(ANDROID)
#include <iwlib.h>
#elif defined(__linux__) && !defined(__ANDROID__)
static u8 QualityToPercent(const iwrange& r, const wireless_scan* ws) {
const iw_quality qual = ws->stats.qual;
const int lvl = qual.level;
const int max = r.max_qual.level ? r.max_qual.level : 100;
return static_cast<u8>(std::clamp(100 * lvl / max, 0, 100));
}
static int wifi_callback(int skfd, char* ifname, char* args[], int count)
{
iwrange range;
int res = iw_get_range_info(skfd, ifname, &range);
LOG_INFO(Network, "ifname {} returned {} on iw_get_range_info", ifname, res);
if (res >= 0) {
strncpy(args[0], ifname, IFNAMSIZ - 1);
args[0][IFNAMSIZ - 1] = 0;
return 1;
}
return 0;
return u8(std::clamp(100 * lvl / max, 0, 100));
}
// TODO(crueter, Maufeat): Check if driver supports wireless extensions, fallback to nl80211 if not
static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds deadline) {
std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
std::vector<Network::ScanData> out;
int sock = iw_sockets_open();
if (sock < 0) {
@@ -127,7 +114,17 @@ static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds de
char ifname[IFNAMSIZ] = {0};
char *args[1] = {ifname};
iw_enum_devices(sock, &wifi_callback, args, 0);
iw_enum_devices(sock, [](int skfd, char* ifname, char* args[], int count) -> int {
iwrange range;
int res = iw_get_range_info(skfd, ifname, &range);
LOG_INFO(Network, "ifname {} returned {} on iw_get_range_info", ifname, res);
if (res >= 0) {
strncpy(args[0], ifname, IFNAMSIZ - 1);
args[0][IFNAMSIZ - 1] = 0;
return 1;
}
return 0;
}, args, 0);
if (strlen(ifname) == 0) {
LOG_WARNING(Network, "No wireless interface found");
@@ -153,20 +150,19 @@ static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds de
out.clear();
for (auto* ws = head.result; ws; ws = ws->next) {
if (!ws->b.has_essid)
continue;
if (ws->b.has_essid) {
Network::ScanData sd{};
sd.ssid_len = static_cast<u8>(std::min<int>(ws->b.essid_len, 0x20));
std::memcpy(sd.ssid, ws->b.essid, sd.ssid_len);
sd.quality = QualityToPercent(range, ws);
sd.flags |= 1;
if (ws->b.has_key)
sd.flags |= 2;
Network::ScanData sd{};
sd.ssid_len = static_cast<u8>(std::min<int>(ws->b.essid_len, 0x20));
std::memcpy(sd.ssid, ws->b.essid, sd.ssid_len);
sd.quality = QualityToPercent(range, ws);
sd.flags |= 1;
if (ws->b.has_key)
sd.flags |= 2;
out.emplace_back(sd);
char tmp[0x22]{};
std::memcpy(tmp, sd.ssid, sd.ssid_len);
out.emplace_back(sd);
char tmp[0x22]{};
std::memcpy(tmp, sd.ssid, sd.ssid_len);
}
}
have = !out.empty();
}
@@ -174,21 +170,14 @@ static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds de
iw_sockets_close(sock);
return out;
}
#endif /* linux */
#endif
#elif defined(__FreeBSD__)
std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
#ifdef ENABLE_WIFI_SCAN
#if defined(_WIN32)
return ScanWifiWin(deadline);
#elif defined(__linux__) && !defined(ANDROID)
return ScanWifiLinux(deadline);
#else
return {}; // unsupported host, pretend no results
#endif
#else
return {}; // disabled, pretend no results
#endif
}
#else
std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
return {}; // disabled, pretend no results
}
#endif
} // namespace Network
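
One detail in the Linux branch above is the replacement of the free function wifi_callback with an inline lambda passed to iw_enum_devices. That works only because a capture-less lambda converts implicitly to a plain C function pointer; capturing anything would break the conversion. A self-contained sketch with a stand-in enumerator (not the real iwlib signature):

#include <cstdio>

using Callback = int (*)(int skfd, char* ifname, char* args[], int count);

static int EnumDevices(Callback cb, char* args[]) {
    char name[] = "wlan0"; // pretend the enumerator found one interface
    return cb(/*skfd=*/0, name, args, /*count=*/1);
}

int main() {
    char buf[16] = {};
    char* args[1] = {buf};
    // Capture-less lambda decays to Callback; adding a capture would not compile.
    EnumDevices([](int, char* ifname, char* out[], int) -> int {
        std::snprintf(out[0], 16, "%s", ifname);
        return 1;
    }, args);
    std::printf("found: %s\n", buf);
}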

View File

@@ -0,0 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <chrono>
#include "core/internal_network/wifi_scanner.h"
namespace Network {
std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
return {}; // disabled, pretend no results
}
} // namespace Network

View File

@@ -77,9 +77,9 @@ void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr(code, {HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(code);
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr(code).cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());

View File

@@ -175,7 +175,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
backend/x64/exclusive_monitor.cpp
backend/x64/exclusive_monitor_friend.h
backend/x64/host_feature.h
backend/x64/hostloc.cpp
backend/x64/hostloc.h
backend/x64/jitstate_info.h
backend/x64/oparg.h

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -60,7 +63,7 @@ void EmitIR<IR::Opcode::Pack2x32To1x64>(oaknut::CodeGenerator& code, EmitContext
template<>
void EmitIR<IR::Opcode::Pack2x64To1x128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
bool const args_in_gpr[] = { args[0].IsInGpr(), args[1].IsInGpr() };
bool const args_in_gpr[] = { args[0].IsInGpr(ctx.reg_alloc), args[1].IsInGpr(ctx.reg_alloc) };
if (args_in_gpr[0] && args_in_gpr[1]) {
auto Xlo = ctx.reg_alloc.ReadX(args[0]);
auto Xhi = ctx.reg_alloc.ReadX(args[1]);

View File

@@ -84,7 +84,7 @@ IR::AccType Argument::GetImmediateAccType() const {
return value.GetAccType();
}
HostLoc::Kind Argument::CurrentLocationKind() const {
HostLoc::Kind Argument::CurrentLocationKind(RegAlloc& reg_alloc) const {
return reg_alloc.ValueLocation(value.GetInst())->kind;
}
@@ -131,7 +131,7 @@ void HostLocInfo::UpdateUses() {
}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
ArgumentInfo ret = {Argument{}, Argument{}, Argument{}, Argument{}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value arg = inst->GetArg(i);
ret[i].value = arg;

View File

@@ -64,18 +64,18 @@ public:
IR::AccType GetImmediateAccType() const;
// Only valid if not immediate
HostLoc::Kind CurrentLocationKind() const;
bool IsInGpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Gpr; }
bool IsInFpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Fpr; }
HostLoc::Kind CurrentLocationKind(RegAlloc& reg_alloc) const;
bool IsInGpr(RegAlloc& reg_alloc) const {
return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Gpr;
}
bool IsInFpr(RegAlloc& reg_alloc) const {
return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Fpr;
}
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc)
: reg_alloc{reg_alloc} {}
bool allocated = false;
RegAlloc& reg_alloc;
IR::Value value;
bool allocated = false;
};
struct FlagsTag final {

View File

@@ -117,7 +117,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
return gprs;
}();
new (&this->reg_alloc) RegAlloc(&code, gpr_order, any_xmm);
new (&this->reg_alloc) RegAlloc(gpr_order, any_xmm);
A32EmitContext ctx{conf, reg_alloc, block};
// Start emitting.
@@ -283,47 +283,47 @@ void A32EmitX64::GenTerminalHandlers() {
void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store);
}
void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, MJitStateReg(reg));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsSingleExtReg(reg));
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movss(result, MJitStateExtReg(reg));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg));
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movsd(result, MJitStateExtReg(reg));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetVector(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
if (A32::IsDoubleExtReg(reg)) {
code.movsd(result, MJitStateExtReg(reg));
} else {
code.movaps(result, MJitStateExtReg(reg));
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
@@ -332,11 +332,11 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
code.mov(MJitStateReg(reg), args[1].GetImmediateU32());
} else if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movd(MJitStateReg(reg), to_store);
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.mov(MJitStateReg(reg), to_store);
}
}
@@ -346,11 +346,11 @@ void A32EmitX64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* ins
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsSingleExtReg(reg));
if (args[1].IsInXmm()) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
if (args[1].IsInXmm(ctx.reg_alloc)) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movss(MJitStateExtReg(reg), to_store);
} else {
Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.mov(MJitStateExtReg(reg), to_store);
}
}
@@ -360,11 +360,11 @@ void A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg));
if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movsd(MJitStateExtReg(reg), to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]);
code.mov(MJitStateExtReg(reg), to_store);
}
}
@@ -374,7 +374,7 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
if (A32::IsDoubleExtReg(reg)) {
code.movsd(MJitStateExtReg(reg), to_store);
} else {
@@ -383,9 +383,9 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) {
}
void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32();
if (code.HasHostFeature(HostFeature::FastBMI2)) {
// Here we observe that cpsr_et and cpsr_ge are right next to each other in memory,
@@ -428,15 +428,15 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32();
if (conf.always_little_endian) {
code.and_(cpsr, 0xFFFFFDFF);
@@ -501,7 +501,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
}
@@ -512,15 +512,15 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.shr(a, 28);
code.mov(b, NZCV::x64_mask);
code.pdep(a, a, b);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(a, 28);
code.imul(a, a, NZCV::to_x64_multiplier);
@@ -537,8 +537,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.shr(a, 28);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
@@ -546,7 +546,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
code.pdep(a, a, b);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(a, 28);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
@@ -559,8 +559,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
code.and_(tmp, 1);
@@ -577,12 +577,12 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
} else {
const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();
const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt8();
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
}
} else {
const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
if (args[1].IsImmediate()) {
const bool c = args[1].GetImmediateU1();
@@ -590,7 +590,7 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
code.or_(nz, c);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
} else {
const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.or_(nz, c);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
@@ -599,13 +599,13 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
}
static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
if (flag_bit != 0) {
code.shr(result, static_cast<int>(flag_bit));
}
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
@@ -619,27 +619,27 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
}
} else {
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
}
}
void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(!args[0].IsImmediate());
if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
}
}
@@ -656,8 +656,8 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(b, 0x01010101);
code.shr(a, 16);
@@ -665,7 +665,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.imul(a, a, 0xFF);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(a, 16);
code.and_(a, 0xF);
@@ -690,7 +690,7 @@ void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, I
return;
}
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
Devirtualize<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code);
}
@@ -718,9 +718,9 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
} else {
const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(code, arg).cvt32();
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(mask, new_pc);
code.and_(mask, 1);
@@ -745,7 +745,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
code.SwitchMxcsrOnExit();
if (conf.enable_cycle_counting) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
@@ -753,7 +753,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code);
if (conf.enable_cycle_counting) {
@@ -767,7 +767,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
code.SwitchMxcsrOnExit();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
if (conf.enable_cycle_counting) {
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
@@ -797,7 +797,7 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
}
void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
@@ -810,7 +810,7 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, args[0]);
code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
code.CallFunction(&SetFpscrImpl);
@@ -818,17 +818,17 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
}
void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(tmp, NZCV::x64_mask);
code.pext(tmp, value, tmp);
@@ -838,7 +838,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
return;
}
const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.and_(value, NZCV::x64_mask);
code.imul(value, value, NZCV::from_x64_multiplier);
@@ -851,7 +851,7 @@ static void EmitCoprocessorException() {
}
static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
reg_alloc.HostCall(inst, {}, arg0, arg1);
reg_alloc.HostCall(code, inst, {}, arg0, arg1);
if (callback.user_arg) {
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(*callback.user_arg));
@@ -914,8 +914,8 @@ void A32EmitX64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) {
}
if (const auto destination_ptr = std::get_if<u32*>(&action)) {
const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_destination_addr, reinterpret_cast<u64>(*destination_ptr));
code.mov(code.dword[reg_destination_addr], reg_word);
@@ -954,9 +954,9 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst)
}
if (const auto destination_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(args[2]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(code, args[2]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_destination_addr, reinterpret_cast<u64>((*destination_ptrs)[0]));
code.mov(code.dword[reg_destination_addr], reg_word1);
@@ -998,13 +998,13 @@ void A32EmitX64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) {
}
if (const auto source_ptr = std::get_if<u32*>(&action)) {
const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_source_addr, reinterpret_cast<u64>(*source_ptr));
code.mov(reg_word, code.dword[reg_source_addr]);
ctx.reg_alloc.DefineValue(inst, reg_word);
ctx.reg_alloc.DefineValue(code, inst, reg_word);
return;
}
@@ -1038,9 +1038,9 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
}
if (const auto source_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_destination_addr, reinterpret_cast<u64>((*source_ptrs)[1]));
code.mov(reg_result.cvt32(), code.dword[reg_destination_addr]);
@@ -1049,7 +1049,7 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(reg_tmp.cvt32(), code.dword[reg_destination_addr]);
code.or_(reg_result, reg_tmp);
ctx.reg_alloc.DefineValue(inst, reg_result);
ctx.reg_alloc.DefineValue(code, inst, reg_result);
return;
}

View File

@@ -91,7 +91,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
return gprs;
}();
new (&this->reg_alloc) RegAlloc{&code, gpr_order, any_xmm};
new (&this->reg_alloc) RegAlloc{gpr_order, any_xmm};
A64EmitContext ctx{conf, reg_alloc, block};
// Start emitting.
@@ -159,7 +159,7 @@ finish_this_inst:
}
code.int3();
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const size_t size = size_t(code.getCurr() - entrypoint);
const A64::LocationDescriptor descriptor{block.Location()};
const A64::LocationDescriptor end_location{block.EndLocation()};
@@ -266,25 +266,25 @@ void A64EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store);
}
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
code.shr(result, NZCV::x64_c_flag_bit);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(tmp, NZCV::x64_mask);
code.pext(nzcv_raw, nzcv_raw, tmp);
code.shl(nzcv_raw, 28);
@@ -294,16 +294,16 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
code.and_(nzcv_raw, NZCV::arm_mask);
}
ctx.reg_alloc.DefineValue(inst, nzcv_raw);
ctx.reg_alloc.DefineValue(code, inst, nzcv_raw);
}
void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(nzcv_raw, 28);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(tmp, NZCV::x64_mask);
code.pdep(nzcv_raw, nzcv_raw, tmp);
} else {
@@ -315,63 +315,63 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
}
void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movd(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movq(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
static u32 GetFPSRImpl(A64JitState* jit_state) {
@@ -379,7 +379,7 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
}
void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
code.CallFunction(GetFPSRImpl);
@@ -393,7 +393,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(addr, args[1].GetImmediateS32());
} else {
// TODO: zext tracking, xmm variant
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(code, args[1]);
code.mov(to_store.cvt32(), to_store.cvt32());
code.mov(addr, to_store);
}
@@ -405,11 +405,11 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]);
code.mov(addr, to_store);
}
}
@@ -419,8 +419,8 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// TODO: Optimize
code.pxor(tmp, tmp);
code.movss(tmp, to_store);
@@ -432,7 +432,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(code, args[1]);
code.movq(to_store, to_store); // TODO: Remove when able
code.movaps(addr, to_store);
}
@@ -442,7 +442,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movaps(addr, to_store);
}
@@ -451,11 +451,11 @@ void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
} else if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(addr, to_store);
}
}
@@ -466,7 +466,7 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPCRImpl);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
@@ -478,7 +478,7 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPSRImpl);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
@@ -489,17 +489,17 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
} else if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(addr, to_store);
}
}
void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
const u32 imm = args[0].GetImmediateU32();
@@ -511,7 +511,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate() && args[1].IsImmediate());
const u64 pc = args[0].GetImmediateU64();
@@ -524,13 +524,13 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]);
Devirtualize<&A64::UserCallbacks::DataCacheOperationRaised>(conf.callbacks).EmitCall(code);
}
void A64EmitX64::EmitA64InstructionCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0], args[1]);
Devirtualize<&A64::UserCallbacks::InstructionCacheOperationRaised>(conf.callbacks).EmitCall(code);
}
@@ -548,18 +548,18 @@ void A64EmitX64::EmitA64InstructionSynchronizationBarrier(A64EmitContext& ctx, I
return;
}
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
Devirtualize<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code);
}
void A64EmitX64::EmitA64GetCNTFRQ(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.cntfrq_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
if (!conf.wall_clock_cntpct) {
code.UpdateTicks();
}
@@ -567,43 +567,43 @@ void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitA64GetCTR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.ctr_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetDCZID(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.dczid_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetTPIDR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidr_el0) {
code.mov(result, u64(conf.tpidr_el0));
code.mov(result, qword[result]);
} else {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidrro_el0) {
code.mov(result, u64(conf.tpidrro_el0));
code.mov(result, qword[result]);
} else {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64SetTPIDR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[0]);
const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidr_el0) {
code.mov(addr, u64(conf.tpidr_el0));
code.mov(qword[addr], value);


@@ -68,7 +68,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
ctx.reg_alloc.DefineValue(code, inst, args[0]);
}
}
@@ -78,7 +78,7 @@ void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {
void EmitX64::EmitCallHostFunction(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[1], args[2], args[3]);
ctx.reg_alloc.HostCall(code, nullptr, args[1], args[2], args[3]);
code.mov(rax, args[0].GetImmediateU64());
code.call(rax);
}
@@ -120,7 +120,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {
code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]);
code.mov(qword[rsp + offsetof(RegisterData, spill)], rax);
reg_alloc.EmitVerboseDebuggingOutput();
reg_alloc.EmitVerboseDebuggingOutput(code);
for (int i = 0; i < 16; i++) {
if (rsp.getIdx() == i) {
@@ -140,9 +140,9 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate());
const u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr(code);
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
@@ -190,12 +190,12 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize);
code.test(value, value);
code.lahf();
code.movzx(eax, ah);
ctx.reg_alloc.DefineValue(inst, nz);
ctx.reg_alloc.DefineValue(code, inst, nz);
}
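
A scalar sketch of what the test/lahf/movzx sequence leaves in the result register, assuming the architectural LAHF layout (SF in bit 7 of AH, ZF in bit 6, bit 1 always set); it is also why the scratch register is pinned to RAX above: LAHF can only write AH.

    #include <cstdint>

    // Emulates: test value, value ; lahf ; movzx eax, ah
    uint32_t nz_from_value(int64_t value) {
        const uint32_t sf = value < 0 ? 1u : 0u;   // N
        const uint32_t zf = value == 0 ? 1u : 0u;  // Z
        // PF/AF/CF land in bits 2/4/0 and are ignored in the NZ format.
        return (sf << 7) | (zf << 6) | 0x2u;       // bit 1 of FLAGS reads as 1
    }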
void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
@@ -221,27 +221,27 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize);
code.test(value, value);
code.lahf();
code.xor_(al, al);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
}
void EmitX64::EmitGetCFlagFromNZCV(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const u32 value = (args[0].GetImmediateU32() >> 8) & 1;
code.mov(result, value);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(result, 8);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
@@ -249,30 +249,30 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(code).cvt32();
u32 value = 0;
value |= mcl::bit::get_bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= mcl::bit::get_bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= mcl::bit::get_bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= mcl::bit::get_bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.mov(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.shr(nzcv, 28);
code.mov(tmp, NZCV::x64_mask);
code.pdep(nzcv, nzcv, tmp);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
} else {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(nzcv, 28);
code.imul(nzcv, nzcv, NZCV::to_x64_multiplier);
code.and_(nzcv, NZCV::x64_mask);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
}
}
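
All three branches above scatter the guest NZCV nibble (bits 31..28) into the same lahf-compatible layout: N to bit 15, Z to bit 14, C to bit 8, V to bit 0. A scalar check of the two non-immediate strategies, with mask and multiplier values assumed from those bit positions (the real constants are NZCV::x64_mask and NZCV::to_x64_multiplier):

    #include <cstdint>

    constexpr uint32_t x64_mask = 0xC101;    // assumed: bits 15, 14, 8, 0
    constexpr uint32_t multiplier = 0x1081;  // assumed: shifts bit 3->15, 2->14, 1->8, 0->0

    // FastBMI2 path: pdep(nzcv >> 28, x64_mask), emulated bit by bit.
    constexpr uint32_t via_pdep(uint32_t packed) {
        uint32_t src = packed >> 28, out = 0;
        for (uint32_t m = x64_mask; m != 0; m &= m - 1) {
            if (src & 1)
                out |= m & (~m + 1);  // deposit at lowest remaining mask bit
            src >>= 1;
        }
        return out;
    }

    // Fallback path: one multiply scatters all four bits, then mask.
    constexpr uint32_t via_imul(uint32_t packed) {
        return ((packed >> 28) * multiplier) & x64_mask;
    }

    static_assert(via_pdep(0xF0000000) == via_imul(0xF0000000), "");
    static_assert(via_pdep(0xA0000000) == 0x8100, "");  // N and C set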


@@ -23,13 +23,13 @@ using AESFn = void(AES::State&, const AES::State&);
static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, BlockOfCode& code, IR::Inst* inst, AESFn fn) {
constexpr u32 stack_space = static_cast<u32>(sizeof(AES::State)) * 2;
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
@@ -37,22 +37,22 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zero, zero);
code.aesdeclast(data, zero);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
@@ -63,13 +63,13 @@ void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zero, zero);
code.aesenclast(data, zero);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
@@ -80,11 +80,11 @@ void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.aesimc(data, data);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
@@ -95,14 +95,14 @@ void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zero, zero);
code.aesdeclast(data, zero);
code.aesenc(data, zero);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
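
The AES-NI trick above deserves a note: aesdeclast with a zero key applies only InvShiftRows and InvSubBytes, while aesenc applies ShiftRows, SubBytes, then MixColumns. ShiftRows is a byte permutation and SubBytes is byte-wise, so the two pairs commute and cancel, leaving exactly MixColumns. A minimal intrinsics sketch of the same composition (requires AES-NI):

    #include <wmmintrin.h>  // AES-NI intrinsics

    // aesdeclast(x, 0) = InvSubBytes(InvShiftRows(x))
    // aesenc(y, 0)     = MixColumns(SubBytes(ShiftRows(y)))
    // Composed with a zero key both times: MixColumns(x), as emitted above.
    __m128i mix_columns(__m128i x) {
        const __m128i zero = _mm_setzero_si128();
        return _mm_aesenc_si128(_mm_aesdeclast_si128(x, zero), zero);
    }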


@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -19,16 +22,16 @@ namespace CRC32 = Common::Crypto::CRC32;
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE42)) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(data_size);
if (data_size != 64) {
code.crc32(crc, value);
} else {
code.crc32(crc.cvt64(), value);
}
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); // 32-bit write zero-extends to the full register
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}
@@ -38,11 +41,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(code, args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
@@ -64,12 +67,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
@@ -82,12 +85,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
@@ -100,9 +103,9 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}
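
For reference against the PCLMULQDQ folding above, whose constants encode the ISO generator polynomial in reflected form, here is a bit-serial model of the value the fallback callback computes; the real helper is Common::Crypto::CRC32::ComputeCRC32ISO, and this sketch is only a reference implementation:

    #include <cstdint>

    // Reflected CRC-32/ISO-HDLC, one message bit per step; 0xEDB88320 is the
    // bit-reversed form of the 0x04C11DB7 generator polynomial.
    uint32_t crc32_iso(uint32_t crc, uint64_t value, int length_in_bytes) {
        for (int i = 0; i < length_in_bytes * 8; ++i) {
            const uint32_t bit = (crc ^ static_cast<uint32_t>(value >> i)) & 1u;
            crc = (crc >> 1) ^ (bit ? 0xEDB88320u : 0u);
        }
        return crc;
    }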


@@ -54,14 +54,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.HostCall(nullptr, {}, args[1]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1]);
if (ordered) {
code.mfence();
}
code.CallFunction(memory_read_128);
ctx.reg_alloc.DefineValue(inst, xmm1);
ctx.reg_alloc.DefineValue(code, inst, xmm1);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1]);
if (ordered) {
code.mfence();
}
@@ -74,14 +74,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx();
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
@@ -126,9 +126,9 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
code.L(*end);
if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx});
}
}
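
Stepping back from the hunk: EmitMemoryRead picks one of three strategies, a direct fastmem load (faults patched by a signal handler), an inline page-table walk with a fallback thunk, or a plain callback when neither exists. A condensed, hypothetical model of that decision (names invented for illustration; the emitted code inlines the fast paths rather than branching like this at runtime):

    #include <cstdint>
    #include <cstring>
    #include <functional>

    struct MemoryCtx {                      // hypothetical stand-in for the JIT config
        uint8_t* fastmem_base = nullptr;    // host mapping guarded by a fault handler
        uint8_t** page_table = nullptr;     // page index -> host page, null on miss
        std::function<uint64_t(uint64_t)> read_callback;  // UserCallbacks equivalent
    };

    constexpr unsigned page_bits = 12;

    uint64_t read64(MemoryCtx& ctx, uint64_t vaddr) {
        uint64_t v;
        if (ctx.fastmem_base) {             // fastmem: one load, no checks
            std::memcpy(&v, ctx.fastmem_base + vaddr, sizeof(v));
            return v;
        }
        if (ctx.page_table) {               // page table: single lookup
            if (uint8_t* page = ctx.page_table[vaddr >> page_bits]) {
                std::memcpy(&v, page + (vaddr & ((1u << page_bits) - 1)), sizeof(v));
                return v;
            }
        }
        return ctx.read_callback(vaddr);    // neither (or a miss): callback
    }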
@@ -141,13 +141,13 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.CallFunction(memory_write_128);
} else {
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]);
Devirtualize<callback>(conf.callbacks).EmitCall(code);
}
if (ordered) {
@@ -159,16 +159,16 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128
? ctx.reg_alloc.UseXmm(args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(args[2]).getIdx() : ctx.reg_alloc.UseGpr(args[2]).getIdx());
? ctx.reg_alloc.UseXmm(code, args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(code, args[2]).getIdx() : ctx.reg_alloc.UseGpr(code, args[2]).getIdx());
const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
@@ -222,7 +222,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;
ctx.reg_alloc.HostCall(inst, {}, args[1]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1]);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
@@ -237,14 +237,14 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
});
code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
} else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
if (ordered) {
code.mfence();
@@ -256,9 +256,9 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
});
});
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
EmitCheckMemoryAbort(ctx, inst);
@@ -271,15 +271,15 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1], args[2]);
}
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
Xbyak::Label end;
code.mov(code.ABI_RETURN, u32(1));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
@@ -299,7 +299,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
code.mfence();
}
} else {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
@@ -310,7 +310,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered) {
code.mfence();
}
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
}
code.L(end);
@@ -330,16 +330,16 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
if constexpr (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
@@ -386,9 +386,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32());
if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx});
}
EmitCheckMemoryAbort(ctx, inst);
@@ -407,19 +407,19 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const auto value = [&] {
if constexpr (bitsize == 128) {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
return ctx.reg_alloc.UseXmm(args[2]);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
return ctx.reg_alloc.UseXmm(code, args[2]);
} else {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
return ctx.reg_alloc.UseGpr(args[2]);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
return ctx.reg_alloc.UseGpr(code, args[2]);
}
}();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
@@ -518,7 +518,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
code.L(*end);
EmitExclusiveUnlock(code, conf, tmp, eax);
ctx.reg_alloc.DefineValue(inst, status);
ctx.reg_alloc.DefineValue(code, inst, status);
EmitCheckMemoryAbort(ctx, inst);
}
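
Two details worth spelling out here: the RAX/RBX/RCX/RDX scratches in the 128-bit ordered paths exist because cmpxchg16b compares against RDX:RAX and stores RCX:RBX, and the load-exclusive/store-exclusive pair as a whole implements a reservation protocol. A hypothetical single-threaded model of that contract (the real implementation spans AxxJitState and a global monitor lock):

    #include <atomic>
    #include <cstdint>

    struct Monitor {                      // hypothetical reservation state
        bool exclusive_state = false;     // cleared on any intervening store
        uint64_t vaddr = 0;
        uint64_t expected = 0;
    };

    uint64_t load_exclusive(Monitor& m, std::atomic<uint64_t>& mem, uint64_t vaddr) {
        m.exclusive_state = true;         // the emitted code sets the same byte in AxxJitState
        m.vaddr = vaddr;
        m.expected = mem.load();
        return m.expected;
    }

    // Returns 0 on success, 1 on failure, matching `status` above.
    uint32_t store_exclusive(Monitor& m, std::atomic<uint64_t>& mem, uint64_t vaddr, uint64_t value) {
        if (!m.exclusive_state || m.vaddr != vaddr)
            return 1;                     // reservation lost or wrong address
        m.exclusive_state = false;
        return mem.compare_exchange_strong(m.expected, value) ? 0 : 1;
    }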


@@ -75,8 +75,8 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, EmitContext& ctx, size_t bitsiz
template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A32EmitContext>(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr(code).cvt32();
EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp.cvt64());
@@ -105,8 +105,8 @@ template<>
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(code);
EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp);
@@ -116,7 +116,7 @@ template<>
} else if (ctx.conf.silently_mirror_page_table) {
if (valid_page_index_bits >= 32) {
if (code.HasHostFeature(HostFeature::BMI2)) {
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code);
code.mov(bit_count, unused_top_bits);
code.bzhi(tmp, vaddr, bit_count);
code.shr(tmp, int(page_bits));
@@ -168,7 +168,7 @@ template<>
return r13 + vaddr;
} else if (ctx.conf.silently_mirror_fastmem) {
if (!tmp) {
tmp = ctx.reg_alloc.ScratchGpr();
tmp = ctx.reg_alloc.ScratchGpr(code);
}
if (unused_top_bits < 32) {
code.mov(*tmp, vaddr);
@@ -189,7 +189,7 @@ template<>
} else {
// TODO: Consider using TEST as above, but coalesce the 64-bit constant in the register allocator
if (!tmp) {
tmp = ctx.reg_alloc.ScratchGpr();
tmp = ctx.reg_alloc.ScratchGpr(code);
}
code.mov(*tmp, vaddr);
code.shr(*tmp, int(ctx.conf.fastmem_address_space_bits));
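
The branches in this file all reduce to the same question: how to turn a virtual address into a page-table index when the guest address space is narrower than 64 bits. With silently_mirror_page_table the extra bits wrap instead of faulting; a scalar sketch of that case, with names taken from the context above:

    #include <cstdint>

    uint64_t page_index(uint64_t vaddr, unsigned page_bits, unsigned page_table_address_space_bits) {
        const unsigned valid_page_index_bits = page_table_address_space_bits - page_bits;
        // Mirror: keep only the valid index bits; out-of-range addresses alias
        // back into the table instead of taking the abort path.
        return (vaddr >> page_bits) & ((uint64_t{1} << valid_page_index_bits) - 1);
    }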


@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -16,14 +19,14 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.paddb(xmm_a, xmm_b);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(ones, ones);
@@ -32,21 +35,21 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqb(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(xmm0, xmm0);
@@ -54,27 +57,27 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
code.paddsb(xmm_ge, xmm_b);
code.pcmpgtb(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.paddb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.paddw(xmm_a, xmm_b);
if (ge_inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(ones, ones);
@@ -83,10 +86,10 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqw(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
} else {
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code);
// !(b <= a+b) == b > a+b
code.movdqa(tmp_a, xmm_a);
@@ -95,22 +98,22 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.paddw(tmp_b, code.Const(xword, 0x80008000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
ctx.reg_alloc.DefineValue(code, ge_inst, tmp_b);
}
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqw(xmm0, xmm0);
@@ -118,45 +121,45 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
code.paddsw(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.paddw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_ge, xmm_a);
code.pmaxub(xmm_ge, xmm_b);
code.pcmpeqb(xmm_ge, xmm_a);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.psubb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(xmm0, xmm0);
@@ -164,12 +167,12 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
code.psubsb(xmm_ge, xmm_b);
code.pcmpgtb(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.psubb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
@@ -177,19 +180,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
if (!ge_inst) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_ge, xmm_a);
code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
@@ -197,15 +200,15 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
// (a >= b) == !(b > a)
code.pcmpeqb(ones, ones);
@@ -217,19 +220,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqw(xmm0, xmm0);
@@ -237,21 +240,21 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
code.psubsw(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsInXmm() || args[1].IsInXmm()) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
// Since,
// pavg(a, b) == (a + b + 1) >> 1
@@ -264,11 +267,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
code.pavgb(xmm_a, xmm_b);
code.pxor(xmm_a, ones);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
} else {
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
@@ -284,17 +287,17 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
code.and_(xor_a_b, 0x7F7F7F7F);
code.add(result, xor_a_b);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
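
The XMM branch leans on pavgb computing a rounding average, (a + b + 1) >> 1, while PackedHalvingAddU8 needs truncation; complementing both inputs and the output converts one into the other. A byte-wise check of that identity:

    #include <cstdint>

    constexpr uint8_t pavg(uint8_t a, uint8_t b) {      // what pavgb does per byte
        return static_cast<uint8_t>((a + b + 1) >> 1);
    }
    constexpr uint8_t hadd(uint8_t a, uint8_t b) {      // truncating halving add
        return static_cast<uint8_t>(~pavg(static_cast<uint8_t>(~a), static_cast<uint8_t>(~b)));
    }
    static_assert(hadd(3, 4) == 3, "");        // (3 + 4) >> 1, truncated
    static_assert(hadd(255, 255) == 255, "");  // 9-bit intermediate handled correctly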
void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsInXmm() || args[1].IsInXmm()) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, xmm_a);
code.pand(xmm_a, xmm_b);
@@ -302,11 +305,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(tmp, 1);
code.paddw(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
} else {
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
@@ -322,19 +325,19 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
code.and_(xor_a_b, 0x7FFF7FFF);
code.add(result, xor_a_b);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32();
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
@@ -352,15 +355,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
code.add(result, xor_a_b);
code.xor_(result, carry);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
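
The GPR branches build on the identity quoted in the comments, x + y == ((x & y) << 1) + (x ^ y), which turns a per-byte average into shift-free SWAR arithmetic once the bits that would cross lane boundaries are masked. A scalar check for the unsigned byte case (the signed variants additionally xor in a sign-correction mask, the `carry` register above):

    #include <cstdint>

    // Per-byte (x + y) >> 1 across four lanes with no lane overflow:
    // (x & y) is the carry part, already halved by not shifting it left;
    // (x ^ y) >> 1 is the sum part, masked so no bit crosses a byte boundary.
    constexpr uint32_t packed_halving_add_u8(uint32_t x, uint32_t y) {
        return (x & y) + (((x ^ y) >> 1) & 0x7F7F7F7Fu);
    }
    static_assert(packed_halving_add_u8(0x01FF0203u, 0x01FF0405u) == 0x01FF0304u, "");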
void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
@@ -373,14 +376,14 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
code.psraw(tmp, 1);
code.paddw(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@@ -403,16 +406,16 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
code.xor_(minuend, 0x80808080);
// minuend now contains the desired result.
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
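
Similarly, the subtracting variants use x - y == (x ^ y) - (((x ^ y) & y) << 1), so the halved difference is ((x ^ y) >> 1) - ((x ^ y) & y) per lane; the 0x80808080 bias in the emitted code keeps borrows inside their bytes. A per-byte check:

    #include <cstdint>

    constexpr uint8_t halving_sub_u8(uint8_t x, uint8_t y) {
        const uint8_t x_xor_y = static_cast<uint8_t>(x ^ y);
        return static_cast<uint8_t>((x_xor_y >> 1) - (x_xor_y & y));
    }
    static_assert(halving_sub_u8(4, 1) == 1, "");     // (4 - 1) >> 1
    static_assert(halving_sub_u8(1, 4) == 0xFE, "");  // (1 - 4) >> 1 = -2, truncated to a byte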
void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@@ -439,14 +442,14 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
code.xor_(minuend, 0x80808080);
code.xor_(minuend, carry);
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]);
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@@ -462,14 +465,14 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(minuend, subtrahend);
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]);
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@@ -485,17 +488,17 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(minuend, subtrahend);
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr(code).cvt32();
Xbyak::Reg32 reg_sum, reg_diff;
if (is_signed) {
@@ -543,7 +546,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code.or_(ge_sum, ge_diff);
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
ctx.reg_alloc.DefineValue(code, ge_inst, ge_sum);
}
if (is_halving) {
@@ -557,7 +560,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
// Merge them.
code.shld(reg_a_hi, reg_a_lo, 16);
ctx.reg_alloc.DefineValue(inst, reg_a_hi);
ctx.reg_alloc.DefineValue(code, inst, reg_a_hi);
}
void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
@@ -595,12 +598,12 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
(code.*fn)(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
@@ -638,9 +641,9 @@ void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// TODO: Optimize with zero-extension detection
code.movaps(tmp, code.Const(xword, 0x0000'0000'ffff'ffff));
@@ -648,45 +651,45 @@ void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) {
code.pand(xmm_b, tmp);
code.psadbw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
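
A scalar reference for this op: USAD8 sums absolute byte differences over the low four bytes only, while psadbw sums eight bytes per 64-bit half, which is why both inputs are masked with 0x0000'0000'ffff'ffff first.

    #include <cstdint>
    #include <cstdlib>

    uint32_t usad8(uint32_t a, uint32_t b) {
        uint32_t sum = 0;
        for (int i = 0; i < 4; ++i) {
            const int da = (a >> (8 * i)) & 0xFF;
            const int db = (b >> (8 * i)) & 0xFF;
            sum += static_cast<uint32_t>(std::abs(da - db));
        }
        return sum;
    }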
void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
const size_t num_args_in_xmm = args[0].IsInXmm(ctx.reg_alloc) + args[1].IsInXmm(ctx.reg_alloc) + args[2].IsInXmm(ctx.reg_alloc);
if (num_args_in_xmm >= 2) {
const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(code, args[2]);
code.pand(from, ge);
code.pandn(ge, to);
code.por(from, ge);
ctx.reg_alloc.DefineValue(inst, from);
ctx.reg_alloc.DefineValue(code, inst, from);
} else if (code.HasHostFeature(HostFeature::BMI1)) {
const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32();
code.and_(from, ge);
code.andn(to, ge, to);
code.or_(from, to);
ctx.reg_alloc.DefineValue(inst, from);
ctx.reg_alloc.DefineValue(code, inst, from);
} else {
const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32();
code.and_(from, ge);
code.not_(ge);
code.and_(ge, to);
code.or_(from, ge);
ctx.reg_alloc.DefineValue(inst, from);
ctx.reg_alloc.DefineValue(code, inst, from);
}
}
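
All three branches compute the same blend, differing only in register pressure: bytes whose GE flag is set come from `from`, the rest from `to`. The plain form:

    #include <cstdint>

    // ge is 0xFF per byte where the earlier GE flags were set, 0x00 elsewhere.
    constexpr uint32_t packed_select(uint32_t ge, uint32_t to, uint32_t from) {
        return (from & ge) | (to & ~ge);  // pand/pandn/por, or andn, or not/and/or above
    }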


@@ -34,9 +34,9 @@ template<Op op, size_t size, bool has_overflow_inst = false>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size);
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size);
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(size);
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code).changeBit(size);
constexpr u64 int_max = static_cast<u64>((std::numeric_limits<mcl::signed_integer_of_size<size>>::max)());
if constexpr (size < 64) {
@@ -66,21 +66,21 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
code.seto(overflow.cvt8());
if constexpr (has_overflow_inst) {
if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) {
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
}
} else {
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
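
A scalar shape of the saturating arithmetic this template emits, using the GCC/Clang overflow builtin in place of the seto check above; on overflow the result snaps to the boundary matching the sign of the true result, and the sticky QC bit (fpsr_qc above) records that saturation happened:

    #include <cstdint>
    #include <limits>

    int64_t saturated_add(int64_t a, int64_t b, bool& qc) {
        int64_t result;
        if (__builtin_add_overflow(a, b, &result)) {   // GCC/Clang builtin
            qc = true;                                 // sticky saturation flag
            // Overflow implies both operands share a sign; saturate toward it.
            return a < 0 ? std::numeric_limits<int64_t>::min()
                         : std::numeric_limits<int64_t>::max();
        }
        return result;
    }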
template<Op op, size_t size>
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(code, args[1]).changeBit(size);
constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
@@ -96,11 +96,11 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
code.cmovae(addend, op_result);
}
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code);
code.setb(overflow.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
ctx.reg_alloc.DefineValue(inst, addend);
ctx.reg_alloc.DefineValue(code, inst, addend);
}
} // anonymous namespace
@@ -126,10 +126,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
overflow_inst->ReplaceUsesWith(no_overflow);
}
// TODO: DefineValue directly on Argument
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(result.cvt32(), source.cvt32());
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@@ -137,9 +137,9 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = 1u << (N - 1);
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32();
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
@@ -156,10 +156,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
if (overflow_inst) {
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
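
The lea/cmp pair above implements a classic range check, matching the comment about `overflow` holding a value between 0 and mask: biasing by 2^(N-1) maps the representable range [-2^(N-1), 2^(N-1)-1] onto [0, 2^N - 1], so one unsigned comparison detects overflow in either direction. A scalar equivalent (the emitted code selects the boundary with cmovbe rather than branching):

    #include <cstdint>

    int32_t signed_saturate(int32_t value, unsigned N) {
        // N < 32 here; the N == 32 case is handled separately above.
        const uint32_t mask = (1u << N) - 1;
        const uint32_t biased = static_cast<uint32_t>(value) + (1u << (N - 1));
        if (biased > mask) {                 // out of range for N bits
            return value >= 0 ? static_cast<int32_t>((1u << (N - 1)) - 1)  // positive_saturated_value
                              : -static_cast<int32_t>(1u << (N - 1));      // negative_saturated_value
        }
        return value;
    }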
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
@@ -171,9 +171,9 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const u32 saturated_value = (1u << N) - 1;
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.xor_(overflow, overflow);
@@ -185,10 +185,10 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
if (overflow_inst) {
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
@@ -210,9 +210,9 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.movsx(x, x.cvt16());
code.movsx(y, y.cvt16());
@@ -228,15 +228,15 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
code.sets(tmp.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
ctx.reg_alloc.DefineValue(code, inst, y);
}
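
For reference, the sequence above implements ARM SQDMULH semantics. A scalar sketch of the 16-bit variant (the 32-bit emitter below is the same shape widened; the helper name is illustrative):

#include <cstdint>

// Scalar model of EmitSignedSaturatedDoublingMultiplyReturnHigh16:
// return the high half of 2*x*y, saturating the single overflowing input
// pair x == y == -0x8000 and recording it in the sticky QC flag.
std::int16_t DoublingMultiplyHigh16(std::int16_t x, std::int16_t y, bool& qc) {
    const std::int64_t product = 2 * std::int64_t(x) * std::int64_t(y);
    qc = product == 0x80000000LL;  // only x == y == -0x8000 reaches this
    return qc ? std::int16_t(0x7FFF) : std::int16_t(product >> 16);
}
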
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(code, args[0]);
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
code.movsxd(x, x.cvt32());
code.movsxd(y, y.cvt32());
@@ -252,7 +252,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
code.sets(tmp.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
ctx.reg_alloc.DefineValue(code, inst, y);
}
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {


@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -22,9 +25,9 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) {
// y = h g f e
// w = wk3 wk2 wk1 wk0
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(code, args[2]);
// x64 expects:
// 3 2 1 0
@@ -45,7 +48,7 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) {
code.shufps(y, x, part1 ? 0b10111011 : 0b00010001);
ctx.reg_alloc.DefineValue(inst, y);
ctx.reg_alloc.DefineValue(code, inst, y);
}
void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) {
@@ -53,12 +56,12 @@ void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) {
ASSERT(code.HasHostFeature(HostFeature::SHA));
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
code.sha256msg1(x, y);
ctx.reg_alloc.DefineValue(inst, x);
ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) {
@@ -66,16 +69,16 @@ void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) {
ASSERT(code.HasHostFeature(HostFeature::SHA));
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(code, args[2]);
code.movaps(xmm0, z);
code.palignr(xmm0, y, 4);
code.paddd(x, xmm0);
code.sha256msg2(x, z);
ctx.reg_alloc.DefineValue(inst, x);
ctx.reg_alloc.DefineValue(code, inst, x);
}
} // namespace Dynarmic::Backend::X64


@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -13,7 +16,7 @@ namespace Dynarmic::Backend::X64 {
void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]);
ctx.reg_alloc.HostCall(code, inst, args[0]);
code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
code.movzx(code.ABI_RETURN.cvt32(), code.ABI_RETURN.cvt8());
}

File diff suppressed because it is too large.


@@ -96,7 +96,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
if (code.HasHostFeature(HostFeature::SSE41)) {
code.ptest(nan_mask, nan_mask);
} else {
const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.movmskps(bitmask, nan_mask);
code.cmp(bitmask, 0);
}
@@ -312,13 +312,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
Xbyak::Xmm result;
if constexpr (std::is_member_function_pointer_v<Function>) {
result = ctx.reg_alloc.UseScratchXmm(args[0]);
result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
(code.*fn)(result);
});
} else {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
result = ctx.reg_alloc.ScratchXmm(code);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
fn(result, xmm_a);
});
@@ -328,13 +328,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
if constexpr (std::is_member_function_pointer_v<Function>) {
code.movaps(result, xmm_a);
@@ -352,7 +352,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
HandleNaNs<fsize, 1>(code, ctx, fpcr_controlled, {result, xmm_a}, nan_mask, nan_handler);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
enum class CheckInputNaN {
@@ -368,8 +368,8 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
const bool fpcr_controlled = args[2].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if constexpr (std::is_member_function_pointer_v<Function>) {
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
@@ -385,14 +385,14 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, xmm_a);
@@ -422,7 +422,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
template<typename F>
@@ -448,16 +448,16 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
template<size_t fpcr_controlled_arg_index = 1, typename F>
void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, F lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
EmitTwoOpFallbackWithoutRegAlloc(code, ctx, result, arg1, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
template<typename Lambda>
@@ -501,17 +501,17 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
template<typename Lambda>
void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
const bool fpcr_controlled = args[2].GetImmediateU1();
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
enum class LoadPreviousResult {
@@ -565,16 +565,16 @@ template<typename Lambda>
void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[3].GetImmediateU1();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(code, args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
} // anonymous namespace
@@ -582,9 +582,9 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
template<size_t fsize>
void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.andps(a, GetNonSignMaskVector<fsize>(code));
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
@@ -626,29 +626,29 @@ void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqps(a, b);
});
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqpd(a, b);
});
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
template<FP::RoundingMode rounding_mode>
@@ -664,13 +664,13 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(code, args[0]);
code.vcvtph2ps(result, value);
ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
@@ -696,7 +696,7 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -709,12 +709,12 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -724,8 +724,8 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtqq2pd(xmm, xmm);
} else if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
// First quadword
code.movq(tmp, xmm);
@@ -738,9 +738,9 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
// Combine
code.unpcklpd(xmm, xmm_tmp);
} else {
const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
// First quadword
code.movhlps(high_xmm, xmm);
@@ -760,12 +760,12 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -779,7 +779,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Address mem_53000000 = code.BConst<32>(xword, 0x53000000);
const Xbyak::Address mem_D3000080 = code.BConst<32>(xword, 0xD3000080);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpblendw(tmp, xmm, mem_4B000000, 0b10101010);
@@ -810,12 +810,12 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -828,9 +828,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Address unpack = code.Const(xword, 0x4530000043300000, 0);
const Xbyak::Address subtrahend = code.Const(xword, 0x4330000000000000, 0x4530000000000000);
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovapd(unpack_reg, unpack);
@@ -846,7 +846,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
code.vhaddpd(xmm, tmp1, xmm);
} else {
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
code.movapd(unpack_reg, unpack);
code.movapd(subtrahend_reg, subtrahend);
@@ -877,63 +877,63 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
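
The magic constants above (0x4530000043300000 and friends) are the usual exponent-bias trick for converting u64 lanes to f64 without native hardware support. A scalar sketch of the same computation (C++20 for std::bit_cast; the emitter does this per lane):

#include <bit>
#include <cstdint>

// Scalar model of the unpack/subtrahend trick: stamp each 32-bit half of x
// into the mantissa of a double whose exponent is 2^52 (low half) or 2^84
// (high half), subtract the same-exponent bias, then add the halves.
double U64ToDouble(std::uint64_t x) {
    const double lo = std::bit_cast<double>(0x4330000000000000ULL | (x & 0xFFFFFFFFULL))
                    - std::bit_cast<double>(0x4330000000000000ULL);  // yields (x & 0xFFFFFFFF)
    const double hi = std::bit_cast<double>(0x4530000000000000ULL | (x >> 32))
                    - std::bit_cast<double>(0x4530000000000000ULL);  // yields (x >> 32) * 2^32
    return hi + lo;  // a single rounding, as in the vector path
}
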
void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltps(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltpd(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpleps(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmplepd(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
template<size_t fsize, bool is_max>
@@ -942,12 +942,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if (ctx.FPCR(fpcr_controlled).DN()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm mask = xmm0;
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
@@ -994,7 +994,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
}
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@@ -1002,11 +1002,11 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
EmitThreeOpVectorOperation<fsize, DefaultIndexer>(
code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm mask = xmm0;
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code);
if (ctx.FPCR(fpcr_controlled).FZ()) {
const Xbyak::Xmm prev_xmm_b = xmm_b;
xmm_b = ctx.reg_alloc.ScratchXmm();
xmm_b = ctx.reg_alloc.ScratchXmm(code);
code.movaps(xmm_b, prev_xmm_b);
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
}
@@ -1053,13 +1053,13 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
const bool fpcr_controlled = inst->GetArg(2).GetU1();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp1 = xmm0;
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
// NaN requirements:
// op1 op2 result
@@ -1139,7 +1139,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
}
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@@ -1230,7 +1230,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
}
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
@@ -1316,27 +1316,27 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::FMA) && !needs_rounding_correction && !needs_nan_correction) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vfmadd231p)(result, xmm_b, xmm_c);
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
@@ -1375,21 +1375,21 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]);
FCODE(mulp)(operand2, operand3);
FCODE(addp)(operand1, operand2);
ctx.reg_alloc.DefineValue(inst, operand1);
ctx.reg_alloc.DefineValue(code, inst, operand1);
return;
}
}
@@ -1417,10 +1417,10 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
const bool fpcr_controlled = args[2].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN() && code.HasHostFeature(HostFeature::AVX)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(code);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vcmpunordp)(xmm0, result, operand);
@@ -1434,14 +1434,14 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
FCODE(blendvp)(result, twos);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
code.movaps(nan_mask, xmm_b);
code.movaps(result, xmm_a);
@@ -1464,7 +1464,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitFPVectorMulX32(EmitContext& ctx, IR::Inst* inst) {
@@ -1482,12 +1482,12 @@ void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Address mask = code.BConst<fsize>(xword, sign_mask);
code.xorps(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
@@ -1512,7 +1512,7 @@ void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b);
code.haddps(result, zero);
@@ -1521,7 +1521,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b);
code.haddpd(result, zero);
@@ -1535,8 +1535,8 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
if constexpr (fsize != 16) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
FCODE(vrcp14p)(result, operand);
@@ -1550,7 +1550,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@@ -1589,16 +1589,16 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
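
As context for the constants: GetVectorOf<fsize, false, 0, 2> reads as broadcasting 2.0, so the fused path above computes the ARM FRECPS step. A one-line scalar model (illustrative only):

// Scalar model of the fused recip step: r = 2 - a*b, the Newton-Raphson
// refinement factor for approximating 1/x.
double RecipStep(double a, double b) { return 2.0 - a * b; }
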
@@ -1606,10 +1606,10 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
@@ -1633,22 +1633,22 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(mulp)(operand1, operand2);
FCODE(subp)(result, operand1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@@ -1757,8 +1757,8 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
if constexpr (fsize != 16) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
FCODE(vrsqrt14p)(result, operand);
@@ -1772,7 +1772,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@@ -1780,9 +1780,9 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[1].GetImmediateU1();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
@@ -1816,7 +1816,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@@ -1851,9 +1851,9 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
@@ -1861,7 +1861,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
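
Similarly here: 3.0 is broadcast, the fnmadd subtracts a*b, and the final multiply by 0.5 yields the ARM FRSQRTS step. Scalar model (illustrative only):

// Scalar model of the fused rsqrt step: r = (3 - a*b) / 2, the refinement
// factor for approximating 1/sqrt(x).
double RSqrtStep(double a, double b) { return (3.0 - a * b) * 0.5; }
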
@@ -1869,11 +1869,11 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
@@ -1902,23 +1902,23 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(mulp)(operand1, operand2);
FCODE(subp)(result, operand1);
FCODE(mulp)(result, GetVectorOf<fsize, false, -1, 1>(code));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@@ -1972,12 +1972,12 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result);
code.vcvtps2ph(result, result, u8(*round_imm));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
@@ -2018,7 +2018,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
const int round_imm = [&] {
@@ -2045,8 +2045,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvttpd2qq(src, src);
} else {
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code);
code.cvttsd2si(lo, src);
code.punpckhqdq(src, src);
@@ -2093,12 +2093,12 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
FCODE(andp)(src, xmm0);
// Will we exceed unsigned range?
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code);
code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
FCODE(cmplep)(exceed_unsigned, src);
// Will we exceed signed range?
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
code.movaps(xmm0, tmp);
FCODE(cmplep)(xmm0, src);
@@ -2122,7 +2122,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
}
});
ctx.reg_alloc.DefineValue(inst, src);
ctx.reg_alloc.DefineValue(code, inst, src);
return;
}
}


@@ -26,9 +26,9 @@ namespace {
void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*saturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*unsaturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*sub_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
code.movaps(xmm0, result);
@@ -39,7 +39,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if (code.HasHostFeature(HostFeature::SSE41)) {
code.ptest(xmm0, xmm0);
} else {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.pxor(tmp, tmp);
code.pcmpeqw(xmm0, tmp);
code.pmovmskb(overflow.cvt32(), xmm0);
@@ -49,7 +49,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
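
The setnz/or_ pair above is the sticky QC update that recurs throughout this file. As a scalar sketch (hypothetical helper):

#include <cstdint>

// Saturating ops only ever set FPSR.QC; nothing in these emitters clears it.
inline void UpdateQC(std::uint8_t& fpsr_qc, bool any_lane_saturated) {
    fpsr_qc |= std::uint8_t(any_lane_saturated);
}
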
enum class Op {
@@ -65,10 +65,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
code.movaps(xmm0, operand1);
@@ -91,15 +91,15 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
if constexpr (op == Op::Add) {
@@ -150,7 +150,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if (code.HasHostFeature(HostFeature::SSE41)) {
FCODE(blendvp)(result, tmp);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
code.psrad(xmm0, 31);
if constexpr (esize == 64) {
@@ -161,7 +161,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.pandn(xmm0, result);
code.por(tmp, xmm0);
ctx.reg_alloc.DefineValue(inst, tmp);
ctx.reg_alloc.DefineValue(code, inst, tmp);
}
}
@@ -172,10 +172,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
if constexpr (op == Op::Add) {
ICODE(vpadd)(result, operand1, operand2);
@@ -191,15 +191,15 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if constexpr (op == Op::Add) {
if (code.HasHostFeature(HostFeature::AVX)) {
@@ -252,10 +252,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
if constexpr (op == Op::Add) {
code.por(result, tmp);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
code.pandn(tmp, result);
ctx.reg_alloc.DefineValue(inst, tmp);
ctx.reg_alloc.DefineValue(code, inst, tmp);
}
}


@@ -1,25 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/x64/hostloc.h"
#include <xbyak/xbyak.h>
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/stack_layout.h"
namespace Dynarmic::Backend::X64 {
Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
ASSERT(HostLocIsGPR(loc));
return Xbyak::Reg64(static_cast<int>(loc));
}
Xbyak::Xmm HostLocToXmm(HostLoc loc) {
ASSERT(HostLocIsXMM(loc));
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
}
} // namespace Dynarmic::Backend::X64


@@ -152,7 +152,14 @@ const HostLocList any_xmm = {
HostLoc::XMM15,
};
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
Xbyak::Xmm HostLocToXmm(HostLoc loc);
inline Xbyak::Reg64 HostLocToReg64(HostLoc loc) noexcept {
ASSERT(HostLocIsGPR(loc));
return Xbyak::Reg64(int(loc));
}
inline Xbyak::Xmm HostLocToXmm(HostLoc loc) noexcept {
ASSERT(HostLocIsXMM(loc));
return Xbyak::Xmm(int(loc) - int(HostLoc::XMM0));
}
} // namespace Dynarmic::Backend::X64
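
With the definitions moved into the header as inline noexcept functions, each conversion reduces to a plain register construction at the call site. A usage sketch, assuming the HostLoc enumerators and include path shown elsewhere in this diff:

#include <xbyak/xbyak.h>
#include "dynarmic/backend/x64/hostloc.h"

using Dynarmic::Backend::X64::HostLoc;
using Dynarmic::Backend::X64::HostLocToReg64;
using Dynarmic::Backend::X64::HostLocToXmm;

Xbyak::Reg64 ExampleGpr() { return HostLocToReg64(HostLoc::RAX); }  // rax
Xbyak::Xmm ExampleXmm() { return HostLocToXmm(HostLoc::XMM15); }    // xmm15
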


@@ -24,15 +24,6 @@
namespace Dynarmic::Backend::X64 {
#define MAYBE_AVX(OPCODE, ...) \
[&] { \
if (code->HasHostFeature(HostFeature::AVX)) { \
code->v##OPCODE(__VA_ARGS__); \
} else { \
code->OPCODE(__VA_ARGS__); \
} \
}()
static inline bool CanExchange(const HostLoc a, const HostLoc b) noexcept {
return HostLocIsGPR(a) && HostLocIsGPR(b);
}
@@ -107,14 +98,14 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
}
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept {
using namespace Xbyak::util;
for (auto const value : values) {
code->mov(code->ABI_PARAM1, rsp);
code->mov(code->ABI_PARAM2, host_loc_index);
code->mov(code->ABI_PARAM3, value->GetName());
code->mov(code->ABI_PARAM4, GetBitWidth(value->GetType()));
code->CallFunction(PrintVerboseDebuggingOutputLine);
code.mov(code.ABI_PARAM1, rsp);
code.mov(code.ABI_PARAM2, host_loc_index);
code.mov(code.ABI_PARAM3, value->GetName());
code.mov(code.ABI_PARAM4, GetBitWidth(value->GetType()));
code.CallFunction(PrintVerboseDebuggingOutputLine);
}
}
@@ -128,7 +119,7 @@ bool Argument::FitsInImmediateU32() const noexcept {
bool Argument::FitsInImmediateS32() const noexcept {
if (!IsImmediate())
return false;
const s64 imm = static_cast<s64>(value.GetImmediateAsU64());
const s64 imm = s64(value.GetImmediateAsU64());
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}
@@ -174,36 +165,38 @@ IR::AccType Argument::GetImmediateAccType() const noexcept {
}
/// Is this value currently in a GPR?
bool Argument::IsInGpr() const noexcept {
bool Argument::IsInGpr(RegAlloc& reg_alloc) const noexcept {
if (IsImmediate())
return false;
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}
/// Is this value currently in a XMM?
bool Argument::IsInXmm() const noexcept {
bool Argument::IsInXmm(RegAlloc& reg_alloc) const noexcept {
if (IsImmediate())
return false;
return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
}
/// Is this value currently in memory?
bool Argument::IsInMemory() const noexcept {
bool Argument::IsInMemory(RegAlloc& reg_alloc) const noexcept {
if (IsImmediate())
return false;
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
RegAlloc::RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
: gpr_order(gpr_order),
xmm_order(xmm_order),
code(code)
xmm_order(xmm_order)
{}
//static std::uint64_t Zfncwjkrt_blockOfCodeShim = 0;
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept {
ArgumentInfo ret{Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
ArgumentInfo ret{
Argument{},
Argument{},
Argument{},
Argument{}
};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const auto arg = inst->GetArg(i);
ret[i].value = arg;
@@ -228,34 +221,34 @@ void RegAlloc::RegisterPseudoOperation(const IR::Inst* inst) noexcept {
}
}
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) noexcept {
Xbyak::Reg64 RegAlloc::UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
return HostLocToReg64(UseScratchImpl(code, arg.value, gpr_order));
}
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) noexcept {
Xbyak::Xmm RegAlloc::UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
return HostLocToXmm(UseScratchImpl(code, arg.value, xmm_order));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) noexcept {
void RegAlloc::UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
UseScratchImpl(arg.value, {host_loc});
UseScratchImpl(code, arg.value, {host_loc});
}
void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
DefineValueImpl(inst, hostloc);
DefineValueImpl(code, inst, hostloc);
}
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) noexcept {
void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
DefineValueImpl(inst, arg.value);
DefineValueImpl(code, inst, arg.value);
}
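
Taken together, these signature changes are the whole shape of the refactor: RegAlloc no longer stores a BlockOfCode pointer, so every allocating or emitting call receives the code object explicitly. A before/after sketch of a typical emitter (EmitExample is hypothetical; the calls match the signatures in this diff):

// Before: the allocator dereferenced a stored BlockOfCode*.
//   const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
//   ctx.reg_alloc.DefineValue(inst, result);
//
// After: the emitter threads its BlockOfCode through every call.
void EmitExample(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[0]);
    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
    code.mov(result, value);
    ctx.reg_alloc.DefineValue(code, inst, result);
}
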
void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
@@ -264,9 +257,9 @@ void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
LocInfo(hostloc).ReleaseOne();
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
}
const auto* use_inst = use_value.GetInst();
@@ -280,25 +273,25 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_ve
}
if (LocInfo(current_location).IsLocked()) {
return UseScratchImpl(use_value, desired_locations);
return UseScratchImpl(code, use_value, desired_locations);
}
const HostLoc destination_location = SelectARegister(desired_locations);
if (max_bit_width > HostLocBitWidth(destination_location)) {
return UseScratchImpl(use_value, desired_locations);
return UseScratchImpl(code, use_value, desired_locations);
} else if (CanExchange(destination_location, current_location)) {
Exchange(destination_location, current_location);
Exchange(code, destination_location, current_location);
} else {
MoveOutOfTheWay(destination_location);
Move(destination_location, current_location);
MoveOutOfTheWay(code, destination_location);
Move(code, destination_location, current_location);
}
LocInfo(destination_location).ReadLock();
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
}
const auto* use_inst = use_value.GetInst();
@@ -308,7 +301,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
if (!LocInfo(current_location).IsLastUse()) {
MoveOutOfTheWay(current_location);
MoveOutOfTheWay(code, current_location);
} else {
LocInfo(current_location).SetLastUse();
}
@@ -317,20 +310,22 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
}
const HostLoc destination_location = SelectARegister(desired_locations);
MoveOutOfTheWay(destination_location);
CopyToScratch(bit_width, destination_location, current_location);
MoveOutOfTheWay(code, destination_location);
CopyToScratch(code, bit_width, destination_location, current_location);
LocInfo(destination_location).WriteLock();
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
HostLoc RegAlloc::ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
const HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
MoveOutOfTheWay(code, location);
LocInfo(location).WriteLock();
return location;
}
void RegAlloc::HostCall(IR::Inst* result_def,
void RegAlloc::HostCall(
BlockOfCode& code,
IR::Inst* result_def,
const std::optional<Argument::copyable_reference> arg0,
const std::optional<Argument::copyable_reference> arg1,
const std::optional<Argument::copyable_reference> arg2,
@@ -348,20 +343,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
return ret;
}();
ScratchGpr(ABI_RETURN);
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
ScratchGpr(code, ABI_RETURN);
if (result_def)
DefineValueImpl(code, result_def, ABI_RETURN);
for (size_t i = 0; i < args.size(); i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
UseScratch(code, *args[i], args_hostloc[i]);
} else {
ScratchGpr(args_hostloc[i]); // TODO: Force spill
ScratchGpr(code, args_hostloc[i]); // TODO: Force spill
}
}
// Must match with ScratchImpl
for (auto const gpr : other_caller_save) {
MoveOutOfTheWay(gpr);
MoveOutOfTheWay(code, gpr);
LocInfo(gpr).WriteLock();
}
for (size_t i = 0; i < args.size(); i++) {
@@ -370,13 +365,13 @@ void RegAlloc::HostCall(IR::Inst* result_def,
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
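// Narrow arguments are zero-extended below because the host ABI does not
// guarantee that the upper bits of an argument register are clear; callees
// may legitimately read the full 64-bit GPR.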
switch (args[i]->get().GetType()) {
case IR::Type::U8:
code->movzx(reg.cvt32(), reg.cvt8());
code.movzx(reg.cvt32(), reg.cvt8());
break;
case IR::Type::U16:
code->movzx(reg.cvt32(), reg.cvt16());
code.movzx(reg.cvt32(), reg.cvt16());
break;
case IR::Type::U32:
code->mov(reg.cvt32(), reg.cvt32());
code.mov(reg.cvt32(), reg.cvt32());
break;
case IR::Type::U64:
break; // no-op
@@ -387,18 +382,18 @@ void RegAlloc::HostCall(IR::Inst* result_def,
}
}
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
void RegAlloc::AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept {
ASSERT(stack_space < size_t((std::numeric_limits<s32>::max)()));
ASSERT(reserved_stack_space == 0);
reserved_stack_space = stack_space;
code->sub(code->rsp, u32(stack_space));
code.sub(code.rsp, u32(stack_space));
}
void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
void RegAlloc::ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept {
ASSERT(stack_space < size_t((std::numeric_limits<s32>::max)()));
ASSERT(reserved_stack_space == stack_space);
reserved_stack_space = 0;
code->add(code->rsp, u32(stack_space));
code.add(code.rsp, u32(stack_space));
}
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
@@ -458,92 +453,75 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
return *it_final;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++)
if (hostloc_info[i].ContainsValue(value))
return HostLoc(i);
return std::nullopt;
}
void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept {
ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined");
LocInfo(host_loc).AddValue(def_inst);
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined");
if (use_inst.IsImmediate()) {
const HostLoc location = ScratchImpl(gpr_order);
DefineValueImpl(def_inst, location);
LoadImmediate(use_inst, location);
const HostLoc location = ScratchImpl(code, gpr_order);
DefineValueImpl(code, def_inst, location);
LoadImmediate(code, use_inst, location);
return;
}
ASSERT(ValueLocation(use_inst.GetInst()) && "use_inst must already be defined");
const HostLoc location = *ValueLocation(use_inst.GetInst());
DefineValueImpl(def_inst, location);
DefineValueImpl(code, def_inst, location);
}
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
ASSERT(imm.IsImmediate() && "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
code->xor_(reg.cvt32(), reg.cvt32());
} else {
code->mov(reg, imm_value);
}
} else if (HostLocIsXMM(host_loc)) {
const Xbyak::Xmm reg = HostLocToXmm(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
MAYBE_AVX(xorps, reg, reg);
} else {
MAYBE_AVX(movaps, reg, code->Const(code->xword, imm_value));
}
} else {
UNREACHABLE();
}
return host_loc;
}
void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
void RegAlloc::Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
ASSERT(bit_width <= HostLocBitWidth(to));
ASSERT(!LocInfo(from).IsEmpty() && "Mov eliminated");
EmitMove(bit_width, to, from);
EmitMove(code, bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
void RegAlloc::CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept {
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
EmitMove(bit_width, to, from);
EmitMove(code, bit_width, to, from);
}
void RegAlloc::Exchange(HostLoc a, HostLoc b) noexcept {
void RegAlloc::Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept {
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
if (LocInfo(a).IsEmpty()) {
Move(a, b);
Move(code, a, b);
} else if (LocInfo(b).IsEmpty()) {
Move(b, a);
Move(code, b, a);
} else {
EmitExchange(a, b);
EmitExchange(code, a, b);
std::swap(LocInfo(a), LocInfo(b));
}
}
void RegAlloc::MoveOutOfTheWay(HostLoc reg) noexcept {
void RegAlloc::MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept {
ASSERT(!LocInfo(reg).IsLocked());
if (!LocInfo(reg).IsEmpty()) {
SpillRegister(reg);
SpillRegister(code, reg);
}
}
void RegAlloc::SpillRegister(HostLoc loc) noexcept {
void RegAlloc::SpillRegister(BlockOfCode& code, HostLoc loc) noexcept {
ASSERT(HostLocIsRegister(loc) && "Only registers can be spilled");
ASSERT(!LocInfo(loc).IsEmpty() && "There is no need to spill unoccupied registers");
ASSERT(!LocInfo(loc).IsLocked() && "Registers that have been allocated must not be spilt");
auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
Move(new_loc, loc);
Move(code, new_loc, loc);
}
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
@@ -568,9 +546,39 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
return loc;
UNREACHABLE();
};
}
void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
#define MAYBE_AVX(OPCODE, ...) \
[&] { \
if (code.HasHostFeature(HostFeature::AVX)) code.v##OPCODE(__VA_ARGS__); \
else code.OPCODE(__VA_ARGS__); \
}()
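// e.g. MAYBE_AVX(movaps, dst, src) emits vmovaps when the host reports AVX
// support and falls back to the legacy SSE movaps encoding otherwise.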
HostLoc RegAlloc::LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept {
ASSERT(imm.IsImmediate() && "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
code.xor_(reg.cvt32(), reg.cvt32());
} else {
code.mov(reg, imm_value);
}
} else if (HostLocIsXMM(host_loc)) {
const Xbyak::Xmm reg = HostLocToXmm(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
MAYBE_AVX(xorps, reg, reg);
} else {
MAYBE_AVX(movaps, reg, code.Const(code.xword, imm_value));
}
} else {
UNREACHABLE();
}
return host_loc;
}
void RegAlloc::EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
ASSERT(HostLocIsSpill(loc));
size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
@@ -585,9 +593,9 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), HostLocToReg64(from));
code.mov(HostLocToReg64(to), HostLocToReg64(from));
} else {
code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
}
} else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
@@ -642,25 +650,26 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
code.mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
} else {
code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
code.mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
code.mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
} else {
code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
code.mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
}
} else {
UNREACHABLE();
}
}
#undef MAYBE_AVX
void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
void RegAlloc::EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept {
ASSERT(HostLocIsGPR(a) && HostLocIsGPR(b) && "Exchanging XMM registers is unneeded OR an invalid emit");
code->xchg(HostLocToReg64(a), HostLocToReg64(b));
code.xchg(HostLocToReg64(a), HostLocToReg64(b));
}
} // namespace Dynarmic::Backend::X64
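The net effect of the .cpp changes above is that RegAlloc no longer dereferences a stored BlockOfCode pointer; every helper receives the code buffer explicitly. A minimal sketch of how a hypothetical emitter call site migrates (EmitExample and its body are illustrative, not taken from this diff):

void EmitExample(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    // formerly: reg_alloc.UseGpr(args[0]) / reg_alloc.ScratchGpr(), with the
    // buffer reached through the allocator's own BlockOfCode* member
    const Xbyak::Reg64 src = reg_alloc.UseGpr(code, args[0]);
    const Xbyak::Reg64 dst = reg_alloc.ScratchGpr(code);
    code.mov(dst, src);
    reg_alloc.DefineValue(code, inst, dst);
}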


@@ -81,7 +81,7 @@ public:
return 1 << max_bit_width;
}
void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept;
private:
// non-trivial
boost::container::small_vector<IR::Inst*, 3> values; //24
@@ -129,16 +129,15 @@ public:
IR::AccType GetImmediateAccType() const noexcept;
/// Is this value currently in a GPR?
bool IsInGpr() const noexcept;
bool IsInXmm() const noexcept;
bool IsInMemory() const noexcept;
bool IsInGpr(RegAlloc& reg_alloc) const noexcept;
bool IsInXmm(RegAlloc& reg_alloc) const noexcept;
bool IsInMemory(RegAlloc& reg_alloc) const noexcept;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
explicit Argument() {}
//data
IR::Value value; //8
RegAlloc& reg_alloc; //8
bool allocated = false; //1
};
@@ -146,55 +145,57 @@ class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
RegAlloc() noexcept = default;
RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
inline bool IsValueLive(const IR::Inst* inst) const noexcept {
return !!ValueLocation(inst);
}
inline Xbyak::Reg64 UseGpr(Argument& arg) noexcept {
inline Xbyak::Reg64 UseGpr(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, gpr_order));
return HostLocToReg64(UseImpl(code, arg.value, gpr_order));
}
inline Xbyak::Xmm UseXmm(Argument& arg) noexcept {
inline Xbyak::Xmm UseXmm(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseImpl(arg.value, xmm_order));
return HostLocToXmm(UseImpl(code, arg.value, xmm_order));
}
inline OpArg UseOpArg(Argument& arg) noexcept {
return UseGpr(arg);
inline OpArg UseOpArg(BlockOfCode& code, Argument& arg) noexcept {
return UseGpr(code, arg);
}
inline void Use(Argument& arg, const HostLoc host_loc) noexcept {
inline void Use(BlockOfCode& code, Argument& arg, const HostLoc host_loc) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
UseImpl(code, arg.value, {host_loc});
}
Xbyak::Reg64 UseScratchGpr(Argument& arg) noexcept;
Xbyak::Xmm UseScratchXmm(Argument& arg) noexcept;
void UseScratch(Argument& arg, HostLoc host_loc) noexcept;
Xbyak::Reg64 UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept;
Xbyak::Xmm UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept;
void UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept;
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
void DefineValue(IR::Inst* inst, Argument& arg) noexcept;
void DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
void DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept;
void Release(const Xbyak::Reg& reg) noexcept;
inline Xbyak::Reg64 ScratchGpr() noexcept {
return HostLocToReg64(ScratchImpl(gpr_order));
inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code) noexcept {
return HostLocToReg64(ScratchImpl(code, gpr_order));
}
inline Xbyak::Reg64 ScratchGpr(const HostLoc desired_location) noexcept {
return HostLocToReg64(ScratchImpl({desired_location}));
inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code, const HostLoc desired_location) noexcept {
return HostLocToReg64(ScratchImpl(code, {desired_location}));
}
inline Xbyak::Xmm ScratchXmm() noexcept {
return HostLocToXmm(ScratchImpl(xmm_order));
inline Xbyak::Xmm ScratchXmm(BlockOfCode& code) noexcept {
return HostLocToXmm(ScratchImpl(code, xmm_order));
}
inline Xbyak::Xmm ScratchXmm(HostLoc desired_location) noexcept {
return HostLocToXmm(ScratchImpl({desired_location}));
inline Xbyak::Xmm ScratchXmm(BlockOfCode& code, HostLoc desired_location) noexcept {
return HostLocToXmm(ScratchImpl(code, {desired_location}));
}
void HostCall(IR::Inst* result_def = nullptr,
void HostCall(
BlockOfCode& code,
IR::Inst* result_def = nullptr,
const std::optional<Argument::copyable_reference> arg0 = {},
const std::optional<Argument::copyable_reference> arg1 = {},
const std::optional<Argument::copyable_reference> arg2 = {},
@@ -202,67 +203,56 @@ public:
) noexcept;
// TODO: Values in host flags
void AllocStackSpace(const size_t stack_space) noexcept;
void ReleaseStackSpace(const size_t stack_space) noexcept;
void AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
void ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
inline void EndOfAllocScope() noexcept {
for (auto& iter : hostloc_info) {
for (auto& iter : hostloc_info)
iter.ReleaseAll();
}
}
inline void AssertNoMoreUses() noexcept {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) noexcept { return i.IsEmpty(); }));
}
inline void EmitVerboseDebuggingOutput() noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++) {
inline void EmitVerboseDebuggingOutput(BlockOfCode& code) noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++)
hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
}
}
private:
friend struct Argument;
HostLoc SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept;
inline std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++) {
if (hostloc_info[i].ContainsValue(value)) {
return HostLoc(i);
}
}
return std::nullopt;
}
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept;
HostLoc UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept;
void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept;
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
HostLoc LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept;
void Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept;
void CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept;
void Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept;
void MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept;
HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept;
void Move(HostLoc to, HostLoc from) noexcept;
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept;
void Exchange(HostLoc a, HostLoc b) noexcept;
void MoveOutOfTheWay(HostLoc reg) noexcept;
void SpillRegister(HostLoc loc) noexcept;
void SpillRegister(BlockOfCode& code, HostLoc loc) noexcept;
HostLoc FindFreeSpill(bool is_xmm) const noexcept;
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
return hostloc_info[size_t(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
return hostloc_info[size_t(loc)];
}
void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
void EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
void EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept;
//data
alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;
alignas(64) boost::container::static_vector<HostLoc, 28> xmm_order;
alignas(64) std::array<HostLocInfo, NonSpillHostLocCount + SpillCount> hostloc_info;
BlockOfCode* code = nullptr;
size_t reserved_stack_space = 0;
};
// Ensure a cache line (or less) is used; this is essential.
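One plausible form of the guard this comment refers to (the HostLocInfo target and the 64-byte bound are both assumptions, not taken from this diff):

static_assert(sizeof(HostLocInfo) <= 64,
              "keep each HostLocInfo within a single cache line");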


@@ -100,9 +100,14 @@ bool Value::IsEmpty() const noexcept {
}
bool Value::IsImmediate() const noexcept {
if (IsIdentity())
return inner.inst->GetArg(0).IsImmediate();
return type != Type::Opaque;
IR::Type current_type = type;
IR::Inst const* current_inst = inner.inst;
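// Peel chains of Identity ops with a loop; the previous recursive form
// re-entered IsImmediate() once per link in the chain.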
while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
Value const& arg = current_inst->GetArg(0);
current_type = arg.type;
current_inst = arg.inner.inst;
}
return current_type != Type::Opaque;
}
Type Value::GetType() const noexcept {