Compare commits
6 Commits
pt-fix-att ... netgate1

| Author | SHA1 | Date |
|---|---|---|
| | 295e2df7a9 | |
| | 93d7ba9a70 | |
| | 4dd8c0ca9e | |
| | 63da545b28 | |
| | 77d83b008a | |
| | 69a84ee0a6 | |
@@ -1160,8 +1160,12 @@ add_library(core STATIC
 if (ENABLE_WIFI_SCAN)
     # find_package(libiw REQUIRED)
     target_compile_definitions(core PRIVATE ENABLE_WIFI_SCAN)
-    target_link_libraries(core PRIVATE iw)
     target_sources(core PRIVATE internal_network/wifi_scanner.cpp)
+    if (PLATFORM_LINUX)
+        target_link_libraries(core PRIVATE iw)
+    endif()
+else()
+    target_sources(core PRIVATE internal_network/wifi_scanner_dummy.cpp)
 endif()
 
 if (WIN32)
@@ -9,6 +9,30 @@
 #include <ranges>
 #include <bit>
 
+#ifdef _WIN32
+#include <iphlpapi.h>
+#elif defined(__linux__) || defined(__ANDROID__)
+#include <cerrno>
+#include <ifaddrs.h>
+#include <net/if.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <net/if_dl.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#endif
+
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/settings.h"
@@ -16,14 +40,6 @@
 #include "core/internal_network/emu_net_state.h"
 #include "core/internal_network/network_interface.h"
 
-#ifdef _WIN32
-#include <iphlpapi.h>
-#else
-#include <cerrno>
-#include <ifaddrs.h>
-#include <net/if.h>
-#endif
-
 namespace Network {
 
 #ifdef _WIN32
@@ -71,22 +87,12 @@ std::vector<Network::NetworkInterface> GetAvailableNetworkInterfaces() {
             gw = reinterpret_cast<sockaddr_in*>(a->FirstGatewayAddress->Address.lpSockaddr)
                      ->sin_addr;
 
-        HostAdapterKind kind = HostAdapterKind::Ethernet;
-        switch (a->IfType) {
-        case IF_TYPE_IEEE80211: // 802.11 Wi-Fi
-            kind = HostAdapterKind::Wifi;
-            break;
-        default:
-            kind = HostAdapterKind::Ethernet;
-            break;
-        }
-
         result.emplace_back(Network::NetworkInterface{
             .name = Common::UTF16ToUTF8(std::wstring{a->FriendlyName}),
             .ip_address = ip,
             .subnet_mask = mask,
             .gateway = gw,
-            .kind = kind
+            .kind = (a->IfType == IF_TYPE_IEEE80211 ? HostAdapterKind::Wifi : HostAdapterKind::Ethernet)
         });
     }
@@ -96,158 +102,146 @@ std::vector<Network::NetworkInterface> GetAvailableNetworkInterfaces() {
 #else
 
 std::vector<Network::NetworkInterface> GetAvailableNetworkInterfaces() {
+#if defined(__ANDROID__) || defined(__linux__)
     struct ifaddrs* ifaddr = nullptr;
 
     if (getifaddrs(&ifaddr) != 0) {
-        LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}",
-                  std::strerror(errno));
+        LOG_ERROR(Network, "getifaddrs: {}", std::strerror(errno));
         return {};
     }
 
-    std::vector<Network::NetworkInterface> result;
-
-    for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
-        if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) {
-            continue;
-        }
-
-        if (ifa->ifa_addr->sa_family != AF_INET) {
-            continue;
-        }
-
-        if ((ifa->ifa_flags & IFF_UP) == 0 || (ifa->ifa_flags & IFF_LOOPBACK) != 0) {
-            continue;
-        }
-
-#ifdef ANDROID
-        // On Android, we can't reliably get gateway info from /proc/net/route
-        // Just use 0 as the gateway address
-        result.emplace_back(Network::NetworkInterface{
-            .name{ifa->ifa_name},
-            .ip_address{std::bit_cast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
-            .subnet_mask{std::bit_cast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
-            .gateway{in_addr{.s_addr = 0}}
-        });
+    // TODO: This is still horrible, it was worse before (somehow)
+    struct RoutingEntry {
+        std::string iface_name;
+        u32 dest;
+        u32 gateway;
+        u32 flags;
+    };
+    std::vector<RoutingEntry> routes{};
+#ifdef __ANDROID__
+    // Even through Linux based, we can't reliably obtain routing information from there :(
 #else
-        u32 gateway{};
-
-        std::ifstream file{"/proc/net/route"};
-        if (!file.is_open()) {
-            LOG_ERROR(Network, "Failed to open \"/proc/net/route\"");
-
-            // Solaris defines s_addr as a macro, can't use special C++ shenanigans here
-            in_addr gateway_0;
-            gateway_0.s_addr = gateway;
-            result.emplace_back(Network::NetworkInterface{
-                .name = ifa->ifa_name,
-                .ip_address = std::bit_cast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr,
-                .subnet_mask = std::bit_cast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr,
-                .gateway = gateway_0
-            });
-            continue;
-        }
-
-        // ignore header
-        file.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');
-
-        bool gateway_found = false;
-
+    if (std::ifstream file("/proc/net/route"); file.is_open()) {
+        file.ignore((std::numeric_limits<std::streamsize>::max)(), '\n'); //ignore header
         for (std::string line; std::getline(file, line);) {
             std::istringstream iss{line};
-
-            std::string iface_name;
-            iss >> iface_name;
-            if (iface_name != ifa->ifa_name) {
-                continue;
-            }
-
-            iss >> std::hex;
-
-            u32 dest{};
-            iss >> dest;
-            if (dest != 0) {
-                // not the default route
-                continue;
-            }
-
-            iss >> gateway;
-
-            u16 flags{};
-            iss >> flags;
-
-            // flag RTF_GATEWAY (defined in <linux/route.h>)
-            if ((flags & 0x2) == 0) {
-                continue;
-            }
-
-            gateway_found = true;
-            break;
+            RoutingEntry info{};
+            iss >> info.iface_name >> std::hex
+                >> info.dest >> info.gateway >> info.flags;
+            routes.emplace_back(info);
         }
-
-        if (!gateway_found) {
-            gateway = 0;
-        }
-
-        in_addr gateway_0;
-        gateway_0.s_addr = gateway;
-        result.emplace_back(Network::NetworkInterface{
+    } else {
+        LOG_WARNING(Network, "\"/proc/net/route\" not found - using gateway 0");
+    }
+#endif
+    std::vector<Network::NetworkInterface> ifaces;
+    for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
+        if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr /* Have a netmask and address */
+            || ifa->ifa_addr->sa_family != AF_INET /* Must be of kind AF_INET */
+            || (ifa->ifa_flags & IFF_UP) == 0 || (ifa->ifa_flags & IFF_LOOPBACK) != 0) /* Not loopback */
+            continue;
+        // Just use 0 as the gateway address if not found OR routes are empty :)
+        auto const it = std::ranges::find_if(routes, [&ifa](auto const& e) {
+            return e.iface_name == ifa->ifa_name
+                && e.dest == 0 // not the default route
+                && (e.flags & 0x02) != 0; // flag RTF_GATEWAY (defined in <linux/route.h>)
+        });
+        in_addr gw; // Solaris defines s_addr as a macro, can't use special C++ shenanigans here
+        gw.s_addr = it != routes.end() ? it->gateway : 0;
+        ifaces.emplace_back(Network::NetworkInterface{
             .name = ifa->ifa_name,
             .ip_address = std::bit_cast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr,
             .subnet_mask = std::bit_cast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr,
-            .gateway = gateway_0
+            .gateway = gw
         });
-#endif // ANDROID
     }
-
     freeifaddrs(ifaddr);
-    return result;
+    return ifaces;
+#elif defined(__FreeBSD__)
+    std::vector<Network::NetworkInterface> ifaces;
+    int fd = ::socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
+    if (fd < 0) {
+        LOG_ERROR(Network, "socket: {}", std::strerror(errno));
+        return {};
+    }
+
+    size_t bufsz = 0;
+    int mib[6] = {
+        CTL_NET, PF_ROUTE, 0,
+        AF_UNSPEC, NET_RT_IFLIST, 0
+    };
+    if (::sysctl(mib, sizeof(mib) / sizeof(mib[0]), nullptr, &bufsz, nullptr, 0) < 0) {
+        LOG_ERROR(Network, "sysctl.1: {}", std::strerror(errno));
+        ::close(fd);
+        return {};
+    }
+    std::vector<char> buf(bufsz);
+    if (::sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf.data(), &bufsz, nullptr, 0) < 0) {
+        LOG_ERROR(Network, "sysctl.2: {}", std::strerror(errno));
+        ::close(fd);
+        return {};
+    }
+
+    struct rt_msghdr const *rtm = NULL;
+    for (char *next = buf.data(); next < buf.data() + bufsz; next += rtm->rtm_msglen) {
+        rtm = (struct rt_msghdr const *)next;
+        if (rtm->rtm_type == RTM_IFINFO) {
+            struct if_msghdr const* ifm = (struct if_msghdr const *)rtm;
+            size_t msglen = rtm->rtm_msglen - sizeof(*ifm);
+            char const* p = (char const*)(ifm + 1);
+
+            Network::NetworkInterface iface{};
+            for (size_t i = 0; i < RTAX_MAX; i++)
+                if ((ifm->ifm_addrs & (1 << i)) != 0) {
+                    struct sockaddr const* sa = reinterpret_cast<struct sockaddr const*>(p);
+                    if (msglen == 0 || msglen < SA_SIZE(sa))
+                        break;
+                    if (i == RTA_NETMASK && sa->sa_family == AF_LINK) {
+                        size_t namelen = 0;
+                        struct sockaddr_dl const* sdl = reinterpret_cast<struct sockaddr_dl const*>(sa);
+                        ::link_ntoa_r(sdl, nullptr, &namelen);
+                        iface.name = std::string(namelen, ' ');
+                        ::link_ntoa_r(sdl, iface.name.data(), &namelen);
+                        std::memcpy(&iface.ip_address, sa, sizeof(struct sockaddr_in));
+                    }
+                    msglen -= SA_SIZE(sa);
+                    p += SA_SIZE(sa);
+                }
+            ifaces.push_back(iface);
+        }
+    }
+    ::close(fd);
+    return ifaces;
+#else
+    return {};
+#endif
 }
 
 #endif // _WIN32
 
 std::optional<Network::NetworkInterface> GetSelectedNetworkInterface() {
-    const auto& selected_network_interface = Settings::values.network_interface.GetValue();
-    const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
-    if (network_interfaces.empty()) {
-        LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces");
-        return std::nullopt;
-    }
-
-#ifdef __ANDROID__
-    if (selected_network_interface.empty()) {
-        return network_interfaces[0];
-    }
-#endif
-
-    const auto res =
-        std::ranges::find_if(network_interfaces, [&selected_network_interface](const auto& iface) {
-            return iface.name == selected_network_interface;
-        });
-
-    if (res == network_interfaces.end()) {
+    auto const& sel_if = Settings::values.network_interface.GetValue();
+    if (auto const ifaces = Network::GetAvailableNetworkInterfaces(); ifaces.size() > 0) {
+        if (sel_if.empty())
+            return ifaces[0];
+        if (auto const res = std::ranges::find_if(ifaces, [&sel_if](const auto& iface) {
+                return iface.name == sel_if;
+            }); res != ifaces.end())
+            return *res;
         // Only print the error once to avoid log spam
         static bool print_error = true;
         if (print_error) {
-            LOG_ERROR(Network, "Couldn't find selected interface \"{}\"",
-                      selected_network_interface);
+            LOG_WARNING(Network, "Couldn't find interface \"{}\"", sel_if);
             print_error = false;
         }
 
         return std::nullopt;
     }
 
-    return *res;
+    LOG_WARNING(Network, "No interfaces");
+    return std::nullopt;
 }
 
 void SelectFirstNetworkInterface() {
-    const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
-
-    if (network_interfaces.empty()) {
-        return;
-    }
-
-    Settings::values.network_interface.SetValue(network_interfaces[0].name);
+    if (auto const ifaces = Network::GetAvailableNetworkInterfaces(); ifaces.size() > 0)
+        Settings::values.network_interface.SetValue(ifaces[0].name);
 }
 
 } // namespace Network
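A note on the route parsing introduced above: /proc/net/route is a whitespace-separated table whose first line is a header, and the Destination, Gateway and Flags columns are hex-encoded (the gateway is a little-endian IPv4 address); RTF_GATEWAY is bit 0x2. A minimal standalone sketch of the same parse, for illustration only and not part of this changeset:

// Illustrative sketch of the /proc/net/route parse used in the diff above;
// field layout per proc(5): Iface Destination Gateway Flags ...
#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>

int main() {
    std::ifstream file{"/proc/net/route"};
    if (!file.is_open())
        return 1;
    // Skip the header line.
    file.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');
    for (std::string line; std::getline(file, line);) {
        std::istringstream iss{line};
        std::string iface;
        std::uint32_t dest{}, gateway{}, flags{};
        iss >> iface >> std::hex >> dest >> gateway >> flags;
        // dest == 0 marks the default route; 0x2 is RTF_GATEWAY.
        if (dest == 0 && (flags & 0x2) != 0)
            std::cout << iface << " gateway (little-endian hex): " << std::hex << gateway << '\n';
    }
}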
@@ -6,11 +6,6 @@
 #include <thread>
 #include <vector>
 
-#include "common/logging/log.h"
-#include "core/internal_network/wifi_scanner.h"
-
-using namespace std::chrono_literals;
-
 #ifdef _WIN32
 #define NOMINMAX
 #include <windows.h>
@@ -18,16 +13,30 @@ using namespace std::chrono_literals;
 #ifdef _MSC_VER
 #pragma comment(lib, "wlanapi.lib")
 #endif
+#elif defined(__linux__) && !defined(__ANDROID__)
+#include <iwlib.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <net80211/ieee80211_ioctl.h>
 #endif
 
+#include "common/logging/log.h"
+#include "core/internal_network/network_interface.h"
+#include "core/internal_network/wifi_scanner.h"
+
+using namespace std::chrono_literals;
+
 namespace Network {
+#ifdef ENABLE_WIFI_SCAN
+#ifdef _WIN32
 static u8 QualityToPercent(DWORD q) {
-    return static_cast<u8>(q);
+    return u8(q);
 }
 
-static std::vector<Network::ScanData> ScanWifiWin(std::chrono::milliseconds deadline) {
+std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
     std::vector<Network::ScanData> out;
 
     HANDLE hClient{};
@@ -85,38 +94,16 @@ static std::vector<Network::ScanData> ScanWifiWin(std::chrono::milliseconds dead
     WlanCloseHandle(hClient, nullptr);
     return out;
 }
-#endif /* _WIN32 */
-
-#if defined(__linux__) && !defined(_WIN32) && !defined(ANDROID)
-#include <iwlib.h>
-
+#elif defined(__linux__) && !defined(__ANDROID__)
 static u8 QualityToPercent(const iwrange& r, const wireless_scan* ws) {
     const iw_quality qual = ws->stats.qual;
     const int lvl = qual.level;
     const int max = r.max_qual.level ? r.max_qual.level : 100;
-    return static_cast<u8>(std::clamp(100 * lvl / max, 0, 100));
-}
-
-static int wifi_callback(int skfd, char* ifname, char* args[], int count)
-{
-    iwrange range;
-
-    int res = iw_get_range_info(skfd, ifname, &range);
-
-    LOG_INFO(Network, "ifname {} returned {} on iw_get_range_info", ifname, res);
-
-    if (res >= 0) {
-        strncpy(args[0], ifname, IFNAMSIZ - 1);
-        args[0][IFNAMSIZ - 1] = 0;
-
-        return 1;
-    }
-
-    return 0;
+    return u8(std::clamp(100 * lvl / max, 0, 100));
 }
 
 // TODO(crueter, Maufeat): Check if driver supports wireless extensions, fallback to nl80211 if not
-static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds deadline) {
+std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
     std::vector<Network::ScanData> out;
     int sock = iw_sockets_open();
     if (sock < 0) {
@@ -127,7 +114,17 @@ static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds de
     char ifname[IFNAMSIZ] = {0};
     char *args[1] = {ifname};
 
-    iw_enum_devices(sock, &wifi_callback, args, 0);
+    iw_enum_devices(sock, [](int skfd, char* ifname, char* args[], int count) -> int {
+        iwrange range;
+        int res = iw_get_range_info(skfd, ifname, &range);
+        LOG_INFO(Network, "ifname {} returned {} on iw_get_range_info", ifname, res);
+        if (res >= 0) {
+            strncpy(args[0], ifname, IFNAMSIZ - 1);
+            args[0][IFNAMSIZ - 1] = 0;
+            return 1;
+        }
+        return 0;
+    }, args, 0);
 
     if (strlen(ifname) == 0) {
         LOG_WARNING(Network, "No wireless interface found");
@@ -153,20 +150,19 @@ static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds de
 
             out.clear();
             for (auto* ws = head.result; ws; ws = ws->next) {
-                if (!ws->b.has_essid)
-                    continue;
+                if (ws->b.has_essid) {
+                    Network::ScanData sd{};
+                    sd.ssid_len = static_cast<u8>(std::min<int>(ws->b.essid_len, 0x20));
+                    std::memcpy(sd.ssid, ws->b.essid, sd.ssid_len);
+                    sd.quality = QualityToPercent(range, ws);
+                    sd.flags |= 1;
+                    if (ws->b.has_key)
+                        sd.flags |= 2;
 
-                Network::ScanData sd{};
-                sd.ssid_len = static_cast<u8>(std::min<int>(ws->b.essid_len, 0x20));
-                std::memcpy(sd.ssid, ws->b.essid, sd.ssid_len);
-                sd.quality = QualityToPercent(range, ws);
-                sd.flags |= 1;
-                if (ws->b.has_key)
-                    sd.flags |= 2;
-
-                out.emplace_back(sd);
-                char tmp[0x22]{};
-                std::memcpy(tmp, sd.ssid, sd.ssid_len);
+                    out.emplace_back(sd);
+                    char tmp[0x22]{};
+                    std::memcpy(tmp, sd.ssid, sd.ssid_len);
+                }
             }
             have = !out.empty();
         }
@@ -174,21 +170,14 @@ static std::vector<Network::ScanData> ScanWifiLinux(std::chrono::milliseconds de
     iw_sockets_close(sock);
     return out;
 }
-#endif /* linux */
+#endif
 
+#elif defined(__FreeBSD__)
 std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
-#ifdef ENABLE_WIFI_SCAN
-#if defined(_WIN32)
-    return ScanWifiWin(deadline);
-#elif defined(__linux__) && !defined(ANDROID)
-    return ScanWifiLinux(deadline);
-#else
-    return {}; // unsupported host, pretend no results
-#endif
-#else
     return {}; // disabled, pretend no results
-#endif
 }
+#else
+std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
+    return {}; // disabled, pretend no results
+}
+#endif
 
 } // namespace Network
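For reference, the scan loop above packs its results into ScanData::flags: bit 0x1 is set for every reported network and bit 0x2 when the access point advertises a key. A small illustrative decoder, assuming that two-bit convention (ScanData's real definition is not part of this diff):

// Illustrative only: decodes the flag bits set by the sd.flags |= 1 and
// sd.flags |= 2 lines in the Linux scan loop above. Hypothetical type name.
#include <cstdint>

struct ScanDataView {
    std::uint8_t flags;
    bool IsValid() const { return (flags & 0x1) != 0; } // assumed meaning of bit 0x1
    bool HasKey() const { return (flags & 0x2) != 0; }  // assumed meaning of bit 0x2
};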
src/core/internal_network/wifi_scanner_dummy.cpp (Normal file, 11 lines added)
@@ -0,0 +1,11 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <chrono>
+#include "core/internal_network/wifi_scanner.h"
+
+namespace Network {
+std::vector<Network::ScanData> ScanWifiNetworks(std::chrono::milliseconds deadline) {
+    return {}; // disabled, pretend no results
+}
+} // namespace Network
@@ -77,9 +77,9 @@ void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
     ASSERT(inst->GetArg(0).IsImmediate());
     u64 imm64 = inst->GetArg(0).GetU64();
 
-    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
-    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
-    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr(code, {HostLoc::RCX});
+    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(code);
+    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr(code).cvt32();
     u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
                        ? u64(unique_hash_to_code_ptr[imm64])
                        : u64(code->GetReturnFromRunCodeAddress());
@@ -175,7 +175,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
     backend/x64/exclusive_monitor.cpp
     backend/x64/exclusive_monitor_friend.h
     backend/x64/host_feature.h
-    backend/x64/hostloc.cpp
     backend/x64/hostloc.h
     backend/x64/jitstate_info.h
     backend/x64/oparg.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD
@@ -60,7 +63,7 @@ void EmitIR<IR::Opcode::Pack2x32To1x64>(oaknut::CodeGenerator& code, EmitContext
 template<>
 void EmitIR<IR::Opcode::Pack2x64To1x128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    bool const args_in_gpr[] = { args[0].IsInGpr(), args[1].IsInGpr() };
+    bool const args_in_gpr[] = { args[0].IsInGpr(ctx.reg_alloc), args[1].IsInGpr(ctx.reg_alloc) };
     if (args_in_gpr[0] && args_in_gpr[1]) {
         auto Xlo = ctx.reg_alloc.ReadX(args[0]);
         auto Xhi = ctx.reg_alloc.ReadX(args[1]);
@@ -84,7 +84,7 @@ IR::AccType Argument::GetImmediateAccType() const {
     return value.GetAccType();
 }
 
-HostLoc::Kind Argument::CurrentLocationKind() const {
+HostLoc::Kind Argument::CurrentLocationKind(RegAlloc& reg_alloc) const {
     return reg_alloc.ValueLocation(value.GetInst())->kind;
 }
@@ -131,7 +131,7 @@ void HostLocInfo::UpdateUses() {
 }
 
 RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
-    ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
+    ArgumentInfo ret = {Argument{}, Argument{}, Argument{}, Argument{}};
     for (size_t i = 0; i < inst->NumArgs(); i++) {
         const IR::Value arg = inst->GetArg(i);
         ret[i].value = arg;
@@ -64,18 +64,18 @@ public:
     IR::AccType GetImmediateAccType() const;
 
     // Only valid if not immediate
-    HostLoc::Kind CurrentLocationKind() const;
-    bool IsInGpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Gpr; }
-    bool IsInFpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Fpr; }
+    HostLoc::Kind CurrentLocationKind(RegAlloc& reg_alloc) const;
+    bool IsInGpr(RegAlloc& reg_alloc) const {
+        return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Gpr;
+    }
+    bool IsInFpr(RegAlloc& reg_alloc) const {
+        return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Fpr;
+    }
 
 private:
     friend class RegAlloc;
-    explicit Argument(RegAlloc& reg_alloc)
-        : reg_alloc{reg_alloc} {}
-
-    bool allocated = false;
-    RegAlloc& reg_alloc;
     IR::Value value;
+    bool allocated = false;
 };
 
 struct FlagsTag final {
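Context for the mechanical changes that follow: Argument previously held a RegAlloc& member, which is what forced the Argument{*this} construction above and blocked default construction; the diff drops the member and instead threads the RegAlloc (and the BlockOfCode emitter) through each call site. A minimal sketch of that refactor pattern, with hypothetical names rather than dynarmic's API:

// Hypothetical stand-ins illustrating the refactor; not dynarmic's types.
#include <cstdint>

struct Alloc {
    bool ValueInGpr(std::uint64_t) const { return true; } // stand-in query
};

// Before: a stored back-reference forces constructor injection and makes the
// type non-default-constructible and non-copy-assignable.
struct ArgBefore {
    explicit ArgBefore(Alloc& a) : alloc{a} {}
    Alloc& alloc;
    std::uint64_t value = 0;
};

// After: a plain value type; callers pass the allocator where it is needed,
// so aggregates of arguments can be default-constructed and copied freely.
struct ArgAfter {
    std::uint64_t value = 0;
    bool InGpr(Alloc& alloc) const { return alloc.ValueInGpr(value); }
};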
@@ -117,7 +117,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
         return gprs;
     }();
 
-    new (&this->reg_alloc) RegAlloc(&code, gpr_order, any_xmm);
+    new (&this->reg_alloc) RegAlloc(gpr_order, any_xmm);
     A32EmitContext ctx{conf, reg_alloc, block};
 
     // Start emitting.
@@ -283,47 +283,47 @@ void A32EmitX64::GenTerminalHandlers() {
 
 void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
+    const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
     code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store);
 }
 
 void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
     const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
     code.mov(result, MJitStateReg(reg));
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) {
     const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsSingleExtReg(reg));
 
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     code.movss(result, MJitStateExtReg(reg));
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) {
     const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsDoubleExtReg(reg));
 
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     code.movsd(result, MJitStateExtReg(reg));
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32GetVector(A32EmitContext& ctx, IR::Inst* inst) {
     const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
 
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     if (A32::IsDoubleExtReg(reg)) {
         code.movsd(result, MJitStateExtReg(reg));
     } else {
         code.movaps(result, MJitStateExtReg(reg));
     }
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
@@ -332,11 +332,11 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
 
     if (args[1].IsImmediate()) {
         code.mov(MJitStateReg(reg), args[1].GetImmediateU32());
-    } else if (args[1].IsInXmm()) {
-        const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
+    } else if (args[1].IsInXmm(ctx.reg_alloc)) {
+        const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
         code.movd(MJitStateReg(reg), to_store);
     } else {
-        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
         code.mov(MJitStateReg(reg), to_store);
     }
 }
@@ -346,11 +346,11 @@ void A32EmitX64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* ins
     const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsSingleExtReg(reg));
 
-    if (args[1].IsInXmm()) {
-        Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
+    if (args[1].IsInXmm(ctx.reg_alloc)) {
+        Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
         code.movss(MJitStateExtReg(reg), to_store);
     } else {
-        Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
        code.mov(MJitStateExtReg(reg), to_store);
     }
 }
@@ -360,11 +360,11 @@ void A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins
     const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsDoubleExtReg(reg));
 
-    if (args[1].IsInXmm()) {
-        const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
+    if (args[1].IsInXmm(ctx.reg_alloc)) {
+        const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
         code.movsd(MJitStateExtReg(reg), to_store);
     } else {
-        const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
+        const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]);
         code.mov(MJitStateExtReg(reg), to_store);
     }
 }
@@ -374,7 +374,7 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) {
     const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
 
-    const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
     if (A32::IsDoubleExtReg(reg)) {
         code.movsd(MJitStateExtReg(reg), to_store);
     } else {
@@ -383,9 +383,9 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) {
 }
 
 void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
+    const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
     if (code.HasHostFeature(HostFeature::FastBMI2)) {
         // Here we observe that cpsr_et and cpsr_ge are right next to each other in memory,
@@ -428,15 +428,15 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
 
     code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);
 
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
+    const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
     if (conf.always_little_endian) {
         code.and_(cpsr, 0xFFFFFDFF);
@@ -501,7 +501,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
 
 void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
     code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
 }
 
@@ -512,15 +512,15 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
 
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
-        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
         code.shr(a, 28);
         code.mov(b, NZCV::x64_mask);
         code.pdep(a, a, b);
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
-        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
 
         code.shr(a, 28);
         code.imul(a, a, NZCV::to_x64_multiplier);
@@ -537,8 +537,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
         code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
-        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
         code.shr(a, 28);
         code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
@@ -546,7 +546,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
         code.pdep(a, a, b);
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
-        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
 
         code.shr(a, 28);
         code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
@@ -559,8 +559,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
 void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
     code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
     code.and_(tmp, 1);
@@ -577,12 +577,12 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
 
             code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
         } else {
-            const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();
+            const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt8();
 
             code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
         }
     } else {
-        const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
 
         if (args[1].IsImmediate()) {
             const bool c = args[1].GetImmediateU1();
@@ -590,7 +590,7 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
             code.or_(nz, c);
             code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
         } else {
-            const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+            const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
 
             code.or_(nz, c);
             code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
@@ -599,13 +599,13 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
 }
 
 static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
     code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
     if (flag_bit != 0) {
         code.shr(result, static_cast<int>(flag_bit));
     }
     code.and_(result, 1);
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
@@ -619,27 +619,27 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
             code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
         }
     } else {
-        const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
+        const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
 
         code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
     }
 }
 
 void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ASSERT(!args[0].IsImmediate());
 
-    if (args[0].IsInXmm()) {
-        const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
+    if (args[0].IsInXmm(ctx.reg_alloc)) {
+        const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
         code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
     } else {
-        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
     }
 }
@@ -656,8 +656,8 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
 
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
-        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
         code.mov(b, 0x01010101);
         code.shr(a, 16);
@@ -665,7 +665,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         code.imul(a, a, 0xFF);
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
     } else {
-        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
 
         code.shr(a, 16);
         code.and_(a, 0xF);
@@ -690,7 +690,7 @@ void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, I
         return;
     }
 
-    ctx.reg_alloc.HostCall(nullptr);
+    ctx.reg_alloc.HostCall(code, nullptr);
     Devirtualize<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code);
 }
 
@@ -718,9 +718,9 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
         code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
     } else {
-        const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
-        const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
-        const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(code, arg).cvt32();
+        const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr(code).cvt32();
+        const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
         code.mov(mask, new_pc);
         code.and_(mask, 1);
@@ -745,7 +745,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
     code.SwitchMxcsrOnExit();
 
     if (conf.enable_cycle_counting) {
-        ctx.reg_alloc.HostCall(nullptr);
+        ctx.reg_alloc.HostCall(code, nullptr);
         code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
         code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
         Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
@@ -753,7 +753,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
     }
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
+    ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
     Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code);
 
     if (conf.enable_cycle_counting) {
@@ -767,7 +767,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
 void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
     code.SwitchMxcsrOnExit();
 
-    ctx.reg_alloc.HostCall(nullptr);
+    ctx.reg_alloc.HostCall(code, nullptr);
     if (conf.enable_cycle_counting) {
         code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
         code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
@@ -797,7 +797,7 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
 }
 
 void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
-    ctx.reg_alloc.HostCall(inst);
+    ctx.reg_alloc.HostCall(code, inst);
     code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
 
     code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
@@ -810,7 +810,7 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
 
 void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.HostCall(nullptr, args[0]);
+    ctx.reg_alloc.HostCall(code, nullptr, args[0]);
     code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
 
     code.CallFunction(&SetFpscrImpl);
@@ -818,17 +818,17 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
 }
 
 void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
     code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
 
 void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     if (code.HasHostFeature(HostFeature::FastBMI2)) {
-        const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-        const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
+        const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
 
         code.mov(tmp, NZCV::x64_mask);
         code.pext(tmp, value, tmp);
@@ -838,7 +838,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
 
     code.and_(value, NZCV::x64_mask);
     code.imul(value, value, NZCV::from_x64_multiplier);
@@ -851,7 +851,7 @@ static void EmitCoprocessorException() {
 }
 
 static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
-    reg_alloc.HostCall(inst, {}, arg0, arg1);
+    reg_alloc.HostCall(code, inst, {}, arg0, arg1);
 
     if (callback.user_arg) {
         code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(*callback.user_arg));
@@ -914,8 +914,8 @@ void A32EmitX64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) {
     }
 
     if (const auto destination_ptr = std::get_if<u32*>(&action)) {
-        const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-        const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
+        const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
 
         code.mov(reg_destination_addr, reinterpret_cast<u64>(*destination_ptr));
         code.mov(code.dword[reg_destination_addr], reg_word);
@@ -954,9 +954,9 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst)
     }
 
     if (const auto destination_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
-        const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-        const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(args[2]).cvt32();
-        const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
+        const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(code, args[2]).cvt32();
+        const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
 
         code.mov(reg_destination_addr, reinterpret_cast<u64>((*destination_ptrs)[0]));
         code.mov(code.dword[reg_destination_addr], reg_word1);
@@ -998,13 +998,13 @@ void A32EmitX64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) {
     }
 
     if (const auto source_ptr = std::get_if<u32*>(&action)) {
-        const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr().cvt32();
-        const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr(code).cvt32();
+        const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr(code);
 
         code.mov(reg_source_addr, reinterpret_cast<u64>(*source_ptr));
         code.mov(reg_word, code.dword[reg_source_addr]);
 
-        ctx.reg_alloc.DefineValue(inst, reg_word);
+        ctx.reg_alloc.DefineValue(code, inst, reg_word);
 
         return;
     }
@@ -1038,9 +1038,9 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
     }
 
    if (const auto source_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
-        const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr();
-        const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
-        const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr(code);
+        const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
+        const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr(code);
 
         code.mov(reg_destination_addr, reinterpret_cast<u64>((*source_ptrs)[1]));
         code.mov(reg_result.cvt32(), code.dword[reg_destination_addr]);
@@ -1049,7 +1049,7 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(reg_tmp.cvt32(), code.dword[reg_destination_addr]);
         code.or_(reg_result, reg_tmp);
 
-        ctx.reg_alloc.DefineValue(inst, reg_result);
+        ctx.reg_alloc.DefineValue(code, inst, reg_result);
 
         return;
     }
@@ -91,7 +91,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
|
||||
return gprs;
|
||||
}();
|
||||
|
||||
new (&this->reg_alloc) RegAlloc{&code, gpr_order, any_xmm};
|
||||
new (&this->reg_alloc) RegAlloc{gpr_order, any_xmm};
|
||||
A64EmitContext ctx{conf, reg_alloc, block};
|
||||
|
||||
// Start emitting.
|
||||
@@ -159,7 +159,7 @@ finish_this_inst:
|
||||
}
|
||||
code.int3();
|
||||
|
||||
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
|
||||
const size_t size = size_t(code.getCurr() - entrypoint);
|
||||
|
||||
const A64::LocationDescriptor descriptor{block.Location()};
|
||||
const A64::LocationDescriptor end_location{block.EndLocation()};
|
||||
@@ -266,25 +266,25 @@ void A64EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
||||
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
|
||||
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
|
||||
code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
|
||||
code.shr(result, NZCV::x64_c_flag_bit);
|
||||
code.and_(result, 1);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
|
||||
code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::FastBMI2)) {
|
||||
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
code.mov(tmp, NZCV::x64_mask);
|
||||
code.pext(nzcv_raw, nzcv_raw, tmp);
|
||||
code.shl(nzcv_raw, 28);
|
||||
@@ -294,16 +294,16 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
code.and_(nzcv_raw, NZCV::arm_mask);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, nzcv_raw);
|
||||
ctx.reg_alloc.DefineValue(code, inst, nzcv_raw);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
|
||||
|
||||
code.shr(nzcv_raw, 28);
|
||||
if (code.HasHostFeature(HostFeature::FastBMI2)) {
|
||||
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
code.mov(tmp, NZCV::x64_mask);
|
||||
code.pdep(nzcv_raw, nzcv_raw, tmp);
|
||||
} else {
|
||||
@@ -315,63 +315,63 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
|
||||
void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
|
||||
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
|
||||
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movd(result, addr);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movq(result, addr);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(result, addr);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
|
||||
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
|
||||
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
static u32 GetFPSRImpl(A64JitState* jit_state) {
|
||||
@@ -379,7 +379,7 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
ctx.reg_alloc.HostCall(inst);
|
||||
ctx.reg_alloc.HostCall(code, inst);
|
||||
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
|
||||
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
|
||||
code.CallFunction(GetFPSRImpl);
|
||||
@@ -393,7 +393,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
code.mov(addr, args[1].GetImmediateS32());
|
||||
} else {
|
||||
// TODO: zext tracking, xmm variant
|
||||
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(args[1]);
|
||||
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(code, args[1]);
|
||||
code.mov(to_store.cvt32(), to_store.cvt32());
code.mov(addr, to_store);
}
@@ -405,11 +405,11 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]);
code.mov(addr, to_store);
}
}
@@ -419,8 +419,8 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// TODO: Optimize
code.pxor(tmp, tmp);
code.movss(tmp, to_store);
@@ -432,7 +432,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(code, args[1]);
code.movq(to_store, to_store); // TODO: Remove when able
code.movaps(addr, to_store);
}
@@ -442,7 +442,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movaps(addr, to_store);
}

@@ -451,11 +451,11 @@ void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
} else if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(addr, to_store);
}
}
@@ -466,7 +466,7 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {

void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPCRImpl);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
@@ -478,7 +478,7 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {

void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPSRImpl);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
@@ -489,17 +489,17 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
} else if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(addr, to_store);
}
}

void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
const u32 imm = args[0].GetImmediateU32();
@@ -511,7 +511,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
}

void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate() && args[1].IsImmediate());
const u64 pc = args[0].GetImmediateU64();
@@ -524,13 +524,13 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]);
Devirtualize<&A64::UserCallbacks::DataCacheOperationRaised>(conf.callbacks).EmitCall(code);
}

void A64EmitX64::EmitA64InstructionCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0], args[1]);
Devirtualize<&A64::UserCallbacks::InstructionCacheOperationRaised>(conf.callbacks).EmitCall(code);
}

@@ -548,18 +548,18 @@ void A64EmitX64::EmitA64InstructionSynchronizationBarrier(A64EmitContext& ctx, I
return;
}

ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
Devirtualize<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code);
}

void A64EmitX64::EmitA64GetCNTFRQ(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.cntfrq_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
if (!conf.wall_clock_cntpct) {
code.UpdateTicks();
}
@@ -567,43 +567,43 @@ void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
}

void A64EmitX64::EmitA64GetCTR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.ctr_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

void A64EmitX64::EmitA64GetDCZID(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.dczid_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

void A64EmitX64::EmitA64GetTPIDR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidr_el0) {
code.mov(result, u64(conf.tpidr_el0));
code.mov(result, qword[result]);
} else {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidrro_el0) {
code.mov(result, u64(conf.tpidrro_el0));
code.mov(result, qword[result]);
} else {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

void A64EmitX64::EmitA64SetTPIDR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[0]);
const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidr_el0) {
code.mov(addr, u64(conf.tpidr_el0));
code.mov(qword[addr], value);

@@ -68,7 +68,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
ctx.reg_alloc.DefineValue(code, inst, args[0]);
}
}

@@ -78,7 +78,7 @@ void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {

void EmitX64::EmitCallHostFunction(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[1], args[2], args[3]);
ctx.reg_alloc.HostCall(code, nullptr, args[1], args[2], args[3]);
code.mov(rax, args[0].GetImmediateU64());
code.call(rax);
}
@@ -120,7 +120,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {
code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]);
code.mov(qword[rsp + offsetof(RegisterData, spill)], rax);

reg_alloc.EmitVerboseDebuggingOutput();
reg_alloc.EmitVerboseDebuggingOutput(code);

for (int i = 0; i < 16; i++) {
if (rsp.getIdx() == i) {
@@ -140,9 +140,9 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate());
const u64 unique_hash_of_target = args[0].GetImmediateU64();

ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr(code);

PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
@@ -190,12 +190,12 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();

const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize);
code.test(value, value);
code.lahf();
code.movzx(eax, ah);
ctx.reg_alloc.DefineValue(inst, nz);
ctx.reg_alloc.DefineValue(code, inst, nz);
}
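// lahf copies SF:ZF:0:AF:0:PF:1:CF into AH, so a single test+lahf captures
// N (SF) and Z (ZF) without branches; RAX is reserved above because lahf
// can only target AH.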

void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
@@ -221,27 +221,27 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();

const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize);
code.test(value, value);
code.lahf();
code.xor_(al, al);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
}

void EmitX64::EmitGetCFlagFromNZCV(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (args[0].IsImmediate()) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const u32 value = (args[0].GetImmediateU32() >> 8) & 1;
code.mov(result, value);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(result, 8);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}

@@ -249,30 +249,30 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (args[0].IsImmediate()) {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(code).cvt32();
u32 value = 0;
value |= mcl::bit::get_bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= mcl::bit::get_bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= mcl::bit::get_bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= mcl::bit::get_bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.mov(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();

code.shr(nzcv, 28);
code.mov(tmp, NZCV::x64_mask);
code.pdep(nzcv, nzcv, tmp);

ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
} else {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();

code.shr(nzcv, 28);
code.imul(nzcv, nzcv, NZCV::to_x64_multiplier);
code.and_(nzcv, NZCV::x64_mask);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
}
}
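// Both non-immediate paths expand guest NZCV (bits 31..28) into the same
// host packing as the immediate path: N->bit 15, Z->bit 14, C->bit 8,
// V->bit 0. A scalar sketch of the multiplier trick, assuming
// NZCV::x64_mask / NZCV::to_x64_multiplier encode exactly that mapping:
//
//     u32 expand(u32 packed) {
//         const u32 nzcv = packed >> 28;  // 0b....NZCV
//         return (nzcv * NZCV::to_x64_multiplier) & NZCV::x64_mask;
//     }
//
// The multiply replicates each input bit to several positions and the mask
// keeps one copy per flag; pdep performs the same scatter in a single
// instruction when BMI2 is fast.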

@@ -23,13 +23,13 @@ using AESFn = void(AES::State&, const AES::State&);

static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, BlockOfCode& code, IR::Inst* inst, AESFn fn) {
constexpr u32 stack_space = static_cast<u32>(sizeof(AES::State)) * 2;
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();

ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);

ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE);

code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
@@ -37,22 +37,22 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);

ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE);

ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
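// Fallback convention used above: two AES::State buffers sit on the stack
// above the shadow space, ABI_PARAM1 points at the output state and
// ABI_PARAM2 at the input state, and the result is reloaded from the
// output buffer after the call.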

void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);

code.pxor(zero, zero);
code.aesdeclast(data, zero);

ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}

@@ -63,13 +63,13 @@ void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);

code.pxor(zero, zero);
code.aesenclast(data, zero);

ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}

@@ -80,11 +80,11 @@ void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);

code.aesimc(data, data);

ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}

@@ -95,14 +95,14 @@ void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);

code.pxor(zero, zero);
code.aesdeclast(data, zero);
code.aesenc(data, zero);

ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
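// Why aesdeclast + aesenc with a zero round key yields MixColumns alone
// (step names per FIPS-197):
//     aesdeclast(s, 0) == InvShiftRows(InvSubBytes(s))
//     aesenc(s, 0)     == MixColumns(ShiftRows(SubBytes(s)))
// SubBytes is bytewise and ShiftRows only permutes bytes, so the inverse
// pair cancels under composition, leaving MixColumns(s).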

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
@@ -19,16 +22,16 @@ namespace CRC32 = Common::Crypto::CRC32;
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE42)) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(data_size);
if (data_size != 64) {
code.crc32(crc, value);
} else {
code.crc32(crc.cvt64(), value);
}
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}
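// The SSE4.2 crc32 instruction implements CRC-32C (Castagnoli), i.e. the
// reflected polynomial 0x82F63B78. A bitwise reference for one byte:
//
//     u32 crc32c_byte(u32 crc, u8 b) {
//         crc ^= b;
//         for (int i = 0; i < 8; ++i)
//             crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78 : 0);
//         return crc;
//     }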
@@ -38,11 +41,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(code, args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);

code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));

@@ -64,12 +67,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co

code.pextrd(crc, xmm_value, 2);

ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);

code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));

@@ -82,12 +85,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co

code.pextrd(crc, xmm_value, 2);

ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);

code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));

@@ -100,9 +103,9 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co

code.pextrd(crc, xmm_value, 2);

ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}
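// The PCLMULQDQ paths fold the message with carry-less multiplies and
// finish with a Barrett reduction; the constant pair loaded above appears
// to be the standard one for the ISO CRC-32 polynomial (0x1DB710641 is the
// bit-reflected 0x104C11DB7), as in Intel's "Fast CRC Computation Using
// PCLMULQDQ Instruction" method.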

File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -54,14 +54,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.HostCall(nullptr, {}, args[1]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1]);
if (ordered) {
code.mfence();
}
code.CallFunction(memory_read_128);
ctx.reg_alloc.DefineValue(inst, xmm1);
ctx.reg_alloc.DefineValue(code, inst, xmm1);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1]);
if (ordered) {
code.mfence();
}
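// (memory_read_128 returns its result in XMM1 by convention, which is why
// the 128-bit branch above defines the value directly from xmm1.)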
@@ -74,14 +74,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {

if (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
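// cmpxchg16b is the only 128-bit atomic on x86-64 and implicitly uses
// RDX:RAX as the expected value and RCX:RBX as the replacement, hence all
// four registers are scratched for ordered 128-bit accesses.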

const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx();

const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];

@@ -126,9 +126,9 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
code.L(*end);

if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx});
}
}

@@ -141,13 +141,13 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.CallFunction(memory_write_128);
} else {
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]);
Devirtualize<callback>(conf.callbacks).EmitCall(code);
}
if (ordered) {
@@ -159,16 +159,16 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {

if (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}

const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128
? ctx.reg_alloc.UseXmm(args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(args[2]).getIdx() : ctx.reg_alloc.UseGpr(args[2]).getIdx());
? ctx.reg_alloc.UseXmm(code, args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(code, args[2]).getIdx() : ctx.reg_alloc.UseGpr(code, args[2]).getIdx());

const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];

@@ -222,7 +222,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;

ctx.reg_alloc.HostCall(inst, {}, args[1]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1]);

code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
@@ -237,14 +237,14 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
});
code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
} else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);

code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
if (ordered) {
code.mfence();
@@ -256,9 +256,9 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
});
});
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);

ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

EmitCheckMemoryAbort(ctx, inst);
@@ -271,15 +271,15 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
const bool ordered = IsOrdered(args[3].GetImmediateAccType());

if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1], args[2]);
}

const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
Xbyak::Label end;
code.mov(code.ABI_RETURN, u32(1));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
@@ -299,7 +299,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
code.mfence();
}
} else {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
@@ -310,7 +310,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered) {
code.mfence();
}
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
}
code.L(end);

@@ -330,16 +330,16 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in

if constexpr (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}

const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);

const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];

@@ -386,9 +386,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32());

if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx});
}

EmitCheckMemoryAbort(ctx, inst);
@@ -407,19 +407,19 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i

const auto value = [&] {
if constexpr (bitsize == 128) {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
return ctx.reg_alloc.UseXmm(args[2]);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
return ctx.reg_alloc.UseXmm(code, args[2]);
} else {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
return ctx.reg_alloc.UseGpr(args[2]);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
return ctx.reg_alloc.UseGpr(code, args[2]);
}
}();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);

const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];

@@ -518,7 +518,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i

code.L(*end);
EmitExclusiveUnlock(code, conf, tmp, eax);
ctx.reg_alloc.DefineValue(inst, status);
ctx.reg_alloc.DefineValue(code, inst, status);
EmitCheckMemoryAbort(ctx, inst);
}
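// Outline of the exclusive-monitor protocol these emitters implement
// (status follows the ARM convention: 0 = store succeeded, 1 = lost):
//     load-exclusive:  exclusive_state = 1; remember the address; load
//     store-exclusive: if (!exclusive_state) return 1;
//                      exclusive_state = 0; perform the store; return 0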

@@ -75,8 +75,8 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, EmitContext& ctx, size_t bitsiz

template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A32EmitContext>(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr(code).cvt32();

EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp.cvt64());

@@ -105,8 +105,8 @@ template<>
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;

const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(code);

EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp);

@@ -116,7 +116,7 @@ template<>
} else if (ctx.conf.silently_mirror_page_table) {
if (valid_page_index_bits >= 32) {
if (code.HasHostFeature(HostFeature::BMI2)) {
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code);
code.mov(bit_count, unused_top_bits);
code.bzhi(tmp, vaddr, bit_count);
code.shr(tmp, int(page_bits));
@@ -168,7 +168,7 @@ template<>
return r13 + vaddr;
} else if (ctx.conf.silently_mirror_fastmem) {
if (!tmp) {
tmp = ctx.reg_alloc.ScratchGpr();
tmp = ctx.reg_alloc.ScratchGpr(code);
}
if (unused_top_bits < 32) {
code.mov(*tmp, vaddr);
@@ -189,7 +189,7 @@ template<>
} else {
// TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator
if (!tmp) {
tmp = ctx.reg_alloc.ScratchGpr();
tmp = ctx.reg_alloc.ScratchGpr(code);
}
code.mov(*tmp, vaddr);
code.shr(*tmp, int(ctx.conf.fastmem_address_space_bits));
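// For orientation, the page-table lookup emitted here is, in scalar form,
// roughly (a sketch; names follow the config fields used above):
//
//     const u64 index = (vaddr << unused_top_bits) >> (unused_top_bits + page_bits);
//     u8* const host_page = page_table[index];  // null -> abort path
//     return host_page + (vaddr & ((u64(1) << page_bits) - 1));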

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
@@ -16,14 +19,14 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

code.paddb(xmm_a, xmm_b);

if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);

code.pcmpeqb(ones, ones);

@@ -32,21 +35,21 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqb(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);

code.pcmpeqb(xmm0, xmm0);

@@ -54,27 +57,27 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
code.paddsb(xmm_ge, xmm_b);
code.pcmpgtb(xmm_ge, xmm0);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}

code.paddb(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

code.paddw(xmm_a, xmm_b);

if (ge_inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);

code.pcmpeqb(ones, ones);

@@ -83,10 +86,10 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqw(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
} else {
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code);

// !(b <= a+b) == b > a+b
code.movdqa(tmp_a, xmm_a);
@@ -95,22 +98,22 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.paddw(tmp_b, code.Const(xword, 0x80008000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!

ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
ctx.reg_alloc.DefineValue(code, ge_inst, tmp_b);
}
}

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);

code.pcmpeqw(xmm0, xmm0);

@@ -118,45 +121,45 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
code.paddsw(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm0);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}

code.paddw(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);

code.movdqa(xmm_ge, xmm_a);
code.pmaxub(xmm_ge, xmm_b);
code.pcmpeqb(xmm_ge, xmm_a);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}

code.psubb(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);

code.pcmpeqb(xmm0, xmm0);

@@ -164,12 +167,12 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
code.psubsb(xmm_ge, xmm_b);
code.pcmpgtb(xmm_ge, xmm0);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}

code.psubb(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
@@ -177,19 +180,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

if (!ge_inst) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

code.psubw(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}

if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);

code.movdqa(xmm_ge, xmm_a);
code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
@@ -197,15 +200,15 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {

code.psubw(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);

// (a >= b) == !(b > a)
code.pcmpeqb(ones, ones);
@@ -217,19 +220,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {

code.psubw(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);

code.pcmpeqw(xmm0, xmm0);

@@ -237,21 +240,21 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
code.psubsw(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm0);

ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}

code.psubw(xmm_a, xmm_b);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (args[0].IsInXmm() || args[1].IsInXmm()) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);

// Since,
// pavg(a, b) == (a + b + 1) >> 1
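// the unsigned floor-average follows by complementing both inputs:
//     ~pavg(~a, ~b) == 255 - (((255 - a) + (255 - b) + 1) >> 1) == (a + b) >> 1
// which is what the xor / pavgb / xor sequence below computes.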
@@ -264,11 +267,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
code.pavgb(xmm_a, xmm_b);
code.pxor(xmm_a, ones);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
} else {
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;

@@ -284,17 +287,17 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
code.and_(xor_a_b, 0x7F7F7F7F);
code.add(result, xor_a_b);

ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
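// The GPR path above (and the halving ops below) rely on the identity
// x + y == ((x & y) << 1) + (x ^ y): the AND holds the carries and the XOR
// the carry-less sum, so the per-lane average
//     (x + y) >> 1 == (x & y) + ((x ^ y) >> 1)
// can be evaluated in one 32-bit register without carries crossing lane
// boundaries.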

void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

if (args[0].IsInXmm() || args[1].IsInXmm()) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

code.movdqa(tmp, xmm_a);
code.pand(xmm_a, xmm_b);
@@ -302,11 +305,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(tmp, 1);
code.paddw(xmm_a, tmp);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
} else {
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;

@@ -322,19 +325,19 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
code.and_(xor_a_b, 0x7FFF7FFF);
code.add(result, xor_a_b);

ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}

void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32();

// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
@@ -352,15 +355,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
code.add(result, xor_a_b);
code.xor_(result, carry);

ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}

void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
@@ -373,14 +376,14 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
code.psraw(tmp, 1);
code.paddw(xmm_a, tmp);

ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}

void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();

// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@@ -403,16 +406,16 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
code.xor_(minuend, 0x80808080);

// minuend now contains the desired result.
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
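
The subtraction analogue works the same way: x-y == (x^y) - (((x^y)&y) << 1), because (x^y)&y picks out exactly the borrow positions (~x & y). A standalone check for one byte lane, assuming nothing beyond the identity itself:

#include <cassert>
#include <cstdint>

int main() {
    for (std::int32_t x = 0; x < 256; ++x) {
        for (std::int32_t y = 0; y < 256; ++y) {
            // (x ^ y) & y == ~x & y: the positions that generate a borrow.
            assert(((x ^ y) & y) == (~x & y & 0xFF));
            assert((x ^ y) - (((x ^ y) & y) << 1) == x - y);
        }
    }
}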

 void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();

-    const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32();

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
@@ -439,14 +442,14 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
     code.xor_(minuend, 0x80808080);
     code.xor_(minuend, carry);

-    ctx.reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(code, inst, minuend);
 }

 void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]);

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
@@ -462,14 +465,14 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {

     code.psubw(minuend, subtrahend);

-    ctx.reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(code, inst, minuend);
 }

 void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]);

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
@@ -485,17 +488,17 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {

     code.psubw(minuend, subtrahend);

-    ctx.reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(code, inst, minuend);
 }

 static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-    const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
+    const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr(code).cvt32();
+    const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr(code).cvt32();
     Xbyak::Reg32 reg_sum, reg_diff;

     if (is_signed) {
@@ -543,7 +546,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
         code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
         code.or_(ge_sum, ge_diff);

-        ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
+        ctx.reg_alloc.DefineValue(code, ge_inst, ge_sum);
     }

     if (is_halving) {
@@ -557,7 +560,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
     // Merge them.
     code.shld(reg_a_hi, reg_a_lo, 16);

-    ctx.reg_alloc.DefineValue(inst, reg_a_hi);
+    ctx.reg_alloc.DefineValue(code, inst, reg_a_hi);
 }

 void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
@@ -595,12 +598,12 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
 static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

     (code.*fn)(xmm_a, xmm_b);

-    ctx.reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(code, inst, xmm_a);
 }

 void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
@@ -638,9 +641,9 @@ void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

     // TODO: Optimize with zero-extension detection
     code.movaps(tmp, code.Const(xword, 0x0000'0000'ffff'ffff));
@@ -648,45 +651,45 @@ void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) {
     code.pand(xmm_b, tmp);
     code.psadbw(xmm_a, xmm_b);

-    ctx.reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(code, inst, xmm_a);
 }
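
What the masked psadbw above computes corresponds to ARM's USAD8: the unsigned absolute differences of the four low-order bytes, summed into one scalar. A scalar reference, written here only to document the intended result:

#include <cstdint>

std::uint32_t usad8_reference(std::uint32_t a, std::uint32_t b) {
    std::uint32_t acc = 0;
    for (int i = 0; i < 4; ++i) {
        const std::int32_t ai = (a >> (8 * i)) & 0xFF;
        const std::int32_t bi = (b >> (8 * i)) & 0xFF;
        acc += static_cast<std::uint32_t>(ai > bi ? ai - bi : bi - ai);
    }
    return acc;  // sum of |a_i - b_i| over the low four byte lanes
}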

 void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
+    const size_t num_args_in_xmm = args[0].IsInXmm(ctx.reg_alloc) + args[1].IsInXmm(ctx.reg_alloc) + args[2].IsInXmm(ctx.reg_alloc);

     if (num_args_in_xmm >= 2) {
-        const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
+        const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(code, args[2]);

         code.pand(from, ge);
         code.pandn(ge, to);
         code.por(from, ge);

-        ctx.reg_alloc.DefineValue(inst, from);
+        ctx.reg_alloc.DefineValue(code, inst, from);
     } else if (code.HasHostFeature(HostFeature::BMI1)) {
-        const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-        const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
+        const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
+        const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
+        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32();

         code.and_(from, ge);
         code.andn(to, ge, to);
         code.or_(from, to);

-        ctx.reg_alloc.DefineValue(inst, from);
+        ctx.reg_alloc.DefineValue(code, inst, from);
     } else {
-        const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
+        const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+        const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
+        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32();

         code.and_(from, ge);
         code.not_(ge);
         code.and_(ge, to);
         code.or_(from, ge);

-        ctx.reg_alloc.DefineValue(inst, from);
+        ctx.reg_alloc.DefineValue(code, inst, from);
     }
 }
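
All three paths of EmitPackedSelect compute the same bitwise select, result = (from & ge) | (to & ~ge); the ge mask comes from the GE flags, so each byte lane is either all-ones or all-zeros. The scalar equivalent, for orientation:

#include <cstdint>

// ge must hold 0x00 or 0xFF per byte lane, as the GE flags guarantee.
std::uint32_t packed_select(std::uint32_t ge, std::uint32_t to, std::uint32_t from) {
    return (from & ge) | (to & ~ge);
}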


@@ -34,9 +34,9 @@ template<Op op, size_t size, bool has_overflow_inst = false>
 void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
-    Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size);
-    Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size);
+    Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
+    Xbyak::Reg addend = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(size);
+    Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code).changeBit(size);

     constexpr u64 int_max = static_cast<u64>((std::numeric_limits<mcl::signed_integer_of_size<size>>::max)());
     if constexpr (size < 64) {
@@ -66,21 +66,21 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
     code.seto(overflow.cvt8());
     if constexpr (has_overflow_inst) {
         if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) {
-            ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+            ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
         }
     } else {
         code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
     }

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 template<Op op, size_t size>
 void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
-    Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size);
+    Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
+    Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(code, args[1]).changeBit(size);

     constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;

@@ -96,11 +96,11 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
         code.cmovae(addend, op_result);
     }

-    const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code);
     code.setb(overflow.cvt8());
     code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());

-    ctx.reg_alloc.DefineValue(inst, addend);
+    ctx.reg_alloc.DefineValue(code, inst, addend);
 }

 } // anonymous namespace
@@ -126,10 +126,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
             overflow_inst->ReplaceUsesWith(no_overflow);
         }
         // TODO: DefineValue directly on Argument
-        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
-        const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]);
+        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
+        const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]);
         code.mov(result.cvt32(), source.cvt32());
-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

@@ -137,9 +137,9 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     const u32 positive_saturated_value = (1u << (N - 1)) - 1;
     const u32 negative_saturated_value = 1u << (N - 1);

-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
+    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32();

     // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
     code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
@@ -156,10 +156,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     if (overflow_inst) {
         code.seta(overflow.cvt8());

-        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
     }

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
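
The lea/cmp pair above is the usual biased range check: adding 2^(N-1) maps the in-range signed interval onto [0, 2^N - 1], so a single unsigned compare detects overflow in either direction. A scalar sketch of the same idea, assuming 0 < N < 32:

#include <cstdint>

std::int32_t signed_saturate(std::int32_t v, unsigned n) {
    const std::uint32_t mask = (1u << n) - 1;  // 2^n - 1
    const std::uint32_t biased = static_cast<std::uint32_t>(v) + (1u << (n - 1));
    if (biased > mask)  // out of range on either side
        return v < 0 ? -static_cast<std::int32_t>(1u << (n - 1))
                     : static_cast<std::int32_t>((1u << (n - 1)) - 1);
    return v;
}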

 void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
@@ -171,9 +171,9 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {

     const u32 saturated_value = (1u << N) - 1;

-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
+    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32();

     // Pseudocode: result = clamp(reg_a, 0, saturated_value);
     code.xor_(overflow, overflow);
@@ -185,10 +185,10 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     if (overflow_inst) {
         code.seta(overflow.cvt8());

-        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
     }

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
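
The unsigned case is a plain clamp to [0, 2^N - 1], built above from a zeroed scratch register and conditional moves so no branch is emitted. Scalar equivalent, again assuming 0 < N < 32:

#include <cstdint>

std::uint32_t unsigned_saturate(std::int32_t v, unsigned n) {
    const std::uint32_t saturated_value = (1u << n) - 1;
    if (v < 0)
        return 0;
    if (static_cast<std::uint32_t>(v) > saturated_value)
        return saturated_value;
    return static_cast<std::uint32_t>(v);
}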

 void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
@@ -210,9 +210,9 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
+    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();

     code.movsx(x, x.cvt16());
     code.movsx(y, y.cvt16());
@@ -228,15 +228,15 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
     code.sets(tmp.cvt8());
     code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());

-    ctx.reg_alloc.DefineValue(inst, y);
+    ctx.reg_alloc.DefineValue(code, inst, y);
 }
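
The only input pair that can saturate here is x == y == -0x8000, where the doubled product 2^31 does not fit the 32-bit result; the sets/or_ pair latches that case into FPSR.QC. Reference semantics (SQDMULH on 16-bit lanes), as a hedged sketch rather than the emitter itself:

#include <cstdint>
#include <limits>

std::int16_t sqdmulh16_reference(std::int16_t x, std::int16_t y) {
    const std::int64_t doubled = 2 * std::int64_t{x} * y;  // fits in 33 bits
    const std::int64_t high = doubled >> 16;
    if (high > std::numeric_limits<std::int16_t>::max())
        return std::numeric_limits<std::int16_t>::max();   // saturates; sets QC
    return static_cast<std::int16_t>(high);
}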

 void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
-    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
-    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(code, args[0]);
+    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]);
+    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);

     code.movsxd(x, x.cvt32());
     code.movsxd(y, y.cvt32());
@@ -252,7 +252,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
     code.sets(tmp.cvt8());
     code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());

-    ctx.reg_alloc.DefineValue(inst, y);
+    ctx.reg_alloc.DefineValue(code, inst, y);
 }

 void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD
@@ -22,9 +25,9 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) {
     // y = h g f e
     // w = wk3 wk2 wk1 wk0

-    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(args[2]);
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+    const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(code, args[2]);

     // x64 expects:
     // 3 2 1 0
@@ -45,7 +48,7 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) {

     code.shufps(y, x, part1 ? 0b10111011 : 0b00010001);

-    ctx.reg_alloc.DefineValue(inst, y);
+    ctx.reg_alloc.DefineValue(code, inst, y);
 }

 void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) {
@@ -53,12 +56,12 @@ void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) {

     ASSERT(code.HasHostFeature(HostFeature::SHA));

-    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);

     code.sha256msg1(x, y);

-    ctx.reg_alloc.DefineValue(inst, x);
+    ctx.reg_alloc.DefineValue(code, inst, x);
 }

 void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) {
@@ -66,16 +69,16 @@ void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) {

     ASSERT(code.HasHostFeature(HostFeature::SHA));

-    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(args[2]);
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(code, args[2]);

     code.movaps(xmm0, z);
     code.palignr(xmm0, y, 4);
     code.paddd(x, xmm0);
     code.sha256msg2(x, z);

-    ctx.reg_alloc.DefineValue(inst, x);
+    ctx.reg_alloc.DefineValue(code, inst, x);
 }
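
The palignr/paddd/sha256msg2 sequence extends the SHA-256 message schedule four words at a time. One scalar step, for comparison with the FIPS 180-4 definition (the vector code computes W[t..t+3] at once):

#include <cstdint>

static std::uint32_t rotr(std::uint32_t x, int n) {
    return (x >> n) | (x << (32 - n));
}

// W[t] for t >= 16, from the four earlier schedule words it depends on.
std::uint32_t sha256_schedule_word(std::uint32_t w_t16, std::uint32_t w_t15,
                                   std::uint32_t w_t7, std::uint32_t w_t2) {
    const std::uint32_t s0 = rotr(w_t15, 7) ^ rotr(w_t15, 18) ^ (w_t15 >> 3);
    const std::uint32_t s1 = rotr(w_t2, 17) ^ rotr(w_t2, 19) ^ (w_t2 >> 10);
    return w_t16 + s0 + w_t7 + s1;
}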

 } // namespace Dynarmic::Backend::X64

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2018 MerryMage
  * SPDX-License-Identifier: 0BSD
@@ -13,7 +16,7 @@ namespace Dynarmic::Backend::X64 {
 void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    ctx.reg_alloc.HostCall(inst, args[0]);
+    ctx.reg_alloc.HostCall(code, inst, args[0]);
     code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
     code.movzx(code.ABI_RETURN.cvt32(), code.ABI_RETURN.cvt8());
 }

File diff suppressed because it is too large
@@ -96,7 +96,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
     if (code.HasHostFeature(HostFeature::SSE41)) {
         code.ptest(nan_mask, nan_mask);
     } else {
-        const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr(code).cvt32();
         code.movmskps(bitmask, nan_mask);
         code.cmp(bitmask, 0);
     }
@@ -312,13 +312,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     Xbyak::Xmm result;

     if constexpr (std::is_member_function_pointer_v<Function>) {
-        result = ctx.reg_alloc.UseScratchXmm(args[0]);
+        result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             (code.*fn)(result);
         });
     } else {
-        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
-        result = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
+        result = ctx.reg_alloc.ScratchXmm(code);
         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             fn(result, xmm_a);
         });
@@ -328,13 +328,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
             ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
         }

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);

     if constexpr (std::is_member_function_pointer_v<Function>) {
         code.movaps(result, xmm_a);
@@ -352,7 +352,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins

     HandleNaNs<fsize, 1>(code, ctx, fpcr_controlled, {result, xmm_a}, nan_mask, nan_handler);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 enum class CheckInputNaN {
@@ -368,8 +368,8 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     const bool fpcr_controlled = args[2].GetImmediateU1();

     if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
-        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);

         if constexpr (std::is_member_function_pointer_v<Function>) {
             MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
@@ -385,14 +385,14 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
             ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
         }

-        ctx.reg_alloc.DefineValue(inst, xmm_a);
+        ctx.reg_alloc.DefineValue(code, inst, xmm_a);
         return;
     }

-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);

     code.movaps(result, xmm_a);

@@ -422,7 +422,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i

     HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 template<typename F>
@@ -448,16 +448,16 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
 template<size_t fpcr_controlled_arg_index = 1, typename F>
 void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, F lambda) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     ctx.reg_alloc.EndOfAllocScope();
-    ctx.reg_alloc.HostCall(nullptr);
+    ctx.reg_alloc.HostCall(code, nullptr);

     const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();

     EmitTwoOpFallbackWithoutRegAlloc(code, ctx, result, arg1, lambda, fpcr_controlled);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 template<typename Lambda>
@@ -501,17 +501,17 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
 template<typename Lambda>
 void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     ctx.reg_alloc.EndOfAllocScope();
-    ctx.reg_alloc.HostCall(nullptr);
+    ctx.reg_alloc.HostCall(code, nullptr);

     const bool fpcr_controlled = args[2].GetImmediateU1();

     EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 enum class LoadPreviousResult {
@@ -565,16 +565,16 @@ template<typename Lambda>
 void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[3].GetImmediateU1();
-    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(args[2]);
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(code, args[2]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
     ctx.reg_alloc.EndOfAllocScope();
-    ctx.reg_alloc.HostCall(nullptr);
+    ctx.reg_alloc.HostCall(code, nullptr);

     EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda, fpcr_controlled);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 } // anonymous namespace
@@ -582,9 +582,9 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
 template<size_t fsize>
 void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     code.andps(a, GetNonSignMaskVector<fsize>(code));
-    ctx.reg_alloc.DefineValue(inst, a);
+    ctx.reg_alloc.DefineValue(code, inst, a);
 }

 void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
@@ -626,29 +626,29 @@ void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();
-    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);

     MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
         code.cmpeqps(a, b);
     });

-    ctx.reg_alloc.DefineValue(inst, a);
+    ctx.reg_alloc.DefineValue(code, inst, a);
 }

 void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();
-    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);

     MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
         code.cmpeqpd(a, b);
     });

-    ctx.reg_alloc.DefineValue(inst, a);
+    ctx.reg_alloc.DefineValue(code, inst, a);
 }

 template<FP::RoundingMode rounding_mode>
@@ -664,13 +664,13 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {
     if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(code, args[0]);

         code.vcvtph2ps(result, value);
         ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result);

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
     } else {
         switch (rounding_mode) {
         case FP::RoundingMode::ToNearest_TieEven:
@@ -696,7 +696,7 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {

 void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
     const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -709,12 +709,12 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
         }
     });

-    ctx.reg_alloc.DefineValue(inst, xmm);
+    ctx.reg_alloc.DefineValue(code, inst, xmm);
 }

 void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
     const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -724,8 +724,8 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
         if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
             code.vcvtqq2pd(xmm, xmm);
         } else if (code.HasHostFeature(HostFeature::SSE41)) {
-            const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
-            const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+            const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
+            const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);

             // First quadword
             code.movq(tmp, xmm);
@@ -738,9 +738,9 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
             // Combine
             code.unpcklpd(xmm, xmm_tmp);
         } else {
-            const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm();
-            const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
-            const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+            const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm(code);
+            const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
+            const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);

             // First quadword
             code.movhlps(high_xmm, xmm);
@@ -760,12 +760,12 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
         }
     });

-    ctx.reg_alloc.DefineValue(inst, xmm);
+    ctx.reg_alloc.DefineValue(code, inst, xmm);
 }

 void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
     const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -779,7 +779,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         const Xbyak::Address mem_53000000 = code.BConst<32>(xword, 0x53000000);
         const Xbyak::Address mem_D3000080 = code.BConst<32>(xword, 0xD3000080);

-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

         if (code.HasHostFeature(HostFeature::AVX)) {
             code.vpblendw(tmp, xmm, mem_4B000000, 0b10101010);
@@ -810,12 +810,12 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         }
     });

-    ctx.reg_alloc.DefineValue(inst, xmm);
+    ctx.reg_alloc.DefineValue(code, inst, xmm);
 }

 void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
     const bool fpcr_controlled = args[3].GetImmediateU1();
@@ -828,9 +828,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
         const Xbyak::Address unpack = code.Const(xword, 0x4530000043300000, 0);
         const Xbyak::Address subtrahend = code.Const(xword, 0x4330000000000000, 0x4530000000000000);

-        const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);

         if (code.HasHostFeature(HostFeature::AVX)) {
             code.vmovapd(unpack_reg, unpack);
@@ -846,7 +846,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)

             code.vhaddpd(xmm, tmp1, xmm);
         } else {
-            const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);

             code.movapd(unpack_reg, unpack);
             code.movapd(subtrahend_reg, subtrahend);
@@ -877,63 +877,63 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
         }
     });

-    ctx.reg_alloc.DefineValue(inst, xmm);
+    ctx.reg_alloc.DefineValue(code, inst, xmm);
 }
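
The constants 0x43300000... and 0x45300000... implement the classic integer-to-double trick: each 32-bit half is OR'd into the mantissa of a biased double (2^52 for the low half, 2^84 for the high half), the biases are subtracted exactly, and one final add performs the only rounding. A scalar sketch of the same conversion, assuming IEEE-754 doubles:

#include <cstdint>
#include <cstring>

double u64_to_f64_reference(std::uint64_t v) {
    const std::uint64_t lo_bits = 0x4330000000000000ULL | (v & 0xFFFFFFFFu);  // 2^52 + lo32
    const std::uint64_t hi_bits = 0x4530000000000000ULL | (v >> 32);          // 2^84 + hi32 * 2^32
    double lo, hi;
    std::memcpy(&lo, &lo_bits, sizeof lo);
    std::memcpy(&hi, &hi_bits, sizeof hi);
    return (hi - 0x1.0p84) + (lo - 0x1.0p52);  // exact until the final add
}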

 void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();
-    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);

     MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
         code.cmpltps(b, a);
     });

-    ctx.reg_alloc.DefineValue(inst, b);
+    ctx.reg_alloc.DefineValue(code, inst, b);
 }

 void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();
-    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);

     MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
         code.cmpltpd(b, a);
     });

-    ctx.reg_alloc.DefineValue(inst, b);
+    ctx.reg_alloc.DefineValue(code, inst, b);
 }

 void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();
-    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);

     MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
         code.cmpleps(b, a);
     });

-    ctx.reg_alloc.DefineValue(inst, b);
+    ctx.reg_alloc.DefineValue(code, inst, b);
 }

 void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();
-    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);

     MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
         code.cmplepd(b, a);
     });

-    ctx.reg_alloc.DefineValue(inst, b);
+    ctx.reg_alloc.DefineValue(code, inst, b);
 }

 template<size_t fsize, bool is_max>
@@ -942,12 +942,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in

     if (ctx.FPCR(fpcr_controlled).DN()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);

         const Xbyak::Xmm mask = xmm0;
-        const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);

         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
@@ -994,7 +994,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             }
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);

         return;
     }
@@ -1002,11 +1002,11 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     EmitThreeOpVectorOperation<fsize, DefaultIndexer>(
         code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) {
             const Xbyak::Xmm mask = xmm0;
-            const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code);

             if (ctx.FPCR(fpcr_controlled).FZ()) {
                 const Xbyak::Xmm prev_xmm_b = xmm_b;
-                xmm_b = ctx.reg_alloc.ScratchXmm();
+                xmm_b = ctx.reg_alloc.ScratchXmm(code);
                 code.movaps(xmm_b, prev_xmm_b);
                 DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
             }
@@ -1053,13 +1053,13 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
     const bool fpcr_controlled = inst->GetArg(2).GetU1();

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm(code);

     const Xbyak::Xmm tmp1 = xmm0;
-    const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);

     // NaN requirements:
     // op1 op2 result
@@ -1139,7 +1139,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
         }
     });

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
     return;
 }

@@ -1230,7 +1230,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
         }
     });

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }
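
For the "numeric" variants the NaN table above boils down to: a quiet NaN on one side yields the other operand, two NaNs yield NaN. A scalar reference for max, hedged as an approximation of the FMAXNM-style rules (signalling NaNs and signed-zero ordering ignored) rather than the emitter itself:

#include <cmath>

double fmaxnm_reference(double op1, double op2) {
    if (std::isnan(op1) && std::isnan(op2))
        return op1 + op2;  // propagate a NaN
    if (std::isnan(op1))
        return op2;
    if (std::isnan(op2))
        return op1;
    return op1 > op2 ? op1 : op2;
}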

 void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
@@ -1316,27 +1316,27 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     if (code.HasHostFeature(HostFeature::FMA) && !needs_rounding_correction && !needs_nan_correction) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
+        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]);

         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             FCODE(vfmadd231p)(result, xmm_b, xmm_c);
             ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

     if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

         SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();

@@ -1375,21 +1375,21 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             code.jmp(*end, code.T_NEAR);
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

     if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
-        const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+        const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]);

         FCODE(mulp)(operand2, operand3);
         FCODE(addp)(operand1, operand2);

-        ctx.reg_alloc.DefineValue(inst, operand1);
+        ctx.reg_alloc.DefineValue(code, inst, operand1);
         return;
     }
 }
@@ -1417,10 +1417,10 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
     const bool fpcr_controlled = args[2].GetImmediateU1();

     if (ctx.FPCR(fpcr_controlled).DN() && code.HasHostFeature(HostFeature::AVX)) {
-        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(code);

         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             FCODE(vcmpunordp)(xmm0, result, operand);
@@ -1434,14 +1434,14 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
             FCODE(blendvp)(result, twos);
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);

     code.movaps(nan_mask, xmm_b);
     code.movaps(result, xmm_a);
@@ -1464,7 +1464,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst

     HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 void EmitX64::EmitFPVectorMulX32(EmitContext& ctx, IR::Inst* inst) {
@@ -1482,12 +1482,12 @@ void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const Xbyak::Address mask = code.BConst<fsize>(xword, sign_mask);

     code.xorps(a, mask);

-    ctx.reg_alloc.DefineValue(inst, a);
+    ctx.reg_alloc.DefineValue(code, inst, a);
 }

 void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
@@ -1512,7 +1512,7 @@ void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {

 void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
     EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
-        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
         code.xorps(zero, zero);
         code.punpcklqdq(result, xmm_b);
         code.haddps(result, zero);
@@ -1521,7 +1521,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {

 void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
     EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
-        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
         code.xorps(zero, zero);
         code.punpcklqdq(result, xmm_b);
         code.haddpd(result, zero);
@@ -1535,8 +1535,8 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     if constexpr (fsize != 16) {
         if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-            const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
-            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);

             if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
                 FCODE(vrcp14p)(result, operand);
@@ -1550,7 +1550,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
                }
            }

-            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(code, inst, result);
            return;
        }
    }
@@ -1589,16 +1589,16 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[2].GetImmediateU1();

-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);

         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
             FCODE(vfnmadd231p)(result, operand1, operand2);
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

@@ -1606,10 +1606,10 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[2].GetImmediateU1();

-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

         SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();

@@ -1633,22 +1633,22 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             code.jmp(*end, code.T_NEAR);
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

     if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);

         code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
         FCODE(mulp)(operand1, operand2);
         FCODE(subp)(result, operand1);

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }
 }
@@ -1757,8 +1757,8 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     if constexpr (fsize != 16) {
         if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-            const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
-            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);

             if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
                 FCODE(vrsqrt14p)(result, operand);
@@ -1772,7 +1772,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
                }
            }

-            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(code, inst, result);
            return;
        }

@@ -1780,9 +1780,9 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[1].GetImmediateU1();

-        const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);

         SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();

@@ -1816,7 +1816,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
             code.jmp(*end, code.T_NEAR);
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }
 }
@@ -1851,9 +1851,9 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[2].GetImmediateU1();

-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);

         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
@@ -1861,7 +1861,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
         });

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

@@ -1869,11 +1869,11 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[2].GetImmediateU1();

-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
||||
|
||||
@@ -1902,23 +1902,23 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||
code.jmp(*end, code.T_NEAR);
|
||||
});
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
code.movaps(result, GetVectorOf<fsize, false, 0, 3>(code));
|
||||
FCODE(mulp)(operand1, operand2);
|
||||
FCODE(subp)(result, operand1);
|
||||
FCODE(mulp)(result, GetVectorOf<fsize, false, -1, 1>(code));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -1972,12 +1972,12 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
|
||||
ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result);
|
||||
code.vcvtps2ph(result, result, u8(*round_imm));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
@@ -2018,7 +2018,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
const int round_imm = [&] {
|
||||
@@ -2045,8 +2045,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
code.vcvttpd2qq(src, src);
|
||||
} else {
|
||||
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
|
||||
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
|
||||
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code);
|
||||
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
code.cvttsd2si(lo, src);
|
||||
code.punpckhqdq(src, src);
|
||||
@@ -2093,12 +2093,12 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
FCODE(andp)(src, xmm0);
|
||||
|
||||
// Will we exceed unsigned range?
|
||||
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
|
||||
FCODE(cmplep)(exceed_unsigned, src);
|
||||
|
||||
// Will be exceed signed range?
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||
code.movaps(xmm0, tmp);
|
||||
FCODE(cmplep)(xmm0, src);
|
||||
@@ -2122,7 +2122,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
}
|
||||
});
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, src);
|
||||
ctx.reg_alloc.DefineValue(code, inst, src);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
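Note: every hunk in the file above applies one mechanical rewrite: RegAlloc no longer carries a BlockOfCode pointer, so each Use*/Scratch*/DefineValue call must name the emitter it writes into. A minimal standalone sketch of the shape of that change, with invented stand-in types (not dynarmic's API):

    #include <iostream>

    struct Emitter {  // plays the role of BlockOfCode
        void emit(const char* s) { std::cout << s << '\n'; }
    };

    struct RegAlloc {
        // Before: `Emitter* code;` was stored at construction and used
        // implicitly. After: the emitter is an explicit parameter, so the
        // dependency is visible at each call site and RegAlloc stays
        // default-constructible.
        int ScratchXmm(Emitter& code) {
            code.emit("spill/zero the newly allocated register");
            return next++;
        }
        int next = 0;
    };

    int main() {
        Emitter code;
        RegAlloc reg_alloc;  // no emitter needed up front
        std::cout << "xmm" << reg_alloc.ScratchXmm(code) << '\n';
    }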
@@ -26,9 +26,9 @@ namespace {
 void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*saturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*unsaturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*sub_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
+    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();

     code.movaps(xmm0, result);

@@ -39,7 +39,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     if (code.HasHostFeature(HostFeature::SSE41)) {
         code.ptest(xmm0, xmm0);
     } else {
-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
         code.pxor(tmp, tmp);
         code.pcmpeqw(xmm0, tmp);
         code.pmovmskb(overflow.cvt32(), xmm0);
@@ -49,7 +49,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     code.setnz(overflow);
     code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 enum class Op {
@@ -65,10 +65,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();

         code.movaps(xmm0, operand1);

@@ -91,15 +91,15 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         code.setnz(overflow);
         code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

-    const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1;
-    const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1;
+    const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

     if (code.HasHostFeature(HostFeature::AVX)) {
         if constexpr (op == Op::Add) {
@@ -150,7 +150,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     if (code.HasHostFeature(HostFeature::SSE41)) {
         FCODE(blendvp)(result, tmp);

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
     } else {
         code.psrad(xmm0, 31);
         if constexpr (esize == 64) {
@@ -161,7 +161,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         code.pandn(xmm0, result);
         code.por(tmp, xmm0);

-        ctx.reg_alloc.DefineValue(inst, tmp);
+        ctx.reg_alloc.DefineValue(code, inst, tmp);
     }
 }

@@ -172,10 +172,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
-        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
+        const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+        const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+        const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();

         if constexpr (op == Op::Add) {
             ICODE(vpadd)(result, operand1, operand2);
@@ -191,15 +191,15 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
         code.setnz(overflow);
         code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

-    const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1;
-    const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1;
+    const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);

     if constexpr (op == Op::Add) {
         if (code.HasHostFeature(HostFeature::AVX)) {
@@ -252,10 +252,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*

     if constexpr (op == Op::Add) {
         code.por(result, tmp);
-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
     } else {
         code.pandn(tmp, result);
-        ctx.reg_alloc.DefineValue(inst, tmp);
+        ctx.reg_alloc.DefineValue(code, inst, tmp);
     }
 }
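Note: the setnz/or_ pairs above maintain ARM's sticky QC (saturation) flag: each saturating operation computes whether anything saturated and ORs that bit into fpsr_qc, and nothing ever clears it mid-block. A standalone analogue of the same accumulation pattern (types and names invented):

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint8_t fpsr_qc = 0;
        auto saturating_add = [&](int8_t a, int8_t b) {
            const int sum = a + b;
            const int clamped = sum > 127 ? 127 : (sum < -128 ? -128 : sum);
            fpsr_qc |= (clamped != sum);   // sticky: later ops never clear it
            return static_cast<int8_t>(clamped);
        };
        saturating_add(100, 100);          // saturates, so QC becomes 1
        saturating_add(1, 2);              // no saturation, QC stays 1
        std::printf("QC=%d\n", fpsr_qc);   // prints QC=1
    }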
@@ -1,25 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#include "dynarmic/backend/x64/hostloc.h"
-
-#include <xbyak/xbyak.h>
-
-#include "dynarmic/backend/x64/abi.h"
-#include "dynarmic/backend/x64/stack_layout.h"
-
-namespace Dynarmic::Backend::X64 {
-
-Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
-    ASSERT(HostLocIsGPR(loc));
-    return Xbyak::Reg64(static_cast<int>(loc));
-}
-
-Xbyak::Xmm HostLocToXmm(HostLoc loc) {
-    ASSERT(HostLocIsXMM(loc));
-    return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
-}
-
-} // namespace Dynarmic::Backend::X64
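Note: this deletes the whole translation unit (apparently hostloc.cpp, given its first include), but its two helpers are not dropped; they move into the header as inline definitions, as the next hunk shows.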
@@ -152,7 +152,14 @@ const HostLocList any_xmm = {
     HostLoc::XMM15,
 };

-Xbyak::Reg64 HostLocToReg64(HostLoc loc);
-Xbyak::Xmm HostLocToXmm(HostLoc loc);
+inline Xbyak::Reg64 HostLocToReg64(HostLoc loc) noexcept {
+    ASSERT(HostLocIsGPR(loc));
+    return Xbyak::Reg64(int(loc));
+}
+
+inline Xbyak::Xmm HostLocToXmm(HostLoc loc) noexcept {
+    ASSERT(HostLocIsXMM(loc));
+    return Xbyak::Xmm(int(loc) - int(HostLoc::XMM0));
+}

 } // namespace Dynarmic::Backend::X64
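Note: the `inline` keyword is what makes this move legal. Without it, every translation unit including hostloc.h would emit its own external definition and violate the one-definition rule. A minimal standalone illustration of the rule (hypothetical function, not from the patch):

    // in some_header.h: safe to include from many .cpp files
    inline int Twice(int x) noexcept { return 2 * x; }

    int main() { return Twice(2) == 4 ? 0 : 1; }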
@@ -24,15 +24,6 @@

 namespace Dynarmic::Backend::X64 {

-#define MAYBE_AVX(OPCODE, ...) \
-    [&] { \
-        if (code->HasHostFeature(HostFeature::AVX)) { \
-            code->v##OPCODE(__VA_ARGS__); \
-        } else { \
-            code->OPCODE(__VA_ARGS__); \
-        } \
-    }()
-
 static inline bool CanExchange(const HostLoc a, const HostLoc b) noexcept {
     return HostLocIsGPR(a) && HostLocIsGPR(b);
 }
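Note: the MAYBE_AVX helper is deleted here only to be reintroduced further down this file (see the @@ -568 hunk), rewritten for a BlockOfCode reference and #undef'ed right after its last user, which keeps the macro's scope as narrow as possible.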
@@ -107,14 +98,14 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
     max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
 }

-void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
+void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept {
     using namespace Xbyak::util;
     for (auto const value : values) {
-        code->mov(code->ABI_PARAM1, rsp);
-        code->mov(code->ABI_PARAM2, host_loc_index);
-        code->mov(code->ABI_PARAM3, value->GetName());
-        code->mov(code->ABI_PARAM4, GetBitWidth(value->GetType()));
-        code->CallFunction(PrintVerboseDebuggingOutputLine);
+        code.mov(code.ABI_PARAM1, rsp);
+        code.mov(code.ABI_PARAM2, host_loc_index);
+        code.mov(code.ABI_PARAM3, value->GetName());
+        code.mov(code.ABI_PARAM4, GetBitWidth(value->GetType()));
+        code.CallFunction(PrintVerboseDebuggingOutputLine);
     }
 }

@@ -128,7 +119,7 @@ bool Argument::FitsInImmediateU32() const noexcept {
 bool Argument::FitsInImmediateS32() const noexcept {
     if (!IsImmediate())
         return false;
-    const s64 imm = static_cast<s64>(value.GetImmediateAsU64());
+    const s64 imm = s64(value.GetImmediateAsU64());
     return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
 }
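Note: the bounds in FitsInImmediateS32 are exactly the values that survive x86-64's imm32 encoding, where a 32-bit immediate is sign-extended to 64 bits by the CPU. A self-contained restatement of the predicate (my own helper, not the patch's):

    #include <cassert>
    #include <cstdint>

    bool FitsInImmS32(uint64_t raw) {
        const int64_t imm = static_cast<int64_t>(raw);
        return -int64_t(0x80000000) <= imm && imm <= int64_t(0x7FFFFFFF);
    }

    int main() {
        assert(FitsInImmS32(0x7FFFFFFFu));                  // max positive imm32
        assert(FitsInImmS32(UINT64_C(0xFFFFFFFF80000000))); // sign-extends back exactly
        assert(!FitsInImmS32(UINT64_C(0x80000000)));        // would not round-trip
    }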
@@ -174,36 +165,38 @@ IR::AccType Argument::GetImmediateAccType() const noexcept {
 }

 /// Is this value currently in a GPR?
-bool Argument::IsInGpr() const noexcept {
+bool Argument::IsInGpr(RegAlloc& reg_alloc) const noexcept {
     if (IsImmediate())
         return false;
     return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
 }

 /// Is this value currently in a XMM?
-bool Argument::IsInXmm() const noexcept {
+bool Argument::IsInXmm(RegAlloc& reg_alloc) const noexcept {
     if (IsImmediate())
         return false;
     return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
 }

 /// Is this value currently in memory?
-bool Argument::IsInMemory() const noexcept {
+bool Argument::IsInMemory(RegAlloc& reg_alloc) const noexcept {
     if (IsImmediate())
         return false;
     return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
 }

-RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
+RegAlloc::RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
     : gpr_order(gpr_order),
-      xmm_order(xmm_order),
-      code(code)
+      xmm_order(xmm_order)
 {}

+//static std::uint64_t Zfncwjkrt_blockOfCodeShim = 0;
+
 RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept {
-    ArgumentInfo ret{Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
+    ArgumentInfo ret{
+        Argument{},
+        Argument{},
+        Argument{},
+        Argument{}
+    };
     for (size_t i = 0; i < inst->NumArgs(); i++) {
         const auto arg = inst->GetArg(i);
         ret[i].value = arg;
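Note: dropping the stored `RegAlloc&` is what makes the bare `Argument{}` above possible, and it also shrinks the struct. A standalone sketch of the size effect (layouts are illustrative; exact sizes are implementation-defined):

    #include <cstdio>

    struct Before { long value; void* reg_alloc; bool allocated; };  // reference stored
    struct After  { long value; bool allocated; };                   // reference passed

    int main() {
        // Typically 24 vs 16 bytes on LP64, from the pointer plus padding.
        std::printf("%zu %zu\n", sizeof(Before), sizeof(After));
    }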
@@ -228,34 +221,34 @@ void RegAlloc::RegisterPseudoOperation(const IR::Inst* inst) noexcept {
     }
 }

-Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) noexcept {
+Xbyak::Reg64 RegAlloc::UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
+    return HostLocToReg64(UseScratchImpl(code, arg.value, gpr_order));
 }

-Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) noexcept {
+Xbyak::Xmm RegAlloc::UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
+    return HostLocToXmm(UseScratchImpl(code, arg.value, xmm_order));
 }

-void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) noexcept {
+void RegAlloc::UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    UseScratchImpl(arg.value, {host_loc});
+    UseScratchImpl(code, arg.value, {host_loc});
 }

-void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
+void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
     ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
     const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
-    DefineValueImpl(inst, hostloc);
+    DefineValueImpl(code, inst, hostloc);
 }

-void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) noexcept {
+void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    DefineValueImpl(inst, arg.value);
+    DefineValueImpl(code, inst, arg.value);
 }

 void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
@@ -264,9 +257,9 @@ void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
     LocInfo(hostloc).ReleaseOne();
 }

-HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
+HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
     if (use_value.IsImmediate()) {
-        return LoadImmediate(use_value, ScratchImpl(desired_locations));
+        return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
     }

     const auto* use_inst = use_value.GetInst();
@@ -280,25 +273,25 @@ HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::c
     }

     if (LocInfo(current_location).IsLocked()) {
-        return UseScratchImpl(use_value, desired_locations);
+        return UseScratchImpl(code, use_value, desired_locations);
     }

     const HostLoc destination_location = SelectARegister(desired_locations);
     if (max_bit_width > HostLocBitWidth(destination_location)) {
-        return UseScratchImpl(use_value, desired_locations);
+        return UseScratchImpl(code, use_value, desired_locations);
     } else if (CanExchange(destination_location, current_location)) {
-        Exchange(destination_location, current_location);
+        Exchange(code, destination_location, current_location);
     } else {
-        MoveOutOfTheWay(destination_location);
-        Move(destination_location, current_location);
+        MoveOutOfTheWay(code, destination_location);
+        Move(code, destination_location, current_location);
     }
     LocInfo(destination_location).ReadLock();
     return destination_location;
 }

-HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
+HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
     if (use_value.IsImmediate()) {
-        return LoadImmediate(use_value, ScratchImpl(desired_locations));
+        return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
     }

     const auto* use_inst = use_value.GetInst();
@@ -308,7 +301,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
     const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
     if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
         if (!LocInfo(current_location).IsLastUse()) {
-            MoveOutOfTheWay(current_location);
+            MoveOutOfTheWay(code, current_location);
         } else {
             LocInfo(current_location).SetLastUse();
         }
@@ -317,20 +310,22 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
     }

     const HostLoc destination_location = SelectARegister(desired_locations);
-    MoveOutOfTheWay(destination_location);
-    CopyToScratch(bit_width, destination_location, current_location);
+    MoveOutOfTheWay(code, destination_location);
+    CopyToScratch(code, bit_width, destination_location, current_location);
     LocInfo(destination_location).WriteLock();
     return destination_location;
 }

-HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
+HostLoc RegAlloc::ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
     const HostLoc location = SelectARegister(desired_locations);
-    MoveOutOfTheWay(location);
+    MoveOutOfTheWay(code, location);
     LocInfo(location).WriteLock();
     return location;
 }

-void RegAlloc::HostCall(IR::Inst* result_def,
+void RegAlloc::HostCall(
+    BlockOfCode& code,
+    IR::Inst* result_def,
     const std::optional<Argument::copyable_reference> arg0,
     const std::optional<Argument::copyable_reference> arg1,
     const std::optional<Argument::copyable_reference> arg2,
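Note: the Use/UseScratch split is visible in these hunks: UseImpl ends in ReadLock() because the value must survive intact in that register, while UseScratchImpl copies the value to a fresh location and takes WriteLock() so the caller is free to clobber it.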
@@ -348,20 +343,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
         return ret;
     }();

-    ScratchGpr(ABI_RETURN);
-    if (result_def) {
-        DefineValueImpl(result_def, ABI_RETURN);
-    }
+    ScratchGpr(code, ABI_RETURN);
+    if (result_def)
+        DefineValueImpl(code, result_def, ABI_RETURN);

     for (size_t i = 0; i < args.size(); i++) {
         if (args[i]) {
-            UseScratch(*args[i], args_hostloc[i]);
+            UseScratch(code, *args[i], args_hostloc[i]);
         } else {
-            ScratchGpr(args_hostloc[i]); // TODO: Force spill
+            ScratchGpr(code, args_hostloc[i]); // TODO: Force spill
         }
     }
     // Must match with ScratchImpl
     for (auto const gpr : other_caller_save) {
-        MoveOutOfTheWay(gpr);
+        MoveOutOfTheWay(code, gpr);
         LocInfo(gpr).WriteLock();
     }
     for (size_t i = 0; i < args.size(); i++) {
@@ -370,13 +365,13 @@ void RegAlloc::HostCall(IR::Inst* result_def,
         const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
         switch (args[i]->get().GetType()) {
         case IR::Type::U8:
-            code->movzx(reg.cvt32(), reg.cvt8());
+            code.movzx(reg.cvt32(), reg.cvt8());
             break;
         case IR::Type::U16:
-            code->movzx(reg.cvt32(), reg.cvt16());
+            code.movzx(reg.cvt32(), reg.cvt16());
             break;
         case IR::Type::U32:
-            code->mov(reg.cvt32(), reg.cvt32());
+            code.mov(reg.cvt32(), reg.cvt32());
             break;
         case IR::Type::U64:
             break; // no-op
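Note: the movzx/mov fixups enforce the usual x86-64 convention that narrow integer arguments are passed zero-extended: any write to a 32-bit register clears bits 32 through 63, and movzx clears everything above the source width. A C++ analogue of the guarantee the emitted instruction provides:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t raw = 0xDEADBEEFCAFEF00Dull;      // stale high bits around a u8 payload
        uint8_t arg = static_cast<uint8_t>(raw);   // the logical 8-bit argument
        uint64_t passed = arg;                     // zero-extended, as movzx would do
        std::printf("0x%llx\n", (unsigned long long)passed);  // prints 0xd
    }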
@@ -387,18 +382,18 @@ void RegAlloc::HostCall(IR::Inst* result_def,
     }
 }

-void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
+void RegAlloc::AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept {
     ASSERT(stack_space < size_t((std::numeric_limits<s32>::max)()));
     ASSERT(reserved_stack_space == 0);
     reserved_stack_space = stack_space;
-    code->sub(code->rsp, u32(stack_space));
+    code.sub(code.rsp, u32(stack_space));
 }

-void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
+void RegAlloc::ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept {
     ASSERT(stack_space < size_t((std::numeric_limits<s32>::max)()));
     ASSERT(reserved_stack_space == stack_space);
     reserved_stack_space = 0;
-    code->add(code->rsp, u32(stack_space));
+    code.add(code.rsp, u32(stack_space));
 }

 HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
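Note: reserved_stack_space doubles as a safety net here: the asserts force AllocStackSpace and ReleaseStackSpace to be paired with identical sizes, so an unbalanced rsp adjustment is caught at emit time rather than surfacing later as a corrupted stack.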
@@ -458,92 +453,75 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
     return *it_final;
 }

-void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
+std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const noexcept {
+    for (size_t i = 0; i < hostloc_info.size(); i++)
+        if (hostloc_info[i].ContainsValue(value))
+            return HostLoc(i);
+    return std::nullopt;
+}
+
+void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept {
     ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined");
     LocInfo(host_loc).AddValue(def_inst);
 }

-void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
+void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
     ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined");

     if (use_inst.IsImmediate()) {
-        const HostLoc location = ScratchImpl(gpr_order);
-        DefineValueImpl(def_inst, location);
-        LoadImmediate(use_inst, location);
+        const HostLoc location = ScratchImpl(code, gpr_order);
+        DefineValueImpl(code, def_inst, location);
+        LoadImmediate(code, use_inst, location);
         return;
     }

     ASSERT(ValueLocation(use_inst.GetInst()) && "use_inst must already be defined");
     const HostLoc location = *ValueLocation(use_inst.GetInst());
-    DefineValueImpl(def_inst, location);
+    DefineValueImpl(code, def_inst, location);
 }

-HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
-    ASSERT(imm.IsImmediate() && "imm is not an immediate");
-    if (HostLocIsGPR(host_loc)) {
-        const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
-        const u64 imm_value = imm.GetImmediateAsU64();
-        if (imm_value == 0) {
-            code->xor_(reg.cvt32(), reg.cvt32());
-        } else {
-            code->mov(reg, imm_value);
-        }
-    } else if (HostLocIsXMM(host_loc)) {
-        const Xbyak::Xmm reg = HostLocToXmm(host_loc);
-        const u64 imm_value = imm.GetImmediateAsU64();
-        if (imm_value == 0) {
-            MAYBE_AVX(xorps, reg, reg);
-        } else {
-            MAYBE_AVX(movaps, reg, code->Const(code->xword, imm_value));
-        }
-    } else {
-        UNREACHABLE();
-    }
-    return host_loc;
-}
-
-void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
+void RegAlloc::Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept {
     const size_t bit_width = LocInfo(from).GetMaxBitWidth();
     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
     ASSERT(bit_width <= HostLocBitWidth(to));
     ASSERT(!LocInfo(from).IsEmpty() && "Mov eliminated");
-    EmitMove(bit_width, to, from);
+    EmitMove(code, bit_width, to, from);
     LocInfo(to) = std::exchange(LocInfo(from), {});
 }

-void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
+void RegAlloc::CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept {
     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
-    EmitMove(bit_width, to, from);
+    EmitMove(code, bit_width, to, from);
 }

-void RegAlloc::Exchange(HostLoc a, HostLoc b) noexcept {
+void RegAlloc::Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept {
     ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
     ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
     ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));

     if (LocInfo(a).IsEmpty()) {
-        Move(a, b);
+        Move(code, a, b);
     } else if (LocInfo(b).IsEmpty()) {
-        Move(b, a);
+        Move(code, b, a);
     } else {
-        EmitExchange(a, b);
+        EmitExchange(code, a, b);
         std::swap(LocInfo(a), LocInfo(b));
     }
 }

-void RegAlloc::MoveOutOfTheWay(HostLoc reg) noexcept {
+void RegAlloc::MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept {
     ASSERT(!LocInfo(reg).IsLocked());
     if (!LocInfo(reg).IsEmpty()) {
-        SpillRegister(reg);
+        SpillRegister(code, reg);
     }
 }

-void RegAlloc::SpillRegister(HostLoc loc) noexcept {
+void RegAlloc::SpillRegister(BlockOfCode& code, HostLoc loc) noexcept {
     ASSERT(HostLocIsRegister(loc) && "Only registers can be spilled");
     ASSERT(!LocInfo(loc).IsEmpty() && "There is no need to spill unoccupied registers");
     ASSERT(!LocInfo(loc).IsLocked() && "Registers that have been allocated must not be spilt");
     auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
-    Move(new_loc, loc);
+    Move(code, new_loc, loc);
 }

 HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
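Note: ValueLocation graduates from an inline header loop to an out-of-line definition (the header side appears in a later hunk), and Move() transfers its bookkeeping with `LocInfo(to) = std::exchange(LocInfo(from), {})`: assign the old state to the destination and reset the source in a single expression. A minimal illustration of the idiom:

    #include <cassert>
    #include <utility>

    int main() {
        int from = 42, to = 0;
        to = std::exchange(from, 0);  // `to` gets the old value, `from` is cleared
        assert(to == 42 && from == 0);
    }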
@@ -568,9 +546,39 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
         if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
             return loc;
     UNREACHABLE();
 }

-void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
+#define MAYBE_AVX(OPCODE, ...) \
+    [&] { \
+        if (code.HasHostFeature(HostFeature::AVX)) code.v##OPCODE(__VA_ARGS__); \
+        else code.OPCODE(__VA_ARGS__); \
+    }()
+
+HostLoc RegAlloc::LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept {
+    ASSERT(imm.IsImmediate() && "imm is not an immediate");
+    if (HostLocIsGPR(host_loc)) {
+        const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
+        const u64 imm_value = imm.GetImmediateAsU64();
+        if (imm_value == 0) {
+            code.xor_(reg.cvt32(), reg.cvt32());
+        } else {
+            code.mov(reg, imm_value);
+        }
+    } else if (HostLocIsXMM(host_loc)) {
+        const Xbyak::Xmm reg = HostLocToXmm(host_loc);
+        const u64 imm_value = imm.GetImmediateAsU64();
+        if (imm_value == 0) {
+            MAYBE_AVX(xorps, reg, reg);
+        } else {
+            MAYBE_AVX(movaps, reg, code.Const(code.xword, imm_value));
+        }
+    } else {
+        UNREACHABLE();
+    }
+    return host_loc;
+}
+
+void RegAlloc::EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
     auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
         ASSERT(HostLocIsSpill(loc));
         size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
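Note: wrapping the AVX-or-SSE branch in an immediately invoked lambda makes MAYBE_AVX behave like a single expression, so it can sit anywhere a call could (no dangling-else surprises). A standalone demo of the same pattern, with invented names:

    #include <cstdio>

    struct Code {
        bool HasAVX() const { return true; }
        void vxorps() { std::puts("vxorps"); }
        void xorps() { std::puts("xorps"); }
    };

    #define MAYBE_AVX(OP, codeobj) \
        [&] { \
            if ((codeobj).HasAVX()) (codeobj).v##OP(); \
            else (codeobj).OP(); \
        }()

    int main() {
        Code code;
        MAYBE_AVX(xorps, code);  // dispatches to vxorps on AVX hosts
    }
    #undef MAYBE_AVX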
@@ -585,9 +593,9 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
     } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), HostLocToReg64(from));
+            code.mov(HostLocToReg64(to), HostLocToReg64(from));
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
+            code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
         }
     } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
@@ -642,25 +650,26 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
     } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
+            code.mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
+            code.mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
         }
     } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
+            code.mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
         } else {
-            code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
+            code.mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
         }
     } else {
         UNREACHABLE();
     }
 }
+#undef MAYBE_AVX

-void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
+void RegAlloc::EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept {
     ASSERT(HostLocIsGPR(a) && HostLocIsGPR(b) && "Exchanging XMM registers is unneeded OR invalid emit");
-    code->xchg(HostLocToReg64(a), HostLocToReg64(b));
+    code.xchg(HostLocToReg64(a), HostLocToReg64(b));
 }

 } // namespace Dynarmic::Backend::X64
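Note: the assertion in EmitExchange matches the ISA: x86 xchg exists only for general-purpose registers and memory operands, so exchanging two XMM registers would require a multi-move sequence; the allocator instead arranges (via CanExchange) never to need one.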
@@ -81,7 +81,7 @@ public:
         return 1 << max_bit_width;
     }
     void AddValue(IR::Inst* inst) noexcept;
-    void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
+    void EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept;
 private:
     // non-trivial
     boost::container::small_vector<IR::Inst*, 3> values; // 24
@@ -129,16 +129,15 @@ public:
     IR::AccType GetImmediateAccType() const noexcept;

     /// Is this value currently in a GPR?
-    bool IsInGpr() const noexcept;
-    bool IsInXmm() const noexcept;
-    bool IsInMemory() const noexcept;
+    bool IsInGpr(RegAlloc& reg_alloc) const noexcept;
+    bool IsInXmm(RegAlloc& reg_alloc) const noexcept;
+    bool IsInMemory(RegAlloc& reg_alloc) const noexcept;
 private:
     friend class RegAlloc;
-    explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
+    explicit Argument() {}

     // data
     IR::Value value; // 8
-    RegAlloc& reg_alloc; // 8
     bool allocated = false; // 1
 };
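Note: the trailing byte counts (// 24, // 8, // 1) annotate each member's size; deleting the stored RegAlloc& removes eight bytes plus padding from every Argument, which matters because an ArgumentInfo holds four of them per emitted instruction.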
@@ -146,55 +145,57 @@ class RegAlloc final {
 public:
     using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
     RegAlloc() noexcept = default;
-    RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
+    RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;

     ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
     void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
     inline bool IsValueLive(const IR::Inst* inst) const noexcept {
         return !!ValueLocation(inst);
     }
-    inline Xbyak::Reg64 UseGpr(Argument& arg) noexcept {
+    inline Xbyak::Reg64 UseGpr(BlockOfCode& code, Argument& arg) noexcept {
         ASSERT(!arg.allocated);
         arg.allocated = true;
-        return HostLocToReg64(UseImpl(arg.value, gpr_order));
+        return HostLocToReg64(UseImpl(code, arg.value, gpr_order));
     }
-    inline Xbyak::Xmm UseXmm(Argument& arg) noexcept {
+    inline Xbyak::Xmm UseXmm(BlockOfCode& code, Argument& arg) noexcept {
         ASSERT(!arg.allocated);
         arg.allocated = true;
-        return HostLocToXmm(UseImpl(arg.value, xmm_order));
+        return HostLocToXmm(UseImpl(code, arg.value, xmm_order));
     }
-    inline OpArg UseOpArg(Argument& arg) noexcept {
-        return UseGpr(arg);
+    inline OpArg UseOpArg(BlockOfCode& code, Argument& arg) noexcept {
+        return UseGpr(code, arg);
     }
-    inline void Use(Argument& arg, const HostLoc host_loc) noexcept {
+    inline void Use(BlockOfCode& code, Argument& arg, const HostLoc host_loc) noexcept {
         ASSERT(!arg.allocated);
         arg.allocated = true;
-        UseImpl(arg.value, {host_loc});
+        UseImpl(code, arg.value, {host_loc});
     }

-    Xbyak::Reg64 UseScratchGpr(Argument& arg) noexcept;
-    Xbyak::Xmm UseScratchXmm(Argument& arg) noexcept;
-    void UseScratch(Argument& arg, HostLoc host_loc) noexcept;
+    Xbyak::Reg64 UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept;
+    Xbyak::Xmm UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept;
+    void UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept;

-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
-    void DefineValue(IR::Inst* inst, Argument& arg) noexcept;
+    void DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
+    void DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept;

     void Release(const Xbyak::Reg& reg) noexcept;

-    inline Xbyak::Reg64 ScratchGpr() noexcept {
-        return HostLocToReg64(ScratchImpl(gpr_order));
+    inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code) noexcept {
+        return HostLocToReg64(ScratchImpl(code, gpr_order));
     }
-    inline Xbyak::Reg64 ScratchGpr(const HostLoc desired_location) noexcept {
-        return HostLocToReg64(ScratchImpl({desired_location}));
+    inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code, const HostLoc desired_location) noexcept {
+        return HostLocToReg64(ScratchImpl(code, {desired_location}));
     }
-    inline Xbyak::Xmm ScratchXmm() noexcept {
-        return HostLocToXmm(ScratchImpl(xmm_order));
+    inline Xbyak::Xmm ScratchXmm(BlockOfCode& code) noexcept {
+        return HostLocToXmm(ScratchImpl(code, xmm_order));
     }
-    inline Xbyak::Xmm ScratchXmm(HostLoc desired_location) noexcept {
-        return HostLocToXmm(ScratchImpl({desired_location}));
+    inline Xbyak::Xmm ScratchXmm(BlockOfCode& code, HostLoc desired_location) noexcept {
+        return HostLocToXmm(ScratchImpl(code, {desired_location}));
     }

-    void HostCall(IR::Inst* result_def = nullptr,
+    void HostCall(
+        BlockOfCode& code,
+        IR::Inst* result_def = nullptr,
         const std::optional<Argument::copyable_reference> arg0 = {},
         const std::optional<Argument::copyable_reference> arg1 = {},
         const std::optional<Argument::copyable_reference> arg2 = {},
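Note: with BlockOfCode& leading the parameter list, the defaulted trailing arguments still work, and call sites change uniformly, e.g. from HostCall(inst, args[0]) to HostCall(code, inst, args[0]) (shapes inferred from the new signature rather than quoted from specific call sites).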
@@ -202,67 +203,56 @@ public:
     ) noexcept;

     // TODO: Values in host flags
-    void AllocStackSpace(const size_t stack_space) noexcept;
-    void ReleaseStackSpace(const size_t stack_space) noexcept;
+    void AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
+    void ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;

     inline void EndOfAllocScope() noexcept {
-        for (auto& iter : hostloc_info) {
+        for (auto& iter : hostloc_info)
             iter.ReleaseAll();
-        }
     }
     inline void AssertNoMoreUses() noexcept {
         ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) noexcept { return i.IsEmpty(); }));
     }
-    inline void EmitVerboseDebuggingOutput() noexcept {
-        for (size_t i = 0; i < hostloc_info.size(); i++) {
+    inline void EmitVerboseDebuggingOutput(BlockOfCode& code) noexcept {
+        for (size_t i = 0; i < hostloc_info.size(); i++)
             hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
-        }
     }
 private:
     friend struct Argument;

     HostLoc SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept;
-    inline std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept {
-        for (size_t i = 0; i < hostloc_info.size(); i++) {
-            if (hostloc_info[i].ContainsValue(value)) {
-                return HostLoc(i);
-            }
-        }
-        return std::nullopt;
-    }
+    std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept;
+    HostLoc UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
+    HostLoc UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
+    HostLoc ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
+    void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept;
+    void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept;

-    HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
-    HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
-    HostLoc ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
-    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept;
-    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
+    HostLoc LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept;
+    void Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept;
+    void CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept;
+    void Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept;
+    void MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept;

-    HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept;
-    void Move(HostLoc to, HostLoc from) noexcept;
-    void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept;
-    void Exchange(HostLoc a, HostLoc b) noexcept;
-    void MoveOutOfTheWay(HostLoc reg) noexcept;
-
-    void SpillRegister(HostLoc loc) noexcept;
+    void SpillRegister(BlockOfCode& code, HostLoc loc) noexcept;
     HostLoc FindFreeSpill(bool is_xmm) const noexcept;

     inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
         ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
-        return hostloc_info[static_cast<size_t>(loc)];
+        return hostloc_info[size_t(loc)];
     }
     inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
         ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
-        return hostloc_info[static_cast<size_t>(loc)];
+        return hostloc_info[size_t(loc)];
     }

-    void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
-    void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
+    void EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
+    void EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept;

     // data
     alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;
     alignas(64) boost::container::static_vector<HostLoc, 28> xmm_order;
     alignas(64) std::array<HostLocInfo, NonSpillHostLocCount + SpillCount> hostloc_info;
-    BlockOfCode* code = nullptr;
     size_t reserved_stack_space = 0;
 };
 // Ensure a cache line (or less) is used; this is essential
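Note: the alignas(64) annotations and the closing comment concern cache-line budgeting for the allocator's hot state. A standalone illustration of the mechanism (hypothetical struct, not dynarmic's):

    #include <cstddef>

    struct alignas(64) Hot {
        unsigned a[12];  // 48 bytes
        size_t b;        // 8 bytes: 56 of 64 used
    };
    static_assert(alignof(Hot) == 64, "starts on a cache-line boundary");
    static_assert(sizeof(Hot) == 64, "padded to exactly one line");

    int main() {}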
@@ -100,9 +100,14 @@ bool Value::IsEmpty() const noexcept {
 }

 bool Value::IsImmediate() const noexcept {
-    if (IsIdentity())
-        return inner.inst->GetArg(0).IsImmediate();
-    return type != Type::Opaque;
+    IR::Type current_type = type;
+    IR::Inst const* current_inst = inner.inst;
+    while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
+        Value const& arg = current_inst->GetArg(0);
+        current_type = arg.type;
+        current_inst = arg.inner.inst;
+    }
+    return current_type != Type::Opaque;
 }

 Type Value::GetType() const noexcept {
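Note: the new IsImmediate walks arbitrarily long chains of Identity instructions with an explicit loop, where the old version recursed one level per call. A standalone analogue with invented stand-in types:

    #include <cassert>

    struct Node {
        bool is_identity;
        bool is_immediate;  // meaningful when !is_identity
        const Node* arg;    // next link when is_identity
    };

    bool IsImmediate(const Node* n) {
        while (n->is_identity)  // iterative walk, no recursion
            n = n->arg;
        return n->is_immediate;
    }

    int main() {
        Node imm{false, true, nullptr};
        Node id1{true, false, &imm};
        Node id2{true, false, &id1};
        assert(IsImmediate(&id2));  // resolves through both identity links
    }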