[vk] SL Sample Count Clamp
This commit is contained in:
committed by
Caio Oliveira
parent
4b0f581973
commit
7d69ff6044
@@ -830,14 +830,17 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
|
||||
const auto msaa_mode = key.state.msaa_mode.Value();
|
||||
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
|
||||
const auto [grid_width, grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
|
||||
const auto [guest_grid_width, guest_grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
|
||||
const auto& sample_location_props = device.SampleLocationProperties();
|
||||
const bool grid_within_limits = grid_width <= sample_location_props.maxSampleLocationGridSize.width &&
|
||||
grid_height <= sample_location_props.maxSampleLocationGridSize.height;
|
||||
const VkExtent2D host_grid_limit = device.SampleLocationGridSizeFor(vk_samples);
|
||||
const VkExtent2D grid_size{
|
||||
.width = (std::max)(1u, (std::min)(guest_grid_width, host_grid_limit.width)),
|
||||
.height = (std::max)(1u, (std::min)(guest_grid_height, host_grid_limit.height)),
|
||||
};
|
||||
const bool supports_sample_locations = device.IsExtSampleLocationsSupported() &&
|
||||
device.SupportsSampleLocationsFor(vk_samples) &&
|
||||
sample_location_props.variableSampleLocations == VK_TRUE &&
|
||||
grid_within_limits;
|
||||
grid_size.width > 0 && grid_size.height > 0;
|
||||
|
||||
VkPipelineMultisampleStateCreateInfo multisample_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
@@ -863,13 +866,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
sample_locations_chain.emplace();
|
||||
auto& chain = *sample_locations_chain;
|
||||
const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
|
||||
const u32 sample_locations_count = grid_width * grid_height * samples_per_pixel;
|
||||
const u32 sample_locations_count = grid_size.width * grid_size.height * samples_per_pixel;
|
||||
chain.locations.fill(VkSampleLocationEXT{0.5f, 0.5f});
|
||||
chain.info = VkSampleLocationsInfoEXT{
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.sampleLocationsPerPixel = vk_samples,
|
||||
.sampleLocationGridSize = VkExtent2D{grid_width, grid_height},
|
||||
.sampleLocationGridSize = grid_size,
|
||||
.sampleLocationsCount = sample_locations_count,
|
||||
.pSampleLocations = chain.locations.data(),
|
||||
};
|
||||
|
||||
@@ -1712,16 +1712,20 @@ void RasterizerVulkan::UpdateSampleLocations(Maxwell& regs) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto [grid_width_u32, grid_height_u32] = VideoCommon::SampleLocationGridSize(msaa_mode);
|
||||
const u32 grid_width = grid_width_u32;
|
||||
const u32 grid_height = grid_height_u32;
|
||||
if (grid_width > sample_props.maxSampleLocationGridSize.width ||
|
||||
grid_height > sample_props.maxSampleLocationGridSize.height) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Sample location grid {}x{} exceeds device limit {}x{}, falling back to fixed pattern",
|
||||
grid_width, grid_height, sample_props.maxSampleLocationGridSize.width,
|
||||
sample_props.maxSampleLocationGridSize.height);
|
||||
return;
|
||||
const auto [guest_grid_width, guest_grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
|
||||
const VkExtent2D host_grid_limit = device.SampleLocationGridSizeFor(vk_samples);
|
||||
const u32 grid_width = (std::max)(1u, (std::min)(guest_grid_width, host_grid_limit.width));
|
||||
const u32 grid_height = (std::max)(1u, (std::min)(guest_grid_height, host_grid_limit.height));
|
||||
const bool grid_clamped = grid_width != guest_grid_width || grid_height != guest_grid_height;
|
||||
if (grid_clamped) {
|
||||
static bool logged_clamp = false;
|
||||
if (!logged_clamp) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Host supports sample grid up to {}x{} (requested {}x{}); clamping",
|
||||
host_grid_limit.width, host_grid_limit.height, guest_grid_width,
|
||||
guest_grid_height);
|
||||
logged_clamp = true;
|
||||
}
|
||||
}
|
||||
const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
|
||||
const u32 grid_cells = grid_width * grid_height;
|
||||
@@ -1747,8 +1751,16 @@ void RasterizerVulkan::UpdateSampleLocations(Maxwell& regs) {
|
||||
const u32 slot_base = cell * samples_per_pixel;
|
||||
const u32 cell_x = cell % grid_width;
|
||||
const u32 cell_y = cell / grid_width;
|
||||
const u32 guest_cell_x = guest_grid_width == grid_width
|
||||
? cell_x
|
||||
: (cell_x * guest_grid_width) / grid_width;
|
||||
const u32 guest_cell_y = guest_grid_height == grid_height
|
||||
? cell_y
|
||||
: (cell_y * guest_grid_height) / grid_height;
|
||||
const u32 guest_cell = guest_cell_y * guest_grid_width + guest_cell_x;
|
||||
const u32 guest_slot_base = guest_cell * samples_per_pixel;
|
||||
for (u32 sample = 0; sample < samples_per_pixel; ++sample) {
|
||||
const VkSampleLocationEXT raw = raw_locations[slot_base + sample];
|
||||
const VkSampleLocationEXT raw = raw_locations[guest_slot_base + sample];
|
||||
const float sample_x = static_cast<float>(cell_x) + raw.x;
|
||||
const float sample_y = static_cast<float>(cell_y) + raw.y;
|
||||
resolved[slot_base + sample] = VkSampleLocationEXT{
|
||||
|
||||
@@ -110,6 +110,24 @@ constexpr std::array R16G16B16A16_UNORM{
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
constexpr std::array<VkSampleCountFlagBits, Device::sample_location_table_size>
|
||||
sample_location_query_counts{
|
||||
VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_2_BIT, VK_SAMPLE_COUNT_4_BIT,
|
||||
VK_SAMPLE_COUNT_8_BIT, VK_SAMPLE_COUNT_16_BIT, VK_SAMPLE_COUNT_32_BIT,
|
||||
VK_SAMPLE_COUNT_64_BIT,
|
||||
};
|
||||
|
||||
static_assert(sample_location_query_counts.size() == Device::sample_location_table_size);
|
||||
|
||||
/// Finds the slot of a sample count inside sample_location_query_counts.
/// @param samples Sample count bit to look up.
/// @return Zero-based position of the first matching entry, or
///         sample_location_query_counts.size() when no entry equals @p samples.
constexpr size_t SampleCountIndex(VkSampleCountFlagBits samples) {
    size_t position = 0;
    for (const VkSampleCountFlagBits candidate : sample_location_query_counts) {
        if (candidate == samples) {
            break;
        }
        ++position;
    }
    // Falling off the end leaves position == size(), which callers treat as "not found".
    return position;
}
|
||||
|
||||
[[maybe_unused]] constexpr VkShaderStageFlags GraphicsStageMask =
|
||||
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
|
||||
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT |
|
||||
@@ -1312,6 +1330,8 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
|
||||
}
|
||||
|
||||
PopulateSampleLocationGrids();
|
||||
|
||||
// Return whether we were suitable.
|
||||
return suitable;
|
||||
}
|
||||
@@ -1578,6 +1598,66 @@ void Device::RemoveUnsuitableExtensions() {
|
||||
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance4, features.maintenance4,
|
||||
VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||
|
||||
|
||||
// NOTE(review): this hunk places the definition between the maintenance4 and maintenance5
// handling of RemoveUnsuitableExtensions(); confirm it ends up at namespace scope.

/// Returns the sample location grid size stored for the given sample count.
/// @param samples Sample count bit whose grid size is requested.
/// @return The cached entry for @p samples when extensions.sample_locations is set, the sample
///         count is present in sample_location_query_counts and the entry has no zero dimension.
///         Otherwise properties.sample_locations.maxSampleLocationGridSize, with an extent that
///         has a zero dimension replaced by 1x1.
VkExtent2D Device::SampleLocationGridSizeFor(VkSampleCountFlagBits samples) const {
    const auto has_zero_dimension = [](const VkExtent2D& extent) {
        return extent.width == 0 || extent.height == 0;
    };

    const VkExtent2D device_max = properties.sample_locations.maxSampleLocationGridSize;
    const VkExtent2D fallback = has_zero_dimension(device_max) ? VkExtent2D{1, 1} : device_max;

    if (extensions.sample_locations) {
        const size_t slot = SampleCountIndex(samples);
        if (slot < sample_location_grids.size()) {
            const VkExtent2D cached = sample_location_grids[slot];
            if (!has_zero_dimension(cached)) {
                return cached;
            }
        }
    }
    // Extension disabled, unknown sample count, or unusable cached entry.
    return fallback;
}
|
||||
|
||||
void Device::PopulateSampleLocationGrids() {
|
||||
for (auto& grid : sample_location_grids) {
|
||||
grid = VkExtent2D{1, 1};
|
||||
}
|
||||
if (!extensions.sample_locations) {
|
||||
return;
|
||||
}
|
||||
const auto sanitize = [](VkExtent2D grid) {
|
||||
if (grid.width == 0 || grid.height == 0) {
|
||||
return VkExtent2D{1, 1};
|
||||
}
|
||||
return grid;
|
||||
};
|
||||
const VkExtent2D fallback = sanitize(properties.sample_locations.maxSampleLocationGridSize);
|
||||
const VkSampleCountFlags supported_counts =
|
||||
properties.sample_locations.sampleLocationSampleCounts;
|
||||
if (supported_counts == 0) {
|
||||
return;
|
||||
}
|
||||
const bool can_query = dld.vkGetPhysicalDeviceMultisamplePropertiesEXT != nullptr;
|
||||
for (size_t index = 0; index < sample_location_grids.size(); ++index) {
|
||||
const VkSampleCountFlagBits bit = sample_location_query_counts[index];
|
||||
if ((supported_counts & bit) == 0) {
|
||||
continue;
|
||||
}
|
||||
VkExtent2D grid = fallback;
|
||||
if (can_query) {
|
||||
VkMultisamplePropertiesEXT props{
|
||||
.sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT,
|
||||
.pNext = nullptr,
|
||||
};
|
||||
dld.vkGetPhysicalDeviceMultisamplePropertiesEXT(physical, bit, &props);
|
||||
if (props.maxSampleLocationGridSize.width != 0 &&
|
||||
props.maxSampleLocationGridSize.height != 0) {
|
||||
grid = props.maxSampleLocationGridSize;
|
||||
}
|
||||
}
|
||||
sample_location_grids[index] = grid;
|
||||
}
|
||||
}
|
||||
// VK_KHR_maintenance5
|
||||
extensions.maintenance5 = features.maintenance5.maintenance5;
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <span>
|
||||
@@ -350,6 +351,9 @@ public:
|
||||
return properties.sample_locations;
|
||||
}
|
||||
|
||||
/// Returns the host-supported sample location grid for the requested sample count.
|
||||
/// If extensions.sample_locations is false, the sample count is not one of the tracked counts,
/// or the stored entry has a zero dimension, the device-wide maxSampleLocationGridSize is
/// returned instead, with a zero-sized extent replaced by 1x1.
VkExtent2D SampleLocationGridSizeFor(VkSampleCountFlagBits samples) const;
|
||||
|
||||
/// Returns true if ASTC is natively supported.
|
||||
bool IsOptimalAstcSupported() const {
|
||||
return features.features.textureCompressionASTC_LDR;
|
||||
@@ -976,6 +980,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t sample_location_table_size = 7;
|
||||
|
||||
/// Checks if the physical device is suitable and configures the object state
|
||||
/// with all necessary info about its properties.
|
||||
bool GetSuitability(bool requires_swapchain);
|
||||
@@ -983,6 +989,8 @@ private:
|
||||
// Remove extensions which have incomplete feature support.
|
||||
void RemoveUnsuitableExtensions();
|
||||
|
||||
/// Fills sample_location_grids: every entry is reset to 1x1, then entries for sample counts
/// reported in sampleLocationSampleCounts get the queried or device-wide grid size.
void PopulateSampleLocationGrids();
|
||||
|
||||
void RemoveExtension(bool& extension, const std::string& extension_name);
|
||||
void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
|
||||
|
||||
@@ -1077,6 +1085,8 @@ private:
|
||||
VkPhysicalDeviceFeatures2 features2{};
|
||||
VkPhysicalDeviceProperties2 properties2{};
|
||||
|
||||
/// Sample location grid size per tracked sample count; written by PopulateSampleLocationGrids()
/// and read by SampleLocationGridSizeFor().
std::array<VkExtent2D, sample_location_table_size> sample_location_grids{};
|
||||
|
||||
// Misc features
|
||||
bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats.
|
||||
bool is_blit_depth24_stencil8_supported{}; ///< Support for blitting from and to D24S8.
|
||||
|
||||
@@ -294,6 +294,7 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
|
||||
X(vkDestroySurfaceKHR);
|
||||
X(vkGetPhysicalDeviceFeatures2);
|
||||
X(vkGetPhysicalDeviceProperties2);
|
||||
X(vkGetPhysicalDeviceMultisamplePropertiesEXT);
|
||||
X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
|
||||
X(vkGetPhysicalDeviceSurfaceFormatsKHR);
|
||||
X(vkGetPhysicalDeviceSurfacePresentModesKHR);
|
||||
|
||||
@@ -170,6 +170,7 @@ struct InstanceDispatch {
|
||||
PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
|
||||
PFN_vkGetPhysicalDeviceProperties2 vkGetPhysicalDeviceProperties2{};
|
||||
PFN_vkGetPhysicalDeviceToolProperties vkGetPhysicalDeviceToolProperties{};
|
||||
PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT vkGetPhysicalDeviceMultisamplePropertiesEXT{};
|
||||
PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
|
||||
PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR{};
|
||||
PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR{};
|
||||
|
||||
Reference in New Issue
Block a user