summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp301
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h62
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h78
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp232
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h2
11 files changed, 514 insertions, 199 deletions
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 0bbbf6851..3c5acda3e 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -143,6 +143,7 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBX16F
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 02a9f5ecb..d2e9f4031 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) {
}
}
-VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
- return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
+void VKBufferCache::Send() {
+ stream_buffer->Send(buffer_offset - buffer_offset_base);
}
void VKBufferCache::AlignBuffer(std::size_t alignment) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3edf460df..49f13bcdc 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -77,7 +77,7 @@ public:
void Reserve(std::size_t max_size);
/// Ensures that the set data is sent to the device.
- [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
+ void Send();
/// Returns the buffer cache handle.
vk::Buffer GetBuffer() const {
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 3b966ddc3..897cbb4e8 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -2,9 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <map>
+#include <bitset>
#include <optional>
#include <set>
+#include <string_view>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -12,13 +13,32 @@
namespace Vulkan {
+namespace {
+
+template <typename T>
+void SetNext(void**& next, T& data) {
+ *next = &data;
+ next = &data.pNext;
+}
+
+template <typename T>
+T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
+ vk::PhysicalDeviceFeatures2 features;
+ T extension_features;
+ features.pNext = &extension_features;
+ physical.getFeatures2(&features, dldi);
+ return extension_features;
+}
+
+} // Anonymous namespace
+
namespace Alternatives {
-constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
- vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
-constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
- vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
-constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
+constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
+ vk::Format::eD16UnormS8Uint, vk::Format{}};
+constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint, vk::Format{}};
+constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
} // namespace Alternatives
@@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
- vk::PhysicalDeviceFeatures device_features;
- device_features.vertexPipelineStoresAndAtomics = true;
- device_features.independentBlend = true;
- device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
-
const auto queue_cis = GetDeviceQueueCreateInfos();
- const std::vector<const char*> extensions = LoadExtensions(dldi);
- const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
- 0, nullptr, static_cast<u32>(extensions.size()),
- extensions.data(), &device_features);
+ const std::vector extensions = LoadExtensions(dldi);
+
+ vk::PhysicalDeviceFeatures2 features2;
+ void** next = &features2.pNext;
+ auto& features = features2.features;
+ features.vertexPipelineStoresAndAtomics = true;
+ features.independentBlend = true;
+ features.depthClamp = true;
+ features.samplerAnisotropy = true;
+ features.largePoints = true;
+ features.textureCompressionASTC_LDR = is_optimal_astc_supported;
+
+ vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
+ vertex_divisor.vertexAttributeInstanceRateDivisor = true;
+ vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
+ SetNext(next, vertex_divisor);
+
+ vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
+ if (is_float16_supported) {
+ float16_int8.shaderFloat16 = true;
+ SetNext(next, float16_int8);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
+ }
+
+ vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
+ if (khr_uniform_buffer_standard_layout) {
+ std430_layout.uniformBufferStandardLayout = true;
+ SetNext(next, std430_layout);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
+ }
+
+ vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
+ if (ext_index_type_uint8) {
+ index_type_uint8.indexTypeUint8 = true;
+ SetNext(next, index_type_uint8);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
+ }
+
+ vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
+ nullptr, static_cast<u32>(extensions.size()), extensions.data(),
+ nullptr);
+ device_ci.pNext = &features2;
+
vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
@@ -78,6 +135,17 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
+ if (khr_driver_properties) {
+ vk::PhysicalDeviceDriverPropertiesKHR driver;
+ vk::PhysicalDeviceProperties2 properties;
+ properties.pNext = &driver;
+ physical.getProperties2(&properties, dld);
+ driver_id = driver.driverID;
+ LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
+ } else {
+ LOG_INFO(Render_Vulkan, "Driver: Unknown");
+ }
+
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
return true;
@@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
// The wanted format is not supported by hardware, search for alternatives
const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
- LOG_CRITICAL(Render_Vulkan,
- "Format={} with usage={} and type={} has no defined alternatives and host "
- "hardware does not support it",
- vk::to_string(wanted_format), vk::to_string(wanted_usage),
- static_cast<u32>(format_type));
- UNREACHABLE();
+ UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
+ "hardware does not support it",
+ vk::to_string(wanted_format), vk::to_string(wanted_usage),
+ static_cast<u32>(format_type));
return wanted_format;
}
std::size_t i = 0;
for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
alternative = alternatives[++i]) {
- if (!IsFormatSupported(alternative, wanted_usage, format_type))
+ if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
+ }
LOG_WARNING(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
static_cast<u32>(wanted_format), static_cast<u32>(alternative),
@@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
}
// No alternatives found, panic
- LOG_CRITICAL(Render_Vulkan,
- "Format={} with usage={} and type={} is not supported by the host hardware and "
- "doesn't support any of the alternatives",
- static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
- static_cast<u32>(format_type));
- UNREACHABLE();
+ UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
+ "doesn't support any of the alternatives",
+ static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ static_cast<u32>(format_type));
return wanted_format;
}
@@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
- constexpr std::array<vk::Format, 9> astc_formats = {
+ constexpr std::array astc_formats = {
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
@@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
- LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format));
return true;
}
- const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
+ const auto supported_usage = GetFormatFeatures(it->second, format_type);
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
- bool has_swapchain{};
+ LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
+ bool is_suitable = true;
+
+ constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
+ std::bitset<required_extensions.size()> available_extensions{};
+
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+ if (available_extensions[i]) {
+ continue;
+ }
+ available_extensions[i] =
+ required_extensions[i] == std::string_view{prop.extensionName};
+ }
}
- if (!has_swapchain) {
- // The device doesn't support creating swapchains.
- return false;
+ if (!available_extensions.all()) {
+ for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+ if (available_extensions[i]) {
+ continue;
+ }
+ LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
+ is_suitable = false;
+ }
}
bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
- if (family.queueCount == 0)
+ if (family.queueCount == 0) {
continue;
-
+ }
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
- // The device doesn't have a graphics and present queue.
- return false;
+ LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
+ is_suitable = false;
}
// TODO(Rodrigo): Check if the device matches all requeriments.
const auto properties{physical.getProperties(dldi)};
- const auto limits{properties.limits};
- if (limits.maxUniformBufferRange < 65536) {
- return false;
+ const auto& limits{properties.limits};
+
+ constexpr u32 required_ubo_size = 65536;
+ if (limits.maxUniformBufferRange < required_ubo_size) {
+ LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
+ limits.maxUniformBufferRange, required_ubo_size);
+ is_suitable = false;
}
- const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
- if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
- return false;
+ const auto features{physical.getFeatures(dldi)};
+ const std::array feature_report = {
+ std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+ std::make_pair(features.independentBlend, "independentBlend"),
+ std::make_pair(features.depthClamp, "depthClamp"),
+ std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
+ std::make_pair(features.largePoints, "largePoints"),
+ };
+ for (const auto& [supported, name] : feature_report) {
+ if (supported) {
+ continue;
+ }
+ LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
+ is_suitable = false;
}
- // Device is suitable.
- return true;
+ return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> extensions;
- extensions.reserve(2);
+ extensions.reserve(7);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
- u32 revision) {
- if (extension.extensionName != std::string(name)) {
+ bool push) {
+ if (extension.extensionName != std::string_view(name)) {
return;
}
- extensions.push_back(name);
+ if (push) {
+ extensions.push_back(name);
+ }
if (status) {
status->get() = true;
}
};
+ bool khr_shader_float16_int8{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
+ Test(extension, khr_uniform_buffer_standard_layout,
+ VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+ Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+ Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
+ Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ }
+
+ if (khr_shader_float16_int8) {
+ is_float16_supported =
+ GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
+ extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
return extensions;
@@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
- const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+ const auto props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+ storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
}
@@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
return queue_cis;
}
-std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
+std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
- static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
- vk::Format::eB5G6R5UnormPack16,
- vk::Format::eA2B10G10R10UnormPack32,
- vk::Format::eR32G32B32A32Sfloat,
- vk::Format::eR16G16Unorm,
- vk::Format::eR16G16Snorm,
- vk::Format::eR8G8B8A8Srgb,
- vk::Format::eR8Unorm,
- vk::Format::eB10G11R11UfloatPack32,
- vk::Format::eR32Sfloat,
- vk::Format::eR16Sfloat,
- vk::Format::eR16G16B16A16Sfloat,
- vk::Format::eD32Sfloat,
- vk::Format::eD16Unorm,
- vk::Format::eD16UnormS8Uint,
- vk::Format::eD24UnormS8Uint,
- vk::Format::eD32SfloatS8Uint,
- vk::Format::eBc1RgbaUnormBlock,
- vk::Format::eBc2UnormBlock,
- vk::Format::eBc3UnormBlock,
- vk::Format::eBc4UnormBlock,
- vk::Format::eBc5UnormBlock,
- vk::Format::eBc5SnormBlock,
- vk::Format::eBc7UnormBlock,
- vk::Format::eAstc4x4UnormBlock,
- vk::Format::eAstc4x4SrgbBlock,
- vk::Format::eAstc8x8SrgbBlock,
- vk::Format::eAstc8x6SrgbBlock,
- vk::Format::eAstc5x4SrgbBlock,
- vk::Format::eAstc5x5UnormBlock,
- vk::Format::eAstc5x5SrgbBlock,
- vk::Format::eAstc10x8UnormBlock,
- vk::Format::eAstc10x8SrgbBlock};
- std::map<vk::Format, vk::FormatProperties> format_properties;
+ constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+ vk::Format::eA8B8G8R8SnormPack32,
+ vk::Format::eA8B8G8R8SrgbPack32,
+ vk::Format::eB5G6R5UnormPack16,
+ vk::Format::eA2B10G10R10UnormPack32,
+ vk::Format::eR32G32B32A32Sfloat,
+ vk::Format::eR16G16B16A16Uint,
+ vk::Format::eR16G16Unorm,
+ vk::Format::eR16G16Snorm,
+ vk::Format::eR16G16Sfloat,
+ vk::Format::eR16Unorm,
+ vk::Format::eR8G8B8A8Srgb,
+ vk::Format::eR8G8Unorm,
+ vk::Format::eR8G8Snorm,
+ vk::Format::eR8Unorm,
+ vk::Format::eB10G11R11UfloatPack32,
+ vk::Format::eR32Sfloat,
+ vk::Format::eR16Sfloat,
+ vk::Format::eR16G16B16A16Sfloat,
+ vk::Format::eB8G8R8A8Unorm,
+ vk::Format::eD32Sfloat,
+ vk::Format::eD16Unorm,
+ vk::Format::eD16UnormS8Uint,
+ vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint,
+ vk::Format::eBc1RgbaUnormBlock,
+ vk::Format::eBc2UnormBlock,
+ vk::Format::eBc3UnormBlock,
+ vk::Format::eBc4UnormBlock,
+ vk::Format::eBc5UnormBlock,
+ vk::Format::eBc5SnormBlock,
+ vk::Format::eBc7UnormBlock,
+ vk::Format::eBc1RgbaSrgbBlock,
+ vk::Format::eBc3SrgbBlock,
+ vk::Format::eBc7SrgbBlock,
+ vk::Format::eAstc4x4UnormBlock,
+ vk::Format::eAstc4x4SrgbBlock,
+ vk::Format::eAstc8x8SrgbBlock,
+ vk::Format::eAstc8x6SrgbBlock,
+ vk::Format::eAstc5x4SrgbBlock,
+ vk::Format::eAstc5x5UnormBlock,
+ vk::Format::eAstc5x5SrgbBlock,
+ vk::Format::eAstc10x8UnormBlock,
+ vk::Format::eAstc10x8SrgbBlock};
+ std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 537825d8b..010d4c3d6 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -4,7 +4,7 @@
#pragma once
-#include <map>
+#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -69,16 +69,26 @@ public:
return present_family;
}
- /// Returns if the device is integrated with the host CPU.
+ /// Returns true if the device is integrated with the host CPU.
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
}
+ /// Returns the driver ID.
+ vk::DriverIdKHR GetDriverID() const {
+ return driver_id;
+ }
+
/// Returns uniform buffer alignment requeriment.
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ /// Returns storage alignment requeriment.
+ u64 GetStorageBufferAlignment() const {
+ return storage_buffer_alignment;
+ }
+
/// Returns the maximum range for storage buffers.
u64 GetMaxStorageBufferRange() const {
return max_storage_buffer_range;
@@ -89,9 +99,19 @@ public:
return is_optimal_astc_supported;
}
+ /// Returns true if the device supports float16 natively
+ bool IsFloat16Supported() const {
+ return is_float16_supported;
+ }
+
/// Returns true if the device supports VK_EXT_scalar_block_layout.
- bool IsExtScalarBlockLayoutSupported() const {
- return ext_scalar_block_layout;
+ bool IsKhrUniformBufferStandardLayoutSupported() const {
+ return khr_uniform_buffer_standard_layout;
+ }
+
+ /// Returns true if the device supports VK_EXT_index_type_uint8.
+ bool IsExtIndexTypeUint8Supported() const {
+ return ext_index_type_uint8;
}
/// Checks if the physical device is suitable.
@@ -123,22 +143,28 @@ private:
FormatType format_type) const;
/// Returns the device properties for Vulkan formats.
- static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
+ static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device.
- vk::DispatchLoaderDynamic dld; ///< Device function pointers.
- UniqueDevice logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- vk::PhysicalDeviceType device_type; ///< Physical device type.
- u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
- u64 max_storage_buffer_range{}; ///< Max storage buffer size.
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout.
- std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::PhysicalDeviceType device_type; ///< Physical device type.
+ vk::DriverIdKHR driver_id{}; ///< Driver ID.
+ u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
+ u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment.
+ u64 max_storage_buffer_range{}; ///< Max storage buffer size.
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
+ bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
+ bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
+ std::unordered_map<vk::Format, vk::FormatProperties>
+ format_properties; ///< Format properties dictionary.
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index 771b05c73..1f73b716b 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -4,9 +4,6 @@
#pragma once
-#include <unordered_map>
-
-#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/sampler_cache.h"
#include "video_core/textures/texture.h"
@@ -21,9 +18,9 @@ public:
~VKSamplerCache();
protected:
- UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
+ UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
- vk::Sampler ToSamplerType(const UniqueSampler& sampler) const;
+ vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override;
private:
const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f1fea1871..0f8116458 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man
VKScheduler::~VKScheduler() = default;
-VKExecutionContext VKScheduler::GetExecutionContext() const {
- return VKExecutionContext(current_fence, current_cmdbuf);
-}
-
-VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
+void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
- current_fence->Release();
+ if (release_fence)
+ current_fence->Release();
AllocateNewContext();
- return GetExecutionContext();
}
-VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
+void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
- current_fence->Release();
+ if (release_fence)
+ current_fence->Release();
AllocateNewContext();
- return GetExecutionContext();
}
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index cfaf5376f..0e5b49c7f 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -10,10 +10,43 @@
namespace Vulkan {
class VKDevice;
-class VKExecutionContext;
class VKFence;
class VKResourceManager;
+class VKFenceView {
+public:
+ VKFenceView() = default;
+ VKFenceView(VKFence* const& fence) : fence{fence} {}
+
+ VKFence* operator->() const noexcept {
+ return fence;
+ }
+
+ operator VKFence&() const noexcept {
+ return *fence;
+ }
+
+private:
+ VKFence* const& fence;
+};
+
+class VKCommandBufferView {
+public:
+ VKCommandBufferView() = default;
+ VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
+
+ const vk::CommandBuffer* operator->() const noexcept {
+ return &cmdbuf;
+ }
+
+ operator vk::CommandBuffer() const noexcept {
+ return cmdbuf;
+ }
+
+private:
+ const vk::CommandBuffer& cmdbuf;
+};
+
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
@@ -21,16 +54,21 @@ public:
explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
~VKScheduler();
- /// Gets the current execution context.
- [[nodiscard]] VKExecutionContext GetExecutionContext() const;
+ /// Gets a reference to the current fence.
+ VKFenceView GetFence() const {
+ return current_fence;
+ }
+
+ /// Gets a reference to the current command buffer.
+ VKCommandBufferView GetCommandBuffer() const {
+ return current_cmdbuf;
+ }
- /// Sends the current execution context to the GPU. It invalidates the current execution context
- /// and returns a new one.
- VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
+ /// Sends the current execution context to the GPU.
+ void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
- /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
- /// the current execution context and returns a new one.
- VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
+ /// Sends the current execution context to the GPU and waits for it to complete.
+ void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
private:
void SubmitExecution(vk::Semaphore semaphore);
@@ -44,26 +82,4 @@ private:
VKFence* next_fence = nullptr;
};
-class VKExecutionContext {
- friend class VKScheduler;
-
-public:
- VKExecutionContext() = default;
-
- VKFence& GetFence() const {
- return *fence;
- }
-
- vk::CommandBuffer GetCommandBuffer() const {
- return cmdbuf;
- }
-
-private:
- explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
- : fence{fence}, cmdbuf{cmdbuf} {}
-
- VKFence* fence{};
- vk::CommandBuffer cmdbuf;
-};
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 547883425..77fc58f25 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -19,6 +19,7 @@
#include "video_core/engines/shader_header.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan::VKShader {
@@ -132,20 +133,16 @@ public:
branch_labels.push_back(label);
}
- // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
- // that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
spv::StorageClass::Function, Constant(t_uint, first_address)));
- flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type),
- spv::StorageClass::Function, ConstantNull(flow_stack_type)));
- flow_stack_top =
- Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0)));
+ std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
+ std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
Name(jmp_to, "jmp_to");
- Name(flow_stack, "flow_stack");
- Name(flow_stack_top, "flow_stack_top");
+ Name(ssy_flow_stack, "ssy_flow_stack");
+ Name(ssy_flow_stack_top, "ssy_flow_stack_top");
+ Name(pbk_flow_stack, "pbk_flow_stack");
+ Name(pbk_flow_stack_top, "pbk_flow_stack_top");
Emit(OpBranch(loop_label));
Emit(loop_label);
@@ -209,10 +206,6 @@ public:
}
private:
- using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
- using OperationDecompilersArray =
- std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
-
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
void AllocateBindings() {
@@ -378,8 +371,8 @@ private:
u32 binding = const_buffers_base_binding;
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- const Id type =
- device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
+ const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
+ : t_cbuf_std140_ubo;
const Id id = OpVariable(type, spv::StorageClass::Uniform);
AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
@@ -434,20 +427,17 @@ private:
instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
t_in_uint, "instance_index");
- bool is_point_size_declared = false;
bool is_clip_distances_declared = false;
for (const auto index : ir.GetOutputAttributes()) {
- if (index == Attribute::Index::PointSize) {
- is_point_size_declared = true;
- } else if (index == Attribute::Index::ClipDistances0123 ||
- index == Attribute::Index::ClipDistances4567) {
+ if (index == Attribute::Index::ClipDistances0123 ||
+ index == Attribute::Index::ClipDistances4567) {
is_clip_distances_declared = true;
}
}
std::vector<Id> members;
members.push_back(t_float4);
- if (is_point_size_declared) {
+ if (ir.UsesPointSize()) {
members.push_back(t_float);
}
if (is_clip_distances_declared) {
@@ -470,7 +460,7 @@ private:
position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
point_size_index =
- MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
+ MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
is_clip_distances_declared);
@@ -576,7 +566,7 @@ private:
const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
Id pointer{};
- if (device.IsExtScalarBlockLayoutSupported()) {
+ if (device.IsKhrUniformBufferStandardLayoutSupported()) {
const Id buffer_offset = Emit(OpShiftRightLogical(
t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
pointer = Emit(
@@ -716,7 +706,8 @@ private:
case Attribute::Index::Position:
return AccessElement(t_out_float, per_vertex, position_index,
abuf->GetElement());
- case Attribute::Index::PointSize:
+ case Attribute::Index::LayerViewportPointSize:
+ UNIMPLEMENTED_IF(abuf->GetElement() != 3);
return AccessElement(t_out_float, per_vertex, point_size_index);
case Attribute::Index::ClipDistances0123:
return AccessElement(t_out_float, per_vertex, clip_distances_index,
@@ -745,6 +736,16 @@ private:
return {};
}
+ Id FCastHalf0(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id FCastHalf1(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id HNegate(Operation operation) {
UNIMPLEMENTED();
return {};
@@ -755,6 +756,11 @@ private:
return {};
}
+ Id HCastFloat(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id HUnpack(Operation operation) {
UNIMPLEMENTED();
return {};
@@ -810,12 +816,7 @@ private:
return {};
}
- Id LogicalAll2(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id LogicalAny2(Operation operation) {
+ Id LogicalAnd2(Operation operation) {
UNIMPLEMENTED();
return {};
}
@@ -939,6 +940,41 @@ private:
return {};
}
+ Id ImageLoad(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ImageStore(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageAdd(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageAnd(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageOr(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageXor(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageExchange(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -948,10 +984,19 @@ private:
return {};
}
+ Id BranchIndirect(Operation operation) {
+ const Id op_a = VisitOperand<Type::Uint>(operation, 0);
+
+ Emit(OpStore(jmp_to, op_a));
+ BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+ return {};
+ }
+
Id PushFlowStack(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
ASSERT(target);
+ const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
const Id current = Emit(OpLoad(t_uint, flow_stack_top));
const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1)));
const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current));
@@ -962,6 +1007,7 @@ private:
}
Id PopFlowStack(Operation operation) {
+ const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
const Id current = Emit(OpLoad(t_uint, flow_stack_top));
const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1)));
const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous));
@@ -1057,6 +1103,66 @@ private:
return {};
}
+ Id BallotThread(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id VoteAll(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id VoteAny(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id VoteEqual(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleIndexed(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleUp(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleDown(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleButterfly(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleIndexed(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleUp(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleDown(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleButterfly(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
const std::string& name) {
const Id id = OpVariable(type, storage);
@@ -1172,7 +1278,32 @@ private:
Emit(skip_label);
}
- static constexpr OperationDecompilersArray operation_decompilers = {
+ std::tuple<Id, Id> CreateFlowStack() {
+ // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
+ // that shaders will use 20 nested SSYs and PBKs.
+ constexpr u32 FLOW_STACK_SIZE = 20;
+ constexpr auto storage_class = spv::StorageClass::Function;
+
+ const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
+ const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
+ ConstantNull(flow_stack_type)));
+ const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)));
+ return std::tie(stack, top);
+ }
+
+ std::pair<Id, Id> GetFlowStack(Operation operation) {
+ const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
+ switch (stack_class) {
+ case MetaStackClass::Ssy:
+ return {ssy_flow_stack, ssy_flow_stack_top};
+ case MetaStackClass::Pbk:
+ return {pbk_flow_stack, pbk_flow_stack_top};
+ }
+ UNREACHABLE();
+ return {};
+ }
+
+ static constexpr std::array operation_decompilers = {
&SPIRVDecompiler::Assign,
&SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1185,6 +1316,8 @@ private:
&SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
&SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
&SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
+ &SPIRVDecompiler::FCastHalf0,
+ &SPIRVDecompiler::FCastHalf1,
&SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
&SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
@@ -1245,6 +1378,7 @@ private:
&SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
&SPIRVDecompiler::HNegate,
&SPIRVDecompiler::HClamp,
+ &SPIRVDecompiler::HCastFloat,
&SPIRVDecompiler::HUnpack,
&SPIRVDecompiler::HMergeF32,
&SPIRVDecompiler::HMergeH0,
@@ -1257,8 +1391,7 @@ private:
&SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
&SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
&SPIRVDecompiler::LogicalPick2,
- &SPIRVDecompiler::LogicalAll2,
- &SPIRVDecompiler::LogicalAny2,
+ &SPIRVDecompiler::LogicalAnd2,
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1303,7 +1436,16 @@ private:
&SPIRVDecompiler::TextureQueryLod,
&SPIRVDecompiler::TexelFetch,
+ &SPIRVDecompiler::ImageLoad,
+ &SPIRVDecompiler::ImageStore,
+ &SPIRVDecompiler::AtomicImageAdd,
+ &SPIRVDecompiler::AtomicImageAnd,
+ &SPIRVDecompiler::AtomicImageOr,
+ &SPIRVDecompiler::AtomicImageXor,
+ &SPIRVDecompiler::AtomicImageExchange,
+
&SPIRVDecompiler::Branch,
+ &SPIRVDecompiler::BranchIndirect,
&SPIRVDecompiler::PushFlowStack,
&SPIRVDecompiler::PopFlowStack,
&SPIRVDecompiler::Exit,
@@ -1319,7 +1461,23 @@ private:
&SPIRVDecompiler::WorkGroupId<0>,
&SPIRVDecompiler::WorkGroupId<1>,
&SPIRVDecompiler::WorkGroupId<2>,
+
+ &SPIRVDecompiler::BallotThread,
+ &SPIRVDecompiler::VoteAll,
+ &SPIRVDecompiler::VoteAny,
+ &SPIRVDecompiler::VoteEqual,
+
+ &SPIRVDecompiler::ShuffleIndexed,
+ &SPIRVDecompiler::ShuffleUp,
+ &SPIRVDecompiler::ShuffleDown,
+ &SPIRVDecompiler::ShuffleButterfly,
+
+ &SPIRVDecompiler::InRangeShuffleIndexed,
+ &SPIRVDecompiler::InRangeShuffleUp,
+ &SPIRVDecompiler::InRangeShuffleDown,
+ &SPIRVDecompiler::InRangeShuffleButterfly,
};
+ static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
const VKDevice& device;
const ShaderIR& ir;
@@ -1414,8 +1572,10 @@ private:
Id execute_function{};
Id jmp_to{};
- Id flow_stack_top{};
- Id flow_stack{};
+ Id ssy_flow_stack_top{};
+ Id pbk_flow_stack_top{};
+ Id ssy_flow_stack{};
+ Id pbk_flow_stack{};
Id continue_label{};
std::map<u32, Id> labels;
};
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 58ffa42f2..62f1427f5 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}
-VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+void VKStreamBuffer::Send(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
if (invalidation_mark) {
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
- exctx = scheduler.Flush();
+ scheduler.Flush();
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
[&](auto& resource) { resource->Wait(); });
invalidation_mark = std::nullopt;
@@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
ReserveWatches(WATCHES_RESERVE_CHUNK);
}
// Add a watch for this allocation.
- watches[used_watches++]->Watch(exctx.GetFence());
+ watches[used_watches++]->Watch(scheduler.GetFence());
offset += size;
-
- return exctx;
}
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 69d036ccd..842e54162 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -37,7 +37,7 @@ public:
std::tuple<u8*, u64, bool> Reserve(u64 size);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
- [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+ void Send(u64 size);
vk::Buffer GetBuffer() const {
return *buffer;