summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 12
-rw-r--r-- src/video_core/engines/maxwell_3d.h 8
-rw-r--r-- src/video_core/engines/shader_bytecode.h 32
-rw-r--r-- src/video_core/macro_interpreter.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 145
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp 301
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h 62
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 48
-rw-r--r-- src/video_core/shader/decode/image.cpp 92
-rw-r--r-- src/video_core/shader/node.h 57
-rw-r--r-- src/video_core/shader/shader_ir.h 10
-rw-r--r-- src/video_core/surface.cpp 20
-rw-r--r-- src/video_core/surface.h 2
14 files changed, 629 insertions, 184 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 965c4c06b..c7a3c85a0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -332,6 +332,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessMacroBind(method_call.argument);
break;
}
+ case MAXWELL3D_REG_INDEX(firmware[4]): {
+ ProcessFirmwareCall4();
+ break;
+ }
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -422,6 +426,14 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
macro_positions[regs.macros.entry++] = data;
}
+ void Maxwell3D::ProcessFirmwareCall4() {
+     // Index into the raw register array that is written in place of running the blob.
+     constexpr u32 stubbed_register_index = 0xd00;
+
+     LOG_WARNING(HW_GPU, "(STUBBED) called");
+
+     // The real firmware call 4 blob rewrites some registers depending on its parameters.
+     // Those registers do not affect emulation, so the call is stubbed by storing 1 here.
+     regs.reg_array[stubbed_register_index] = 1;
+ }
+
void Maxwell3D::ProcessQueryGet() {
const GPUVAddr sequence_address{regs.query.QueryAddress()};
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f67a5389f..e5ec90717 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1089,7 +1089,9 @@ public:
INSERT_PADDING_WORDS(14);
} shader_config[MaxShaderProgram];
- INSERT_PADDING_WORDS(0x80);
+ INSERT_PADDING_WORDS(0x60);
+
+ u32 firmware[0x20];
struct {
u32 cb_size;
@@ -1319,6 +1321,9 @@ private:
/// Handles writes to the macro bind register.
void ProcessMacroBind(u32 data);
+ /// Handles writes to the firmware[4] register (firmware call 4, currently stubbed).
+ void ProcessFirmwareCall4();
+
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers();
@@ -1431,6 +1436,7 @@ ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
+ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index bd8c1ada0..052e6d24e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -544,6 +544,28 @@ enum class VoteOperation : u64 {
Eq = 2, // allThreadsEqualNV
};
+ enum class ImageAtomicSize : u64 { // Operand size selector read through the 3-bit `size` field of `suatom_d`
+ U32 = 0, // Handled by the GLSL decompiler (unsigned 32-bit image)
+ S32 = 1, // Handled by the GLSL decompiler (signed 32-bit image)
+ U64 = 2,
+ F32 = 3,
+ S64 = 5, // NOTE(review): value 4 has no enumerator - presumably unused/reserved, confirm
+ SD32 = 6,
+ SD64 = 7,
+ };
+
+ enum class ImageAtomicOperation : u64 { // Atomic operation selector read through the 4-bit `operation` field of `suatom_d`
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8, // Only value that needs the fourth bit of the field
+ };
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -1392,6 +1414,14 @@ union Instruction {
} sust;
union {
+ BitField<28, 1, u64> is_ba;
+ BitField<51, 3, ImageAtomicSize> size;
+ BitField<33, 3, ImageType> image_type;
+ BitField<29, 4, ImageAtomicOperation> operation;
+ BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
+ } suatom_d;
+
+ union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -1543,6 +1573,7 @@ public:
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
SUST, // Surface Store
+ SUATOM, // Surface Atomic Operation
EXIT,
NOP,
IPA,
@@ -1826,6 +1857,7 @@ private:
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
+ INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 4e1cb98db..62afc0d11 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -131,9 +131,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
// An instruction with the Exit flag will not actually
// cause an exit if it's executed inside a delay slot.
- // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
- // testing on the MME code.
- if (opcode.is_exit) {
+ if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction
Step(offset, true);
return false;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6edb2ca38..137b23740 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -706,7 +706,7 @@ private:
void DeclareImages() {
const auto& images{ir.GetImages()};
for (const auto& [offset, image] : images) {
- const std::string image_type = [&]() {
+ const char* image_type = [&] {
switch (image.GetType()) {
case Tegra::Shader::ImageType::Texture1D:
return "image1D";
@@ -725,6 +725,23 @@ private:
return "image1D";
}
}();
+
+ const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> {
+ if (!image.IsSizeKnown()) {
+ return {"", ""};
+ }
+ switch (image.GetSize()) {
+ case Tegra::Shader::ImageAtomicSize::U32:
+ return {"u", "r32ui, "};
+ case Tegra::Shader::ImageAtomicSize::S32:
+ return {"i", "r32i, "};
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented atomic size={}",
+ static_cast<u32>(image.GetSize()));
+ return {"", ""};
+ }
+ }();
+
std::string qualifier = "coherent volatile";
if (image.IsRead() && !image.IsWritten()) {
qualifier += " readonly";
@@ -1180,6 +1197,74 @@ private:
return expr;
}
+ /// Builds a GLSL integer coordinate constructor from every operand of the operation,
+ /// e.g. "ivec2(x, y)" for two operands.
+ /// @param operation Operation whose operands are the coordinates (1 to 4 are supported).
+ /// @return GLSL expression with each operand converted to int.
+ std::string BuildIntegerCoordinates(Operation operation) {
+     constexpr std::array glsl_ctor_prefixes{"int(", "ivec2(", "ivec3(", "ivec4("};
+     const std::size_t num_coords{operation.GetOperandsCount()};
+
+     // at() is bounds checked, so an unsupported operand count throws instead of reading
+     // outside the table.
+     std::string result(glsl_ctor_prefixes.at(num_coords - 1));
+     for (std::size_t index = 0; index < num_coords; ++index) {
+         if (index != 0) {
+             result += ", ";
+         }
+         result += VisitOperand(operation, index).AsInt();
+     }
+     result.push_back(')');
+     return result;
+ }
+
+ /// Builds the GLSL value constructor passed to an image store, e.g. "vec4(a, b, c, d)".
+ /// The element type follows the image's atomic size when it is known (U32 -> uint vectors,
+ /// S32 -> int vectors); otherwise, and for unimplemented sizes, the values are emitted as floats.
+ /// @param operation Image operation whose MetaImage carries the values to store (1 to 4).
+ /// @return GLSL expression constructing the value to store.
+ std::string BuildImageValues(Operation operation) {
+     // Bound by reference to avoid copying the MetaImage and its value list.
+     // NOTE(review): relies on GetMeta() returning a reference into `operation` - confirm.
+     const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+     const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> {
+         constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"};
+         if (!meta.image.IsSizeKnown()) {
+             return {float_constructors, Type::Float};
+         }
+         switch (meta.image.GetSize()) {
+         case Tegra::Shader::ImageAtomicSize::U32:
+             return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint};
+         case Tegra::Shader::ImageAtomicSize::S32:
+             // Signed constructors take signed operands, the same pairing AtomicImage uses.
+             return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Int};
+         default:
+             UNIMPLEMENTED_MSG("Unimplemented image size={}",
+                               static_cast<u32>(meta.image.GetSize()));
+             return {float_constructors, Type::Float};
+         }
+     }();
+
+     const std::size_t values_count{meta.values.size()};
+     // at() is bounds checked, so an unsupported value count throws instead of reading
+     // outside the constructor table.
+     std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
+     for (std::size_t i = 0; i < values_count; ++i) {
+         expr += Visit(meta.values.at(i)).As(type);
+         if (i + 1 < values_count) {
+             expr += ", ";
+         }
+     }
+     expr += ')';
+     return expr;
+ }
+
+ /// Emits a GLSL image atomic call of the form "opname(image, coordinates, value)".
+ /// Exactly one value and a known image size are expected; both are asserted.
+ /// @param operation Image operation; its operands are the coordinates and its MetaImage
+ ///                  holds the image and the value operand.
+ /// @param opname GLSL builtin to call, e.g. "imageAtomicAdd".
+ /// @return The call expression, typed Int for S32 images and Uint otherwise.
+ Expression AtomicImage(Operation operation, const char* opname) {
+     // Bound by reference to avoid copying the MetaImage and its value list.
+     // NOTE(review): relies on GetMeta() returning a reference into `operation` - confirm.
+     const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+     ASSERT(meta.values.size() == 1);
+     ASSERT(meta.image.IsSizeKnown());
+
+     const auto type = [&]() {
+         switch (const auto size = meta.image.GetSize()) {
+         case Tegra::Shader::ImageAtomicSize::U32:
+             return Type::Uint;
+         case Tegra::Shader::ImageAtomicSize::S32:
+             return Type::Int;
+         default:
+             UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size));
+             return Type::Uint;
+         }
+     }();
+
+     return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image),
+                         BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)),
+             type};
+ }
+
Expression Assign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -1694,36 +1779,37 @@ private:
}
Expression ImageStore(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
+ BuildIntegerCoordinates(operation), BuildImageValues(operation));
+ return {};
+ }
- std::string expr = "imageStore(";
- expr += GetImage(meta.image);
- expr += ", ";
+ Expression AtomicImageAdd(Operation operation) {
+ return AtomicImage(operation, "imageAtomicAdd");
+ }
- const std::size_t coords_count{operation.GetOperandsCount()};
- expr += constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += "), ";
+ Expression AtomicImageMin(Operation operation) {
+ return AtomicImage(operation, "imageAtomicMin");
+ }
- const std::size_t values_count{meta.values.size()};
- UNIMPLEMENTED_IF(values_count != 4);
- expr += "vec4(";
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsFloat();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += "));";
+ Expression AtomicImageMax(Operation operation) {
+ return AtomicImage(operation, "imageAtomicMax");
+ }
+ Expression AtomicImageAnd(Operation operation) {
+ return AtomicImage(operation, "imageAtomicAnd");
+ }
- code.AddLine(expr);
- return {};
+ Expression AtomicImageOr(Operation operation) {
+ return AtomicImage(operation, "imageAtomicOr");
+ }
+
+ Expression AtomicImageXor(Operation operation) {
+ return AtomicImage(operation, "imageAtomicXor");
+ }
+
+ Expression AtomicImageExchange(Operation operation) {
+ return AtomicImage(operation, "imageAtomicExchange");
}
Expression Branch(Operation operation) {
@@ -2019,6 +2105,13 @@ private:
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::ImageStore,
+ &GLSLDecompiler::AtomicImageAdd,
+ &GLSLDecompiler::AtomicImageMin,
+ &GLSLDecompiler::AtomicImageMax,
+ &GLSLDecompiler::AtomicImageAnd,
+ &GLSLDecompiler::AtomicImageOr,
+ &GLSLDecompiler::AtomicImageXor,
+ &GLSLDecompiler::AtomicImageExchange,
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 5450feedf..f141c4e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -341,16 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 index{};
u32 type{};
u8 is_bindless{};
- u8 is_read{};
u8 is_written{};
+ u8 is_read{};
+ u8 is_size_known{};
+ u32 size{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
- !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
+ !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
+ !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) {
return {};
}
- entry.entries.images.emplace_back(static_cast<u64>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::ImageType>(type),
- is_bindless != 0, is_written != 0, is_read != 0);
+ entry.entries.images.emplace_back(
+ static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
+ is_read != 0,
+ is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size))
+ : std::nullopt);
}
u32 global_memory_count{};
@@ -429,12 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
return false;
}
for (const auto& image : entries.images) {
+ const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U;
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) {
return false;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 3b966ddc3..897cbb4e8 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -2,9 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <map>
+#include <bitset>
#include <optional>
#include <set>
+#include <string_view>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -12,13 +13,32 @@
namespace Vulkan {
+namespace {
+
+ /// Appends `data` to a pNext chain.
+ /// @param next Points at the pNext slot at the current end of the chain; on return it
+ ///             points at `data.pNext`, so a following call links after `data`.
+ /// @param data Structure to link in; it must stay alive for as long as the chain is used.
+ template <typename T>
+ void SetNext(void**& next, T& data) {
+     void** const tail_slot = &data.pNext;
+     *next = &data;
+     next = tail_slot;
+ }
+
+ /// Queries an extension feature structure of a physical device by chaining it into
+ /// vk::PhysicalDeviceFeatures2 and calling getFeatures2.
+ /// @tparam T Extension feature structure type to query.
+ /// @param physical Physical device to query.
+ /// @param dldi Dispatch loader used for the call. Taken by const reference, like the other
+ ///             functions in this file, so the loader is not copied on every query.
+ /// @return The extension feature structure after the query.
+ template <typename T>
+ T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
+     vk::PhysicalDeviceFeatures2 features;
+     T extension_features;
+     features.pNext = &extension_features;
+     physical.getFeatures2(&features, dldi);
+     return extension_features;
+ }
+
+} // Anonymous namespace
+
namespace Alternatives {
-constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
- vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
-constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
- vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
-constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
+constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
+ vk::Format::eD16UnormS8Uint, vk::Format{}};
+constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint, vk::Format{}};
+constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
} // namespace Alternatives
@@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
- vk::PhysicalDeviceFeatures device_features;
- device_features.vertexPipelineStoresAndAtomics = true;
- device_features.independentBlend = true;
- device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
-
const auto queue_cis = GetDeviceQueueCreateInfos();
- const std::vector<const char*> extensions = LoadExtensions(dldi);
- const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
- 0, nullptr, static_cast<u32>(extensions.size()),
- extensions.data(), &device_features);
+ const std::vector extensions = LoadExtensions(dldi);
+
+ vk::PhysicalDeviceFeatures2 features2;
+ void** next = &features2.pNext;
+ auto& features = features2.features;
+ features.vertexPipelineStoresAndAtomics = true;
+ features.independentBlend = true;
+ features.depthClamp = true;
+ features.samplerAnisotropy = true;
+ features.largePoints = true;
+ features.textureCompressionASTC_LDR = is_optimal_astc_supported;
+
+ vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
+ vertex_divisor.vertexAttributeInstanceRateDivisor = true;
+ vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
+ SetNext(next, vertex_divisor);
+
+ vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
+ if (is_float16_supported) {
+ float16_int8.shaderFloat16 = true;
+ SetNext(next, float16_int8);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
+ }
+
+ vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
+ if (khr_uniform_buffer_standard_layout) {
+ std430_layout.uniformBufferStandardLayout = true;
+ SetNext(next, std430_layout);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
+ }
+
+ vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
+ if (ext_index_type_uint8) {
+ index_type_uint8.indexTypeUint8 = true;
+ SetNext(next, index_type_uint8);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
+ }
+
+ vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
+ nullptr, static_cast<u32>(extensions.size()), extensions.data(),
+ nullptr);
+ device_ci.pNext = &features2;
+
vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
@@ -78,6 +135,17 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
+ if (khr_driver_properties) {
+ vk::PhysicalDeviceDriverPropertiesKHR driver;
+ vk::PhysicalDeviceProperties2 properties;
+ properties.pNext = &driver;
+ physical.getProperties2(&properties, dld);
+ driver_id = driver.driverID;
+ LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
+ } else {
+ LOG_INFO(Render_Vulkan, "Driver: Unknown");
+ }
+
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
return true;
@@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
// The wanted format is not supported by hardware, search for alternatives
const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
- LOG_CRITICAL(Render_Vulkan,
- "Format={} with usage={} and type={} has no defined alternatives and host "
- "hardware does not support it",
- vk::to_string(wanted_format), vk::to_string(wanted_usage),
- static_cast<u32>(format_type));
- UNREACHABLE();
+ UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
+ "hardware does not support it",
+ vk::to_string(wanted_format), vk::to_string(wanted_usage),
+ static_cast<u32>(format_type));
return wanted_format;
}
std::size_t i = 0;
for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
alternative = alternatives[++i]) {
- if (!IsFormatSupported(alternative, wanted_usage, format_type))
+ if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
+ }
LOG_WARNING(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
static_cast<u32>(wanted_format), static_cast<u32>(alternative),
@@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
}
// No alternatives found, panic
- LOG_CRITICAL(Render_Vulkan,
- "Format={} with usage={} and type={} is not supported by the host hardware and "
- "doesn't support any of the alternatives",
- static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
- static_cast<u32>(format_type));
- UNREACHABLE();
+ UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
+ "doesn't support any of the alternatives",
+ static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ static_cast<u32>(format_type));
return wanted_format;
}
@@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
- constexpr std::array<vk::Format, 9> astc_formats = {
+ constexpr std::array astc_formats = {
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
@@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
- LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format));
return true;
}
- const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
+ const auto supported_usage = GetFormatFeatures(it->second, format_type);
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
- bool has_swapchain{};
+ LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
+ bool is_suitable = true;
+
+ constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
+ std::bitset<required_extensions.size()> available_extensions{};
+
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+ if (available_extensions[i]) {
+ continue;
+ }
+ available_extensions[i] =
+ required_extensions[i] == std::string_view{prop.extensionName};
+ }
}
- if (!has_swapchain) {
- // The device doesn't support creating swapchains.
- return false;
+ if (!available_extensions.all()) {
+ for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+ if (available_extensions[i]) {
+ continue;
+ }
+ LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
+ is_suitable = false;
+ }
}
bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
- if (family.queueCount == 0)
+ if (family.queueCount == 0) {
continue;
-
+ }
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
- // The device doesn't have a graphics and present queue.
- return false;
+ LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
+ is_suitable = false;
}
// TODO(Rodrigo): Check if the device matches all requirements.
const auto properties{physical.getProperties(dldi)};
- const auto limits{properties.limits};
- if (limits.maxUniformBufferRange < 65536) {
- return false;
+ const auto& limits{properties.limits};
+
+ // Minimum uniform buffer range a device must offer to be considered suitable.
+ constexpr u32 required_ubo_size = 65536;
+ if (limits.maxUniformBufferRange < required_ubo_size) {
+     // Log and keep going rather than returning, so the remaining checks still report.
+     LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required",
+              limits.maxUniformBufferRange, required_ubo_size);
+     is_suitable = false;
+ }
- const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
- if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
- return false;
+ const auto features{physical.getFeatures(dldi)};
+ const std::array feature_report = {
+ std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+ std::make_pair(features.independentBlend, "independentBlend"),
+ std::make_pair(features.depthClamp, "depthClamp"),
+ std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
+ std::make_pair(features.largePoints, "largePoints"),
+ };
+ for (const auto& [supported, name] : feature_report) {
+ if (supported) {
+ continue;
+ }
+ LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
+ is_suitable = false;
}
- // Device is suitable.
- return true;
+ return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> extensions;
- extensions.reserve(2);
+ extensions.reserve(7);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
- u32 revision) {
- if (extension.extensionName != std::string(name)) {
+ bool push) {
+ if (extension.extensionName != std::string_view(name)) {
return;
}
- extensions.push_back(name);
+ if (push) {
+ extensions.push_back(name);
+ }
if (status) {
status->get() = true;
}
};
+ bool khr_shader_float16_int8{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
+ Test(extension, khr_uniform_buffer_standard_layout,
+ VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+ Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+ Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
+ Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ }
+
+ if (khr_shader_float16_int8) {
+ is_float16_supported =
+ GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
+ extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
return extensions;
@@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
- const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+ const auto props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+ storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
}
@@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
return queue_cis;
}
-std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
+std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
- static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
- vk::Format::eB5G6R5UnormPack16,
- vk::Format::eA2B10G10R10UnormPack32,
- vk::Format::eR32G32B32A32Sfloat,
- vk::Format::eR16G16Unorm,
- vk::Format::eR16G16Snorm,
- vk::Format::eR8G8B8A8Srgb,
- vk::Format::eR8Unorm,
- vk::Format::eB10G11R11UfloatPack32,
- vk::Format::eR32Sfloat,
- vk::Format::eR16Sfloat,
- vk::Format::eR16G16B16A16Sfloat,
- vk::Format::eD32Sfloat,
- vk::Format::eD16Unorm,
- vk::Format::eD16UnormS8Uint,
- vk::Format::eD24UnormS8Uint,
- vk::Format::eD32SfloatS8Uint,
- vk::Format::eBc1RgbaUnormBlock,
- vk::Format::eBc2UnormBlock,
- vk::Format::eBc3UnormBlock,
- vk::Format::eBc4UnormBlock,
- vk::Format::eBc5UnormBlock,
- vk::Format::eBc5SnormBlock,
- vk::Format::eBc7UnormBlock,
- vk::Format::eAstc4x4UnormBlock,
- vk::Format::eAstc4x4SrgbBlock,
- vk::Format::eAstc8x8SrgbBlock,
- vk::Format::eAstc8x6SrgbBlock,
- vk::Format::eAstc5x4SrgbBlock,
- vk::Format::eAstc5x5UnormBlock,
- vk::Format::eAstc5x5SrgbBlock,
- vk::Format::eAstc10x8UnormBlock,
- vk::Format::eAstc10x8SrgbBlock};
- std::map<vk::Format, vk::FormatProperties> format_properties;
+ constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+ vk::Format::eA8B8G8R8SnormPack32,
+ vk::Format::eA8B8G8R8SrgbPack32,
+ vk::Format::eB5G6R5UnormPack16,
+ vk::Format::eA2B10G10R10UnormPack32,
+ vk::Format::eR32G32B32A32Sfloat,
+ vk::Format::eR16G16B16A16Uint,
+ vk::Format::eR16G16Unorm,
+ vk::Format::eR16G16Snorm,
+ vk::Format::eR16G16Sfloat,
+ vk::Format::eR16Unorm,
+ vk::Format::eR8G8B8A8Srgb,
+ vk::Format::eR8G8Unorm,
+ vk::Format::eR8G8Snorm,
+ vk::Format::eR8Unorm,
+ vk::Format::eB10G11R11UfloatPack32,
+ vk::Format::eR32Sfloat,
+ vk::Format::eR16Sfloat,
+ vk::Format::eR16G16B16A16Sfloat,
+ vk::Format::eB8G8R8A8Unorm,
+ vk::Format::eD32Sfloat,
+ vk::Format::eD16Unorm,
+ vk::Format::eD16UnormS8Uint,
+ vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint,
+ vk::Format::eBc1RgbaUnormBlock,
+ vk::Format::eBc2UnormBlock,
+ vk::Format::eBc3UnormBlock,
+ vk::Format::eBc4UnormBlock,
+ vk::Format::eBc5UnormBlock,
+ vk::Format::eBc5SnormBlock,
+ vk::Format::eBc7UnormBlock,
+ vk::Format::eBc1RgbaSrgbBlock,
+ vk::Format::eBc3SrgbBlock,
+ vk::Format::eBc7SrgbBlock,
+ vk::Format::eAstc4x4UnormBlock,
+ vk::Format::eAstc4x4SrgbBlock,
+ vk::Format::eAstc8x8SrgbBlock,
+ vk::Format::eAstc8x6SrgbBlock,
+ vk::Format::eAstc5x4SrgbBlock,
+ vk::Format::eAstc5x5UnormBlock,
+ vk::Format::eAstc5x5SrgbBlock,
+ vk::Format::eAstc10x8UnormBlock,
+ vk::Format::eAstc10x8SrgbBlock};
+ std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 537825d8b..010d4c3d6 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -4,7 +4,7 @@
#pragma once
-#include <map>
+#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -69,16 +69,26 @@ public:
return present_family;
}
- /// Returns if the device is integrated with the host CPU.
+ /// Returns true if the device is integrated with the host CPU.
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
}
+ /// Returns the driver ID.
+ vk::DriverIdKHR GetDriverID() const {
+ return driver_id;
+ }
+
/// Returns the uniform buffer alignment requirement.
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ /// Returns the storage buffer alignment requirement.
+ u64 GetStorageBufferAlignment() const {
+ return storage_buffer_alignment;
+ }
+
/// Returns the maximum range for storage buffers.
u64 GetMaxStorageBufferRange() const {
return max_storage_buffer_range;
@@ -89,9 +99,19 @@ public:
return is_optimal_astc_supported;
}
+ /// Returns true if the device supports float16 natively.
+ bool IsFloat16Supported() const {
+ return is_float16_supported;
+ }
+
/// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
- bool IsExtScalarBlockLayoutSupported() const {
- return ext_scalar_block_layout;
+ bool IsKhrUniformBufferStandardLayoutSupported() const {
+ return khr_uniform_buffer_standard_layout;
+ }
+
+ /// Returns true if the device supports VK_EXT_index_type_uint8.
+ bool IsExtIndexTypeUint8Supported() const {
+ return ext_index_type_uint8;
}
/// Checks if the physical device is suitable.
@@ -123,22 +143,28 @@ private:
FormatType format_type) const;
/// Returns the device properties for Vulkan formats.
- static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
+ static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device.
- vk::DispatchLoaderDynamic dld; ///< Device function pointers.
- UniqueDevice logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- vk::PhysicalDeviceType device_type; ///< Physical device type.
- u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
- u64 max_storage_buffer_range{}; ///< Max storage buffer size.
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout.
- std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::PhysicalDeviceType device_type; ///< Physical device type.
+ vk::DriverIdKHR driver_id{}; ///< Driver ID.
+ u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement.
+ u64 storage_buffer_alignment{}; ///< Storage buffer alignment requirement.
+ u64 max_storage_buffer_range{}; ///< Max storage buffer size.
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
+ bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
+ bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
+ std::unordered_map<vk::Format, vk::FormatProperties>
+ format_properties; ///< Format properties dictionary.
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a35b45c9c..b9153934e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -370,8 +370,8 @@ private:
u32 binding = const_buffers_base_binding;
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- const Id type =
- device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
+ const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
+ : t_cbuf_std140_ubo;
const Id id = OpVariable(type, spv::StorageClass::Uniform);
AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
@@ -565,7 +565,7 @@ private:
const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
Id pointer{};
- if (device.IsExtScalarBlockLayoutSupported()) {
+ if (device.IsKhrUniformBufferStandardLayoutSupported()) {
const Id buffer_offset = Emit(OpShiftRightLogical(
t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
pointer = Emit(
@@ -944,6 +944,41 @@ private:
return {};
}
+ Id AtomicImageAdd(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageMin(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageMax(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageAnd(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageOr(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageXor(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageExchange(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1366,6 +1401,13 @@ private:
&SPIRVDecompiler::TexelFetch,
&SPIRVDecompiler::ImageStore,
+ &SPIRVDecompiler::AtomicImageAdd,
+ &SPIRVDecompiler::AtomicImageMin,
+ &SPIRVDecompiler::AtomicImageMax,
+ &SPIRVDecompiler::AtomicImageAnd,
+ &SPIRVDecompiler::AtomicImageOr,
+ &SPIRVDecompiler::AtomicImageXor,
+ &SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 008109a99..d54fb88c9 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::SUST: {
UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
- UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
@@ -66,8 +65,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
image.MarkWrite();
MetaImage meta{image, values};
- const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
- bb.push_back(store);
+ bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords)));
+ break;
+ }
+ case OpCode::Id::SUATOM: {
+ UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
+
+ Node value = GetRegister(instr.gpr0);
+
+ std::vector<Node> coords;
+ const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + i));
+ }
+
+ const OperationCode operation_code = [instr] {
+ switch (instr.suatom_d.operation) {
+ case Tegra::Shader::ImageAtomicOperation::Add:
+ return OperationCode::AtomicImageAdd;
+ case Tegra::Shader::ImageAtomicOperation::Min:
+ return OperationCode::AtomicImageMin;
+ case Tegra::Shader::ImageAtomicOperation::Max:
+ return OperationCode::AtomicImageMax;
+ case Tegra::Shader::ImageAtomicOperation::And:
+ return OperationCode::AtomicImageAnd;
+ case Tegra::Shader::ImageAtomicOperation::Or:
+ return OperationCode::AtomicImageOr;
+ case Tegra::Shader::ImageAtomicOperation::Xor:
+ return OperationCode::AtomicImageXor;
+ case Tegra::Shader::ImageAtomicOperation::Exch:
+ return OperationCode::AtomicImageExchange;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation={}",
+ static_cast<u32>(instr.suatom_d.operation.Value()));
+ return OperationCode::AtomicImageAdd;
+ }
+ }();
+
+ const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)};
+ MetaImage meta{image, {std::move(value)}};
+ SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords)));
break;
}
default:
@@ -77,38 +114,51 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
return pc;
}
-Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset{static_cast<u64>(image.index.Value())};
-
- // If this image has already been used, return the existing mapping.
- const auto it = used_images.find(offset);
- if (it != used_images.end()) {
- ASSERT(it->second.GetType() == type);
- return it->second;
+Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
+ const auto offset{static_cast<std::size_t>(image.index.Value())};
+ if (const auto image = TryUseExistingImage(offset, type, size)) {
+ return *image;
}
- // Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
- return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+ return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second;
}
-Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
+Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
const Node image_register{GetRegister(reg)};
const auto [base_image, cbuf_index, cbuf_offset]{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
- // If this image has already been used, return the existing mapping.
- const auto it = used_images.find(cbuf_key);
- if (it != used_images.end()) {
- ASSERT(it->second.GetType() == type);
- return it->second;
+ if (const auto image = TryUseExistingImage(cbuf_key, type, size)) {
+ return *image;
}
- // Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
- return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
+ return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size})
.first->second;
}
+Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
+ auto it = used_images.find(offset);
+ if (it == used_images.end()) {
+ return nullptr;
+ }
+ auto& image = it->second;
+ ASSERT(image.GetType() == type);
+
+ if (size) {
+ // The caller supplied a size: if the image already has one it must match; otherwise record it.
+ if (image.IsSizeKnown()) {
+ ASSERT(image.GetSize() == size);
+ } else {
+ image.SetSize(*size);
+ }
+ }
+ return &image;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index b29aedce8..b47b201cf 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
+#include <optional>
#include <string>
#include <tuple>
#include <utility>
@@ -148,7 +149,14 @@ enum class OperationCode {
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
- ImageStore, /// (MetaImage, float[N] coords) -> void
+ ImageStore, /// (MetaImage, int[N] coords) -> void
+ AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
+ AtomicImageMin, /// (MetaImage, int[N] coords) -> void
+ AtomicImageMax, /// (MetaImage, int[N] coords) -> void
+ AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
+ AtomicImageOr, /// (MetaImage, int[N] coords) -> void
+ AtomicImageXor, /// (MetaImage, int[N] coords) -> void
+ AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
@@ -275,25 +283,32 @@ private:
class Image final {
public:
- constexpr explicit Image(u64 offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
+ constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
+ : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {}
constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
+ Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
+ is_bindless{true}, size{size} {}
constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless, bool is_written, bool is_read)
+ bool is_bindless, bool is_written, bool is_read,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
: offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
- is_written{is_written}, is_read{is_read} {}
+ is_written{is_written}, is_read{is_read}, size{size} {}
+
+ void MarkWrite() {
+ is_written = true;
+ }
void MarkRead() {
is_read = true;
}
- void MarkWrite() {
- is_written = true;
+ void SetSize(Tegra::Shader::ImageAtomicSize size_) {
+ size = size_;
}
constexpr std::size_t GetOffset() const {
@@ -312,25 +327,39 @@ public:
return is_bindless;
}
- constexpr bool IsRead() const {
- return is_read;
- }
-
constexpr bool IsWritten() const {
return is_written;
}
+ constexpr bool IsRead() const {
+ return is_read;
+ }
+
constexpr std::pair<u32, u32> GetBindlessCBuf() const {
return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
}
+ constexpr bool IsSizeKnown() const {
+ return size.has_value();
+ }
+
+ constexpr Tegra::Shader::ImageAtomicSize GetSize() const {
+ return size.value();
+ }
+
+ constexpr bool operator<(const Image& rhs) const {
+ return std::tie(offset, index, type, size, is_bindless) <
+ std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless);
+ }
+
private:
u64 offset{};
std::size_t index{};
Tegra::Shader::ImageType type{};
bool is_bindless{};
- bool is_read{};
bool is_written{};
+ bool is_read{};
+ std::optional<Tegra::Shader::ImageAtomicSize> size{};
};
struct GlobalMemoryBase {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0f891eace..62816bd56 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -272,10 +272,16 @@ private:
bool is_shadow);
/// Accesses an image.
- Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+ Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size = {});
/// Access a bindless image sampler.
- Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+ Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size = {});
+
+ /// Tries to access an existing image, updating its state as needed
+ Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 4ceb219be..53d0142cb 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -513,6 +513,26 @@ bool IsPixelFormatASTC(PixelFormat format) {
}
}
+bool IsPixelFormatSRGB(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::RGBA8_SRGB:
+ case PixelFormat::BGRA8_SRGB:
+ case PixelFormat::DXT1_SRGB:
+ case PixelFormat::DXT23_SRGB:
+ case PixelFormat::DXT45_SRGB:
+ case PixelFormat::BC7U_SRGB:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ return true;
+ default:
+ return false;
+ }
+}
+
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
}
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 83f31c12c..19268b7cd 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -547,6 +547,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format);
bool IsPixelFormatASTC(PixelFormat format);
+bool IsPixelFormatSRGB(PixelFormat format);
+
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN