summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/page_table.cpp12
-rw-r--r--src/common/page_table.h15
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/maxwell_3d.h15
-rw-r--r--src/video_core/engines/shader_bytecode.h11
-rw-r--r--src/video_core/gpu.h1
-rw-r--r--src/video_core/memory_manager.h2
-rw-r--r--src/video_core/morton.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp70
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp153
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp288
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp48
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp130
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp138
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h13
-rw-r--r--src/video_core/shader/decode/bfe.cpp69
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/surface.h142
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/surface_params.cpp6
-rw-r--r--src/video_core/texture_cache/texture_cache.h41
32 files changed, 1018 insertions, 391 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 1f621fb1f..fbebed715 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -83,6 +83,8 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
+ "${VIDEO_CORE}/shader/transform_feedback.cpp"
+ "${VIDEO_CORE}/shader/transform_feedback.h"
# and also check that the scm_rev files haven't changed
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index 69b7abc54..566b57b62 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
- backing_addr.resize(num_page_table_entries);
// The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
// vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.shrink_to_fit();
attributes.shrink_to_fit();
+}
+
+BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
+
+BackingPageTable::~BackingPageTable() = default;
+
+void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
+ PageTable::Resize(address_space_width_in_bits);
+ const std::size_t num_page_table_entries = 1ULL
+ << (address_space_width_in_bits - page_size_in_bits);
+ backing_addr.resize(num_page_table_entries);
backing_addr.shrink_to_fit();
}
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 8b8ff0bb8..dbc272ab7 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -76,9 +76,20 @@ struct PageTable {
*/
std::vector<PageType> attributes;
- std::vector<u64> backing_addr;
-
const std::size_t page_size_in_bits{};
};
+/**
+ * A more advanced Page Table with the ability to save a backing address when using it
+ * depends on another MMU.
+ */
+struct BackingPageTable : PageTable {
+ explicit BackingPageTable(std::size_t page_size_in_bits);
+ ~BackingPageTable();
+
+ void Resize(std::size_t address_space_width_in_bits);
+
+ std::vector<u64> backing_addr;
+};
+
} // namespace Common
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 0101e5f0e..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -129,6 +129,8 @@ add_library(video_core STATIC
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
+ shader/transform_feedback.cpp
+ shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/format_lookup_table.cpp
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8752a1cfb..8a9e9992e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -628,19 +628,26 @@ public:
float depth_range_far;
};
- struct alignas(32) TransformFeedbackBinding {
+ struct TransformFeedbackBinding {
u32 buffer_enable;
u32 address_high;
u32 address_low;
s32 buffer_size;
s32 buffer_offset;
+ INSERT_UNION_PADDING_WORDS(3);
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
};
static_assert(sizeof(TransformFeedbackBinding) == 32);
- struct alignas(16) TransformFeedbackLayout {
+ struct TransformFeedbackLayout {
u32 stream;
u32 varying_count;
u32 stride;
+ INSERT_UNION_PADDING_WORDS(1);
};
static_assert(sizeof(TransformFeedbackLayout) == 16);
@@ -652,6 +659,10 @@ public:
return shader_config[index].enable != 0;
}
+ bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
+ return IsShaderConfigEnabled(static_cast<std::size_t>(type));
+ }
+
union {
struct {
INSERT_UNION_PADDING_WORDS(0x45);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..eba42deb4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -911,14 +911,9 @@ union Instruction {
} fadd32i;
union {
- BitField<20, 8, u64> shift_position;
- BitField<28, 8, u64> shift_length;
- BitField<48, 1, u64> negate_b;
- BitField<49, 1, u64> negate_a;
-
- u64 GetLeftShiftValue() const {
- return 32 - (shift_position + shift_length);
- }
+ BitField<40, 1, u64> brev;
+ BitField<47, 1, u64> rd_cc;
+ BitField<48, 1, u64> is_signed;
} bfe;
union {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba8c9d665..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
RGBA16_UNORM = 0xC6,
+ RGBA16_SNORM = 0xC7,
RGBA16_UINT = 0xC9,
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index aea010087..073bdb491 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -174,7 +174,7 @@ private:
/// End of address space, based on address space in bits.
static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
- Common::PageTable page_table{page_bits};
+ Common::BackingPageTable page_table{page_bits};
VMAMap vma_map;
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index f2c83266e..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::R8UI>,
MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::RGBA16U>,
+ MortonCopy<true, PixelFormat::RGBA16S>,
MortonCopy<true, PixelFormat::RGBA16UI>,
MortonCopy<true, PixelFormat::R11FG11FB10F>,
MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::R8U>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
+ MortonCopy<false, PixelFormat::RGBA16S>,
MortonCopy<false, PixelFormat::RGBA16U>,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8a2db8e36..1af4268a4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -496,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
- SyncTransformFeedback();
SyncPointState();
SyncPolygonOffset();
SyncAlphaTest();
@@ -569,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
glTextureBarrier();
}
- ++num_queued_commands;
+ BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
@@ -608,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
+
+ EndTransformFeedback();
+
+ ++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -1290,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() {
}
}
-void RasterizerOpenGL::SyncTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
-}
-
void RasterizerOpenGL::SyncPointState() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
@@ -1370,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
}
+void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
+
+ for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+ const auto& binding = regs.tfb_bindings[index];
+ if (!binding.buffer_enable) {
+ if (enabled_transform_feedback_buffers[index]) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
+ 0);
+ }
+ enabled_transform_feedback_buffers[index] = false;
+ continue;
+ }
+ enabled_transform_feedback_buffers[index] = true;
+
+ auto& tfb_buffer = transform_feedback_buffers[index];
+ tfb_buffer.Create();
+
+ const GLuint handle = tfb_buffer.handle;
+ const std::size_t size = binding.buffer_size;
+ glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
+ static_cast<GLsizeiptr>(size));
+ }
+
+ glBeginTransformFeedback(GL_POINTS);
+}
+
+void RasterizerOpenGL::EndTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ glEndTransformFeedback();
+
+ for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+ const auto& binding = regs.tfb_bindings[index];
+ if (!binding.buffer_enable) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+ const GLuint handle = transform_feedback_buffers[index].handle;
+ const GPUVAddr gpu_addr = binding.Address();
+ const std::size_t size = binding.buffer_size;
+ const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
+ }
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e6424f5d2..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -168,9 +168,6 @@ private:
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
- /// Syncs the transform feedback state to match the guest state
- void SyncTransformFeedback();
-
/// Syncs the point state to match the guest state
void SyncPointState();
@@ -192,6 +189,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Begin a transform feedback
+ void BeginTransformFeedback(GLenum primitive_mode);
+
+ /// End a transform feedback
+ void EndTransformFeedback();
+
/// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
@@ -229,6 +232,11 @@ private:
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+ std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ transform_feedback_buffers;
+ std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ enabled_transform_feedback_buffers;
+
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 19d6f3dcb..849839fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,6 +23,7 @@
#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
namespace OpenGL {
@@ -36,6 +37,7 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
+using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
using namespace std::string_literals;
@@ -49,6 +51,11 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
+constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
+
+constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
+constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
+
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
@@ -390,12 +397,22 @@ std::string FlowStackTopName(MetaStackClass stack) {
return stage == ShaderType::Vertex;
}
+struct GenericVaryingDescription {
+ std::string name;
+ u8 first_element = 0;
+ bool is_scalar = false;
+};
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier, std::string_view suffix)
: device{device}, ir{ir}, registry{registry}, stage{stage},
- identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {}
+ identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+ }
void Decompile() {
DeclareHeader();
@@ -403,17 +420,17 @@ public:
DeclareGeometry();
DeclareFragment();
DeclareCompute();
- DeclareRegisters();
- DeclareCustomVariables();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareInternalFlags();
DeclareInputAttributes();
DeclareOutputAttributes();
- DeclareConstantBuffers();
- DeclareGlobalMemory();
- DeclareSamplers();
DeclareImages();
+ DeclareSamplers();
+ DeclareGlobalMemory();
+ DeclareConstantBuffers();
+ DeclareLocalMemory();
+ DeclareRegisters();
+ DeclarePredicates();
+ DeclareInternalFlags();
+ DeclareCustomVariables();
DeclarePhysicalAttributeReader();
code.AddLine("void main() {{");
@@ -485,7 +502,7 @@ private:
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
- code.AddLine("#version 430 core");
+ code.AddLine("#version 440 core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -570,7 +587,13 @@ private:
code.AddLine("out gl_PerVertex {{");
++code.scope;
- code.AddLine("vec4 gl_Position;");
+ auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
+ if (!pos_xfb.empty()) {
+ pos_xfb = fmt::format("layout ({}) ", pos_xfb);
+ }
+ const char* pos_type =
+ FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
+ code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -703,7 +726,7 @@ private:
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
const u32 location{GetGenericAttributeIndex(index)};
- std::string name{GetInputAttribute(index)};
+ std::string name{GetGenericInputAttribute(index)};
if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
@@ -740,9 +763,59 @@ private:
}
}
+ std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+ return it->second.components;
+ }
+
+ std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+
+ const VaryingTFB& tfb = it->second;
+ return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
+ tfb.offset, tfb.stride);
+ }
+
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index)};
- code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
+ static constexpr std::string_view swizzle = "xyzw";
+ u8 element = 0;
+ while (element < 4) {
+ auto xfb = GetTransformFeedbackDecoration(index, element);
+ if (!xfb.empty()) {
+ xfb = fmt::format(", {}", xfb);
+ }
+ const std::size_t remainder = 4 - element;
+ const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
+ const char* const type = FLOAT_TYPES.at(num_components - 1);
+
+ const u32 location = GetGenericAttributeIndex(index);
+
+ GenericVaryingDescription description;
+ description.first_element = static_cast<u8>(element);
+ description.is_scalar = num_components == 1;
+ description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
+ if (element != 0 || num_components != 4) {
+ const std::string_view name_swizzle = swizzle.substr(element, num_components);
+ description.name = fmt::format("{}_{}", description.name, name_swizzle);
+ }
+ for (std::size_t i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(location * 4 + element + i);
+ varying_description.insert({offset, description});
+ }
+
+ code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
+ xfb, type, description.name);
+
+ element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
+ }
}
void DeclareConstantBuffers() {
@@ -1095,7 +1168,7 @@ private:
return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
Type::Float};
}
break;
@@ -1164,8 +1237,7 @@ private:
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
- return {
- {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
+ return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1937,16 +2009,19 @@ private:
expr += GetSampler(meta->sampler);
expr += ", ";
- expr += constructors.at(operation.GetOperandsCount() - 1);
+ expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- const std::size_t next = i + 1;
- if (next == count)
- expr += ')';
- else if (next < count)
+ if (i > 0) {
expr += ", ";
+ }
+ expr += VisitOperand(operation, i).AsInt();
+ }
+ if (meta->array) {
+ expr += ", ";
+ expr += Visit(meta->array).AsInt();
}
+ expr += ')';
if (meta->lod && !meta->sampler.IsBuffer()) {
expr += ", ";
@@ -2376,27 +2451,34 @@ private:
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
- return GetDeclarationWithSuffix(index, "gpr");
+ return AppendSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
- return GetDeclarationWithSuffix(index, "custom_var");
+ return AppendSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
+ return AppendSuffix(static_cast<u32>(pred), "pred");
}
- std::string GetInputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
+ std::string GetGenericInputAttribute(Attribute::Index attribute) const {
+ return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
}
- std::string GetOutputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
+ std::unordered_map<u8, GenericVaryingDescription> varying_description;
+
+ std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
+ const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
+ const auto& description = varying_description.at(offset);
+ if (description.is_scalar) {
+ return description.name;
+ }
+ return fmt::format("{}[{}]", description.name, element - description.first_element);
}
std::string GetConstBuffer(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf");
+ return AppendSuffix(index, "cbuf");
}
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2409,7 +2491,7 @@ private:
}
std::string GetConstBufferBlock(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf_block");
+ return AppendSuffix(index, "cbuf_block");
}
std::string GetLocalMemory() const {
@@ -2434,14 +2516,14 @@ private:
}
std::string GetSampler(const Sampler& sampler) const {
- return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
+ return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
- return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
+ return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
}
- std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
+ std::string AppendSuffix(u32 index, std::string_view name) const {
if (suffix.empty()) {
return fmt::format("{}{}", name, index);
} else {
@@ -2477,6 +2559,7 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 2d3838a7a..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 12333e8c9..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,8 +5,11 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
+#include <cstring>
#include <memory>
+
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -25,6 +28,8 @@
namespace OpenGL {
+namespace {
+
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames.
constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -41,124 +46,6 @@ struct Frame {
bool is_srgb{}; /// Framebuffer is sRGB or RGB
};
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
- std::mutex swap_chain_lock;
- std::condition_variable present_cv;
- std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
- std::queue<Frame*> free_queue;
- std::deque<Frame*> present_queue;
- Frame* previous_frame{};
-
- FrameMailbox() {
- for (auto& frame : swap_chain) {
- free_queue.push(&frame);
- }
- }
-
- ~FrameMailbox() {
- // lock the mutex and clear out the present and free_queues and notify any people who are
- // blocked to prevent deadlock on shutdown
- std::scoped_lock lock{swap_chain_lock};
- std::queue<Frame*>().swap(free_queue);
- present_queue.clear();
- present_cv.notify_all();
- }
-
- void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
- frame->present.Release();
- frame->present.Create();
- GLint previous_draw_fbo{};
- glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
- glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
- frame->color_reloaded = false;
- }
-
- void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
- // Recreate the color texture attachment
- frame->color.Release();
- frame->color.Create();
- const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
- glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
-
- // Recreate the FBO for the render target
- frame->render.Release();
- frame->render.Create();
- glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
- }
-
- frame->width = width;
- frame->height = height;
- frame->color_reloaded = true;
- }
-
- Frame* GetRenderFrame() {
- std::unique_lock lock{swap_chain_lock};
-
- // If theres no free frames, we will reuse the oldest render frame
- if (free_queue.empty()) {
- auto frame = present_queue.back();
- present_queue.pop_back();
- return frame;
- }
-
- Frame* frame = free_queue.front();
- free_queue.pop();
- return frame;
- }
-
- void ReleaseRenderFrame(Frame* frame) {
- std::unique_lock lock{swap_chain_lock};
- present_queue.push_front(frame);
- present_cv.notify_one();
- }
-
- Frame* TryGetPresentFrame(int timeout_ms) {
- std::unique_lock lock{swap_chain_lock};
- // wait for new entries in the present_queue
- present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
- [&] { return !present_queue.empty(); });
- if (present_queue.empty()) {
- // timed out waiting for a frame to draw so return the previous frame
- return previous_frame;
- }
-
- // free the previous frame and add it back to the free queue
- if (previous_frame) {
- free_queue.push(previous_frame);
- }
-
- // the newest entries are pushed to the front of the queue
- Frame* frame = present_queue.front();
- present_queue.pop_front();
- // remove all old entries from the present queue and move them back to the free_queue
- for (auto f : present_queue) {
- free_queue.push(f);
- }
- present_queue.clear();
- previous_frame = frame;
- return frame;
- }
-};
-
-namespace {
-
constexpr char VERTEX_SHADER[] = R"(
#version 430 core
@@ -211,6 +98,24 @@ struct ScreenRectVertex {
std::array<GLfloat, 2> tex_coord;
};
+/// Returns true if any debug tool is attached
+bool HasDebugTool() {
+ const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+ if (nsight) {
+ return true;
+ }
+
+ GLint num_extensions;
+ glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
+ for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
+ const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
+ if (!std::strcmp(name, "GL_EXT_debug_tool")) {
+ return true;
+ }
+ }
+ return false;
+}
+
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
+/**
+ * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
+ * but also make sure that rendering happens at the pace that the frontend dictates. This is a
+ * helper class that the renderer uses to sync frames between the render thread and the presentation
+ * thread
+ */
+class FrameMailbox {
+public:
+ std::mutex swap_chain_lock;
+ std::condition_variable present_cv;
+ std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
+ std::queue<Frame*> free_queue;
+ std::deque<Frame*> present_queue;
+ Frame* previous_frame{};
+
+ FrameMailbox() : has_debug_tool{HasDebugTool()} {
+ for (auto& frame : swap_chain) {
+ free_queue.push(&frame);
+ }
+ }
+
+ ~FrameMailbox() {
+ // lock the mutex and clear out the present and free_queues and notify any people who are
+ // blocked to prevent deadlock on shutdown
+ std::scoped_lock lock{swap_chain_lock};
+ std::queue<Frame*>().swap(free_queue);
+ present_queue.clear();
+ present_cv.notify_all();
+ }
+
+ void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
+ frame->present.Release();
+ frame->present.Create();
+ GLint previous_draw_fbo{};
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
+ }
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
+ frame->color_reloaded = false;
+ }
+
+ void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
+ // Recreate the color texture attachment
+ frame->color.Release();
+ frame->color.Create();
+ const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
+ glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
+
+ // Recreate the FBO for the render target
+ frame->render.Release();
+ frame->render.Create();
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
+ }
+
+ frame->width = width;
+ frame->height = height;
+ frame->color_reloaded = true;
+ }
+
+ Frame* GetRenderFrame() {
+ std::unique_lock lock{swap_chain_lock};
+
+ // If theres no free frames, we will reuse the oldest render frame
+ if (free_queue.empty()) {
+ auto frame = present_queue.back();
+ present_queue.pop_back();
+ return frame;
+ }
+
+ Frame* frame = free_queue.front();
+ free_queue.pop();
+ return frame;
+ }
+
+ void ReleaseRenderFrame(Frame* frame) {
+ std::unique_lock lock{swap_chain_lock};
+ present_queue.push_front(frame);
+ present_cv.notify_one();
+
+ DebugNotifyNextFrame();
+ }
+
+ Frame* TryGetPresentFrame(int timeout_ms) {
+ DebugWaitForNextFrame();
+
+ std::unique_lock lock{swap_chain_lock};
+ // wait for new entries in the present_queue
+ present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
+ [&] { return !present_queue.empty(); });
+ if (present_queue.empty()) {
+ // timed out waiting for a frame to draw so return the previous frame
+ return previous_frame;
+ }
+
+ // free the previous frame and add it back to the free queue
+ if (previous_frame) {
+ free_queue.push(previous_frame);
+ }
+
+ // the newest entries are pushed to the front of the queue
+ Frame* frame = present_queue.front();
+ present_queue.pop_front();
+ // remove all old entries from the present queue and move them back to the free_queue
+ for (auto f : present_queue) {
+ free_queue.push(f);
+ }
+ present_queue.clear();
+ previous_frame = frame;
+ return frame;
+ }
+
+private:
+ std::mutex debug_synch_mutex;
+ std::condition_variable debug_synch_condition;
+ std::atomic_int frame_for_debug{};
+ const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
+
+ /// Signal that a new frame is available (called from GPU thread)
+ void DebugNotifyNextFrame() {
+ if (!has_debug_tool) {
+ return;
+ }
+ frame_for_debug++;
+ std::lock_guard lock{debug_synch_mutex};
+ debug_synch_condition.notify_one();
+ }
+
+ /// Wait for a new frame to be available (called from presentation thread)
+ void DebugWaitForNextFrame() {
+ if (!has_debug_tool) {
+ return;
+ }
+ const int last_frame = frame_for_debug;
+ std::unique_lock lock{debug_synch_mutex};
+ debug_synch_condition.wait(lock,
+ [this, last_frame] { return frame_for_debug > last_frame; });
+ }
+};
+
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
frame_mailbox{std::make_unique<FrameMailbox>()} {}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index df3ac707c..0e2e5e6c7 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -125,6 +125,7 @@ struct FormatTuple {
{vk::Format::eR8Uint, Attachable | Storage}, // R8UI
{vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
{vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
+ {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
{vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
{vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
{vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -331,6 +332,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR16G16B16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Unorm;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return vk::Format::eA2B10G10R10UnormPack32;
default:
break;
}
@@ -364,6 +367,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR8G8B8A8Uint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Uint;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return vk::Format::eR32G32Uint;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return vk::Format::eR32G32B32Uint;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return vk::Format::eR32G32B32A32Uint;
default:
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..28d2fbc4f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
- features.shaderStorageImageReadWithoutFormat =
- is_shader_storage_img_read_without_format_supported;
+ features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
+ vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
+ if (ext_transform_feedback) {
+ transform_feedback.transformFeedback = true;
+ transform_feedback.geometryStreams = true;
+ SetNext(next, transform_feedback);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
- extensions.reserve(14);
+ extensions.reserve(15);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- bool khr_shader_float16_int8{};
- bool ext_subgroup_size_control{};
+ bool has_khr_shader_float16_int8{};
+ bool has_ext_subgroup_size_control{};
+ bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
- Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
+ false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
- Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+ Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+ false);
+ Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
}
- if (khr_shader_float16_int8) {
+ if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
- if (ext_subgroup_size_control) {
+ if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
is_warp_potentially_bigger = true;
}
+ if (has_ext_transform_feedback) {
+ const auto features =
+ GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
+ const auto properties =
+ GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
+
+ if (features.transformFeedback && features.geometryStreams &&
+ properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
+ properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
+ extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
+ ext_transform_feedback = true;
+ }
+ }
+
return extensions;
}
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
- is_shader_storage_img_read_without_format_supported =
- supported_features.shaderStorageImageReadWithoutFormat;
+ is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32Uint,
vk::Format::eR16G16B16A16Uint,
+ vk::Format::eR16G16B16A16Snorm,
vk::Format::eR16G16B16A16Unorm,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
return properties.limits.maxPushConstantsSize;
}
- /// Returns true if Shader storage Image Read Without Format supported.
- bool IsShaderStorageImageReadWithoutFormatSupported() const {
- return is_shader_storage_img_read_without_format_supported;
- }
-
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
+ /// Returns true if formatless image load is supported.
+ bool IsFormatlessImageLoadSupported() const {
+ return is_formatless_image_load_supported;
+ }
+
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
return ext_shader_viewport_index_layer;
}
+ /// Returns true if the device supports VK_EXT_transform_feedback.
+ bool IsExtTransformFeedbackSupported() const {
+ return ext_transform_feedback;
+ }
+
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device.
- vk::DispatchLoaderDynamic dld; ///< Device function pointers.
- vk::PhysicalDeviceProperties properties; ///< Device properties.
- UniqueDevice logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- vk::DriverIdKHR driver_id{}; ///< Driver ID.
- vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool is_float16_supported{}; ///< Support for float16 arithmetics.
- bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ vk::PhysicalDeviceProperties properties; ///< Device properties.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::DriverIdKHR driver_id{}; ///< Driver ID.
+ vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+ bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
- bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
- ///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index ebf85f311..91e7b7791 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue)
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKRenderPassCache& renderpass_cache)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
- renderpass_cache(device) {}
+ renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default;
@@ -273,9 +274,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
- const SPIRVShader spirv_shader{
- Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
- shader->GetEntries()};
+ const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
+ shader->GetRegistry(), specialization),
+ shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
@@ -324,8 +325,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
- specialization.primitive_topology = fixed_state.input_assembly.topology;
- if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
+ if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
@@ -333,9 +333,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
- specialization.tessellation.primitive = fixed_state.tessellation.primitive;
- specialization.tessellation.spacing = fixed_state.tessellation.spacing;
- specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +353,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
- program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
- entries};
+ program[stage] = {
+ Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
+ entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e292526bb..c4c112290 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -132,6 +132,10 @@ public:
return shader_ir;
}
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return registry;
+ }
+
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
@@ -157,7 +161,8 @@ public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKRenderPassCache& renderpass_cache);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
@@ -180,8 +185,7 @@ private:
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
-
- VKRenderPassCache renderpass_cache;
+ VKRenderPassCache& renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2bcb17b56..755aad643 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
screen_info{screen_info}, device{device}, resource_manager{resource_manager},
memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
- update_descriptor_queue(device, scheduler),
+ update_descriptor_queue(device, scheduler), renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
staging_pool),
- pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
+ pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
+ renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
@@ -347,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
}
+ BeginTransformFeedback();
+
const auto pipeline_layout = pipeline.GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}
draw_params.Draw(cmdbuf, dld);
});
+
+ EndTransformFeedback();
}
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- query_cache.UpdateCounters();
-
const auto& gpu = system.GPU().Maxwell3D();
if (!system.GPU().Maxwell3D().ShouldExecute()) {
return;
}
+ sampled_views.clear();
+ image_views.clear();
+
+ query_cache.UpdateCounters();
+
const auto& regs = gpu.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
@@ -376,52 +384,54 @@ void RasterizerVulkan::Clear() {
if (!use_color && !use_depth && !use_stencil) {
return;
}
- // Clearing images requires to be out of a renderpass
- scheduler.RequestOutsideRenderPassOperationContext();
- // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass.
+ [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ DEBUG_ASSERT(texceptions.none());
+ SetupImageTransitions(0, color_attachments, zeta_attachment);
- if (use_color) {
- View color_view;
- {
- MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
- }
+ const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
+ const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
+ scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
+
+ const auto& scissor = regs.scissor_test[0];
+ const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
+ vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
+ scissor_extent.width = std::min(scissor_extent.width, render_area.width);
+ scissor_extent.height = std::min(scissor_extent.height, render_area.height);
- color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
- vk::PipelineStageFlagBits::eTransfer,
- vk::AccessFlagBits::eTransferWrite);
+ const u32 layer = regs.clear_buffers.layer;
+ const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1);
+ if (use_color) {
const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
regs.clear_color[2], regs.clear_color[3]};
- const vk::ClearColorValue clear(clear_color);
- scheduler.Record([image = color_view->GetImage(),
- subresource = color_view->GetImageSubresourceRange(),
- clear](auto cmdbuf, auto& dld) {
- cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
- dld);
+ const vk::ClearValue clear_value{clear_color};
+ const u32 color_attachment = regs.clear_buffers.RT;
+ scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) {
+ const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
+ clear_value);
+ cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
});
}
- if (use_depth || use_stencil) {
- View zeta_surface;
- {
- MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- zeta_surface = texture_cache.GetDepthBufferSurface(false);
- }
- zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
- vk::PipelineStageFlagBits::eTransfer,
- vk::AccessFlagBits::eTransferWrite);
-
- const vk::ClearDepthStencilValue clear(regs.clear_depth,
- static_cast<u32>(regs.clear_stencil));
- scheduler.Record([image = zeta_surface->GetImage(),
- subresource = zeta_surface->GetImageSubresourceRange(),
- clear](auto cmdbuf, auto& dld) {
- cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
- subresource, dld);
- });
+ if (!use_depth && !use_stencil) {
+ return;
+ }
+ vk::ImageAspectFlags aspect_flags;
+ if (use_depth) {
+ aspect_flags |= vk::ImageAspectFlagBits::eDepth;
}
+ if (use_stencil) {
+ aspect_flags |= vk::ImageAspectFlagBits::eStencil;
+ }
+
+ scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
+ clear_rect, aspect_flags](auto cmdbuf, auto& dld) {
+ const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil);
+ const vk::ClearValue clear_value{clear_zeta};
+ const vk::ClearAttachment attachment(aspect_flags, 0, clear_value);
+ cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
+ });
}
void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
@@ -738,6 +748,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateStencilFaces(regs);
}
+void RasterizerVulkan::BeginTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
+
+ UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
+ UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
+ UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
+
+ const auto& binding = regs.tfb_bindings[0];
+ UNIMPLEMENTED_IF(binding.buffer_enable == 0);
+ UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+ const GPUVAddr gpu_addr = binding.Address();
+ const std::size_t size = binding.buffer_size;
+ const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+
+ scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
+ cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
+ cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
+ });
+}
+
+void RasterizerVulkan::EndTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ scheduler.Record(
+ [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
+}
+
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs;
@@ -1109,7 +1157,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
// This implementation assumes that all attributes are used in the shader.
const GPUVAddr start{regs.vertex_array[index].StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
- DEBUG_ASSERT(end > start);
+ DEBUG_ASSERT(end >= start);
size += (end - start + 1) * regs.vertex_array[index].enable;
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 96ea05f0a..3185868e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -169,6 +169,10 @@ private:
void UpdateDynamicStates();
+ void BeginTransformFeedback();
+
+ void EndTransformFeedback();
+
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -249,6 +253,7 @@ private:
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
+ VKRenderPassCache renderpass_cache;
QuadArrayPass quad_array_pass;
Uint8Pass uint8_pass;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index cfcca5af0..51ecb5567 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
#include <functional>
#include <limits>
#include <map>
+#include <optional>
#include <type_traits>
+#include <unordered_map>
#include <utility>
#include <fmt/format.h>
@@ -24,6 +26,7 @@
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
namespace Vulkan {
@@ -93,6 +96,12 @@ struct VertexIndices {
std::optional<u32> clip_distances;
};
+struct GenericVaryingDescription {
+ Id id = nullptr;
+ u32 first_element = 0;
+ bool is_scalar = false;
+};
+
spv::Dim GetSamplerDim(const Sampler& sampler) {
ASSERT(!sampler.IsBuffer());
switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler final : public Sirit::Module {
public:
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
- const Specialization& specialization)
+ const Registry& registry, const Specialization& specialization)
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
- specialization{specialization} {
+ registry{registry}, specialization{specialization} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+
AddCapability(spv::Capability::Shader);
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
+ if (!transform_feedback.empty()) {
+ if (device.IsExtTransformFeedbackSupported()) {
+ AddCapability(spv::Capability::TransformFeedback);
+ } else {
+ LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
+ "supported on this device");
+ }
+ }
+
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
}
}
- if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
@@ -318,25 +340,29 @@ public:
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common2.threads_per_input_primitive);
break;
- case ShaderType::TesselationEval:
+ case ShaderType::TesselationEval: {
+ const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Tessellation);
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive));
- AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing));
- AddExecutionMode(main, specialization.tessellation.clockwise
+ AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
+ AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
+ AddExecutionMode(main, info.tessellation_clockwise
? spv::ExecutionMode::VertexOrderCw
: spv::ExecutionMode::VertexOrderCcw);
break;
- case ShaderType::Geometry:
+ }
+ case ShaderType::Geometry: {
+ const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Geometry);
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology));
+ AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common4.max_output_vertices);
// TODO(Rodrigo): Where can we get this info from?
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
break;
+ }
case ShaderType::Fragment:
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
if (stage != ShaderType::Geometry) {
return;
}
- const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
+ const auto& info = registry.GetGraphicsInfo();
+ const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
DeclareInputVertexArray(num_input);
DeclareOutputVertex();
}
@@ -742,12 +769,34 @@ private:
}
void DeclareOutputAttributes() {
+ if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
for (const auto index : ir.GetOutputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
- const u32 location = GetGenericAttributeLocation(index);
- Id type = t_float4;
+ DeclareOutputAttribute(index);
+ }
+ }
+
+ void DeclareOutputAttribute(Attribute::Index index) {
+ static constexpr std::string_view swizzle = "xyzw";
+
+ const u32 location = GetGenericAttributeLocation(index);
+ u8 element = 0;
+ while (element < 4) {
+ const std::size_t remainder = 4 - element;
+
+ std::size_t num_components = remainder;
+ const std::optional tfb = GetTransformFeedbackInfo(index, element);
+ if (tfb) {
+ num_components = tfb->components;
+ }
+
+ Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
Id varying_default = v_varying_default;
if (IsOutputAttributeArray()) {
const u32 num = GetNumOutputVertices();
@@ -760,13 +809,45 @@ private:
}
type = TypePointer(spv::StorageClass::Output, type);
+ std::string name = fmt::format("out_attr{}", location);
+ if (num_components < 4 || element > 0) {
+ name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
+ }
+
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
- Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
- output_attributes.emplace(index, id);
+ Name(AddGlobalVariable(id), name);
+
+ GenericVaryingDescription description;
+ description.id = id;
+ description.first_element = element;
+ description.is_scalar = num_components == 1;
+ for (u32 i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
+ output_attributes.emplace(offset, description);
+ }
interfaces.push_back(id);
Decorate(id, spv::Decoration::Location, location);
+ if (element > 0) {
+ Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
+ }
+ if (tfb && device.IsExtTransformFeedbackSupported()) {
+ Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
+ Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
+ Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
+ }
+
+ element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
+ }
+ }
+
+ std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
}
+ return it->second;
}
u32 DeclareConstantBuffers(u32 binding) {
@@ -898,7 +979,7 @@ private:
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
- return GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
+ return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
case ShaderType::TesselationControl:
case ShaderType::TesselationEval:
return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
}
default:
if (IsGenericAttribute(attribute)) {
- const Id composite = output_attributes.at(attribute);
- return {ArrayPass(t_out_float, composite, {element}), Type::Float};
+ const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
+ const GenericVaryingDescription description = output_attributes.at(offset);
+ const Id composite = description.id;
+ std::vector<u32> indices;
+ if (!description.is_scalar) {
+ indices.push_back(element - description.first_element);
+ }
+ return {ArrayPass(t_out_float, composite, indices), Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
}
Expression ImageLoad(Operation operation) {
- if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ if (!device.IsFormatlessImageLoadSupported()) {
return {v_float_zero, Type::Float};
}
@@ -2258,11 +2345,11 @@ private:
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
switch (type) {
case Type::Float:
- return {nullptr, t_float2, t_float3, t_float4};
+ return {t_float, t_float2, t_float3, t_float4};
case Type::Int:
- return {nullptr, t_int2, t_int3, t_int4};
+ return {t_int, t_int2, t_int3, t_int4};
case Type::Uint:
- return {nullptr, t_uint2, t_uint3, t_uint4};
+ return {t_uint, t_uint2, t_uint3, t_uint4};
default:
UNIMPLEMENTED();
return {};
@@ -2495,7 +2582,9 @@ private:
const ShaderIR& ir;
const ShaderType stage;
const Tegra::Shader::Header header;
+ const Registry& registry;
const Specialization& specialization;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
const Id t_void = Name(TypeVoid(), "void");
@@ -2584,7 +2673,7 @@ private:
Id shared_memory{};
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
std::map<Attribute::Index, Id> input_attributes;
- std::map<Attribute::Index, Id> output_attributes;
+ std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
- ShaderType stage, const Specialization& specialization) {
- return SPIRVDecompiler(device, ir, stage, specialization).Assemble();
+ ShaderType stage, const VideoCommon::Shader::Registry& registry,
+ const Specialization& specialization) {
+ return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
- Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
-
- // Tessellation specific
- struct {
- Maxwell::TessellationPrimitive primitive{};
- Maxwell::TessellationSpacing spacing{};
- bool clockwise{};
- } tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage, const Specialization& specialization);
+ Tegra::Engines::ShaderType stage,
+ const VideoCommon::Shader::Registry& registry,
+ const Specialization& specialization);
} // namespace Vulkan
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.bfe.negate_b);
-
Node op_a = GetRegister(instr.gpr8);
- op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFE_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in BFE is not implemented");
+ Node op_b = [&] {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::BFE_R:
+ return GetRegister(instr.gpr20);
+ case OpCode::Id::BFE_C:
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::BFE_IMM:
+ return Immediate(instr.alu.GetSignedImm20_20());
+ default:
+ UNREACHABLE();
+ return Immediate(0);
+ }
+ }();
- const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
- const Node outer_shift_imm =
- Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
+ UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
- const Node inner_shift =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
- const Node outer_shift =
- Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
+ const bool is_signed = instr.bfe.is_signed;
- SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
- SetRegister(bb, instr.gpr0, outer_shift);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
+ // using reverse parallel method in
+ // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ // note for later if possible to implement faster method.
+ if (instr.bfe.brev) {
+ const auto swap = [&](u32 s, u32 mask) {
+ Node v1 =
+ SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
+ if (mask != 0) {
+ v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
+ Immediate(mask));
+ }
+ Node v2 = op_a;
+ if (mask != 0) {
+ v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
+ Immediate(mask));
+ }
+ v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
+ Immediate(s));
+ return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
+ std::move(v2));
+ };
+ op_a = swap(1, 0x55555555U);
+ op_a = swap(2, 0x33333333U);
+ op_a = swap(4, 0x0F0F0F0FU);
+ op_a = swap(8, 0x00FF00FFU);
+ op_a = swap(16, 0);
}
+ const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(0), Immediate(8));
+ const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(8), Immediate(8));
+ auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
+ SetRegister(bb, instr.gpr0, std::move(result));
+
return pc;
}
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
return OperationCode::UBitwiseXor;
case OperationCode::IBitwiseNot:
return OperationCode::UBitwiseNot;
+ case OperationCode::IBitfieldExtract:
+ return OperationCode::UBitfieldExtract;
case OperationCode::IBitfieldInsert:
return OperationCode::UBitfieldInsert;
case OperationCode::IBitCount:
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/transform_feedback.h"
+
+namespace VideoCommon::Shader {
+
+namespace {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
+
+/// Attribute offsets that describe a vector
+constexpr std::array VECTORS = {
+ 28, // gl_Position
+ 32, // Generic 0
+ 36, // Generic 1
+ 40, // Generic 2
+ 44, // Generic 3
+ 48, // Generic 4
+ 52, // Generic 5
+ 56, // Generic 6
+ 60, // Generic 7
+ 64, // Generic 8
+ 68, // Generic 9
+ 72, // Generic 10
+ 76, // Generic 11
+ 80, // Generic 12
+ 84, // Generic 13
+ 88, // Generic 14
+ 92, // Generic 15
+ 96, // Generic 16
+ 100, // Generic 17
+ 104, // Generic 18
+ 108, // Generic 19
+ 112, // Generic 20
+ 116, // Generic 21
+ 120, // Generic 22
+ 124, // Generic 23
+ 128, // Generic 24
+ 132, // Generic 25
+ 136, // Generic 26
+ 140, // Generic 27
+ 144, // Generic 28
+ 148, // Generic 29
+ 152, // Generic 30
+ 156, // Generic 31
+ 160, // gl_FrontColor
+ 164, // gl_FrontSecondaryColor
+ 160, // gl_BackColor
+ 164, // gl_BackSecondaryColor
+ 192, // gl_TexCoord[0]
+ 196, // gl_TexCoord[1]
+ 200, // gl_TexCoord[2]
+ 204, // gl_TexCoord[3]
+ 208, // gl_TexCoord[4]
+ 212, // gl_TexCoord[5]
+ 216, // gl_TexCoord[6]
+ 220, // gl_TexCoord[7]
+};
+} // namespace
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
+
+ std::unordered_map<u8, VaryingTFB> tfb;
+
+ for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
+ const auto& locations = info.tfb_varying_locs[buffer];
+ const auto& layout = info.tfb_layouts[buffer];
+ const std::size_t varying_count = layout.varying_count;
+
+ std::size_t highest = 0;
+
+ for (std::size_t offset = 0; offset < varying_count; ++offset) {
+ const std::size_t base_offset = offset;
+ const u8 location = locations[offset];
+
+ VaryingTFB varying;
+ varying.buffer = layout.stream;
+ varying.stride = layout.stride;
+ varying.offset = offset * sizeof(u32);
+ varying.components = 1;
+
+ if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
+ UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+ const u8 base_index = location / 4;
+ while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+ ++offset;
+ ++varying.components;
+ }
+ }
+
+ [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
+ UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
+
+ highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
+ }
+
+ UNIMPLEMENTED_IF(highest != layout.stride);
+ }
+ return tfb;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+struct VaryingTFB {
+ std::size_t buffer;
+ std::size_t stride;
+ std::size_t offset;
+ std::size_t components;
+};
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 9707c353d..cc7181229 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return PixelFormat::RGBA16U;
+ case Tegra::RenderTargetFormat::RGBA16_SNORM:
+ return PixelFormat::RGBA16S;
case Tegra::RenderTargetFormat::RGBA16_UINT:
return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index d88109e5a..ae8817465 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,82 +25,83 @@ enum class PixelFormat {
R8UI = 7,
RGBA16F = 8,
RGBA16U = 9,
- RGBA16UI = 10,
- R11FG11FB10F = 11,
- RGBA32UI = 12,
- DXT1 = 13,
- DXT23 = 14,
- DXT45 = 15,
- DXN1 = 16, // This is also known as BC4
- DXN2UNORM = 17,
- DXN2SNORM = 18,
- BC7U = 19,
- BC6H_UF16 = 20,
- BC6H_SF16 = 21,
- ASTC_2D_4X4 = 22,
- BGRA8 = 23,
- RGBA32F = 24,
- RG32F = 25,
- R32F = 26,
- R16F = 27,
- R16U = 28,
- R16S = 29,
- R16UI = 30,
- R16I = 31,
- RG16 = 32,
- RG16F = 33,
- RG16UI = 34,
- RG16I = 35,
- RG16S = 36,
- RGB32F = 37,
- RGBA8_SRGB = 38,
- RG8U = 39,
- RG8S = 40,
- RG32UI = 41,
- RGBX16F = 42,
- R32UI = 43,
- R32I = 44,
- ASTC_2D_8X8 = 45,
- ASTC_2D_8X5 = 46,
- ASTC_2D_5X4 = 47,
- BGRA8_SRGB = 48,
- DXT1_SRGB = 49,
- DXT23_SRGB = 50,
- DXT45_SRGB = 51,
- BC7U_SRGB = 52,
- R4G4B4A4U = 53,
- ASTC_2D_4X4_SRGB = 54,
- ASTC_2D_8X8_SRGB = 55,
- ASTC_2D_8X5_SRGB = 56,
- ASTC_2D_5X4_SRGB = 57,
- ASTC_2D_5X5 = 58,
- ASTC_2D_5X5_SRGB = 59,
- ASTC_2D_10X8 = 60,
- ASTC_2D_10X8_SRGB = 61,
- ASTC_2D_6X6 = 62,
- ASTC_2D_6X6_SRGB = 63,
- ASTC_2D_10X10 = 64,
- ASTC_2D_10X10_SRGB = 65,
- ASTC_2D_12X12 = 66,
- ASTC_2D_12X12_SRGB = 67,
- ASTC_2D_8X6 = 68,
- ASTC_2D_8X6_SRGB = 69,
- ASTC_2D_6X5 = 70,
- ASTC_2D_6X5_SRGB = 71,
- E5B9G9R9F = 72,
+ RGBA16S = 10,
+ RGBA16UI = 11,
+ R11FG11FB10F = 12,
+ RGBA32UI = 13,
+ DXT1 = 14,
+ DXT23 = 15,
+ DXT45 = 16,
+ DXN1 = 17, // This is also known as BC4
+ DXN2UNORM = 18,
+ DXN2SNORM = 19,
+ BC7U = 20,
+ BC6H_UF16 = 21,
+ BC6H_SF16 = 22,
+ ASTC_2D_4X4 = 23,
+ BGRA8 = 24,
+ RGBA32F = 25,
+ RG32F = 26,
+ R32F = 27,
+ R16F = 28,
+ R16U = 29,
+ R16S = 30,
+ R16UI = 31,
+ R16I = 32,
+ RG16 = 33,
+ RG16F = 34,
+ RG16UI = 35,
+ RG16I = 36,
+ RG16S = 37,
+ RGB32F = 38,
+ RGBA8_SRGB = 39,
+ RG8U = 40,
+ RG8S = 41,
+ RG32UI = 42,
+ RGBX16F = 43,
+ R32UI = 44,
+ R32I = 45,
+ ASTC_2D_8X8 = 46,
+ ASTC_2D_8X5 = 47,
+ ASTC_2D_5X4 = 48,
+ BGRA8_SRGB = 49,
+ DXT1_SRGB = 50,
+ DXT23_SRGB = 51,
+ DXT45_SRGB = 52,
+ BC7U_SRGB = 53,
+ R4G4B4A4U = 54,
+ ASTC_2D_4X4_SRGB = 55,
+ ASTC_2D_8X8_SRGB = 56,
+ ASTC_2D_8X5_SRGB = 57,
+ ASTC_2D_5X4_SRGB = 58,
+ ASTC_2D_5X5 = 59,
+ ASTC_2D_5X5_SRGB = 60,
+ ASTC_2D_10X8 = 61,
+ ASTC_2D_10X8_SRGB = 62,
+ ASTC_2D_6X6 = 63,
+ ASTC_2D_6X6_SRGB = 64,
+ ASTC_2D_10X10 = 65,
+ ASTC_2D_10X10_SRGB = 66,
+ ASTC_2D_12X12 = 67,
+ ASTC_2D_12X12_SRGB = 68,
+ ASTC_2D_8X6 = 69,
+ ASTC_2D_8X6_SRGB = 70,
+ ASTC_2D_6X5 = 71,
+ ASTC_2D_6X5_SRGB = 72,
+ E5B9G9R9F = 73,
MaxColorFormat,
// Depth formats
- Z32F = 73,
- Z16 = 74,
+ Z32F = 74,
+ Z16 = 75,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 75,
- S8Z24 = 76,
- Z32FS8 = 77,
+ Z24S8 = 76,
+ S8Z24 = 77,
+ Z32FS8 = 78,
MaxDepthStencilFormat,
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // R8UI
0, // RGBA16F
0, // RGBA16U
+ 0, // RGBA16S
0, // RGBA16UI
0, // R11FG11FB10F
0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
+ 1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
+ 1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
8, // R8UI
64, // RGBA16F
64, // RGBA16U
+ 64, // RGBA16S
64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // R8UI
SurfaceCompression::None, // RGBA16F
SurfaceCompression::None, // RGBA16U
+ SurfaceCompression::None, // RGBA16S
SurfaceCompression::None, // RGBA16UI
SurfaceCompression::None, // R11FG11FB10F
SurfaceCompression::None, // RGBA32UI
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index cc3ad8417..e151c26c4 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 75> DefinitionTable = {{
+constexpr std::array<Table, 76> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
{TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
+ {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
{TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
{TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
{TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index f00839313..9931c5ef7 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
params.height = tic.Height();
params.depth = tic.Depth();
params.pitch = params.is_tiled ? 0 : tic.Pitch();
- if (params.target == SurfaceTarget::TextureCubemap ||
- params.target == SurfaceTarget::TextureCubeArray) {
+ if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
+ params.depth = 1;
+ } else if (params.target == SurfaceTarget::TextureCubemap ||
+ params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.num_levels = tic.max_mip_level + 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 51373b687..6cdbe63d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -104,6 +104,11 @@ public:
if (!cache_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
+
+ if (!IsTypeCompatible(tic.texture_type, entry)) {
+ return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
+ }
+
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
if (guard_samplers) {
@@ -914,13 +919,15 @@ private:
params.width = 1;
params.height = 1;
params.depth = 1;
+ if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
+ params.depth = 6;
+ }
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
- params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F;
+ params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
- invalid_memory.clear();
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
surface->UploadTexture(invalid_memory);
surface->MarkAsModified(false, Tick());
@@ -1082,6 +1089,36 @@ private:
return siblings_table[static_cast<std::size_t>(format)];
}
+ /// Returns true the shader sampler entry is compatible with the TIC texture type.
+ static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
+ const VideoCommon::Shader::Sampler& entry) {
+ const auto shader_type = entry.GetType();
+ switch (tic_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ case Tegra::Texture::TextureType::Texture1DArray:
+ return shader_type == Tegra::Shader::TextureType::Texture1D;
+ case Tegra::Texture::TextureType::Texture1DBuffer:
+ // TODO(Rodrigo): Assume as valid for now
+ return true;
+ case Tegra::Texture::TextureType::Texture2D:
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ return shader_type == Tegra::Shader::TextureType::Texture2D;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ return shader_type == Tegra::Shader::TextureType::Texture2D ||
+ shader_type == Tegra::Shader::TextureType::TextureCube;
+ case Tegra::Texture::TextureType::Texture3D:
+ return shader_type == Tegra::Shader::TextureType::Texture3D;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ case Tegra::Texture::TextureType::TextureCubemap:
+ if (shader_type == Tegra::Shader::TextureType::TextureCube) {
+ return true;
+ }
+ return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
+ }
+ UNREACHABLE();
+ return true;
+ }
+
struct FramebufferTargetInfo {
TSurface target;
TView view;