diff options
Diffstat (limited to 'src/video_core')
65 files changed, 1342 insertions, 1185 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1e010e4da..f8b67cbe1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(video_core STATIC dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h + engines/const_buffer_info.h engines/engine_upload.cpp engines/engine_upload.h engines/fermi_2d.cpp @@ -42,8 +43,6 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_global_cache.cpp renderer_opengl/gl_global_cache.h - renderer_opengl/gl_primitive_assembler.cpp - renderer_opengl/gl_primitive_assembler.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer_cache.cpp @@ -102,6 +101,9 @@ add_library(video_core STATIC shader/decode/xmad.cpp shader/decode/other.cpp shader/decode.cpp + shader/node_helper.cpp + shader/node_helper.h + shader/node.h shader/shader_ir.cpp shader/shader_ir.h shader/track.cpp diff --git a/src/video_core/engines/const_buffer_info.h b/src/video_core/engines/const_buffer_info.h new file mode 100644 index 000000000..d8f672462 --- /dev/null +++ b/src/video_core/engines/const_buffer_info.h @@ -0,0 +1,17 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Tegra::Engines { + +struct ConstBufferInfo { + GPUVAddr address; + u32 size; + bool enabled; +}; + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 5250b8d9b..6a3309a2c 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -140,7 +140,7 @@ public: BitField<0, 16, u32> shared_alloc; - BitField<0, 31, u32> block_dim_x; + BitField<16, 16, u32> block_dim_x; union { BitField<0, 16, u32> block_dim_y; BitField<16, 16, u32> block_dim_z; @@ -153,7 +153,7 @@ public: INSERT_PADDING_WORDS(0x8); - struct { + struct ConstBufferConfig { u32 address_low; union { BitField<0, 8, u32> address_high; @@ -163,7 +163,8 @@ public: return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | address_low); } - } const_buffer_config[8]; + }; + std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config; union { BitField<0, 20, u32> local_pos_alloc; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 39968d403..08d553696 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -396,12 +396,10 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)]; - auto& buffer = shader.const_buffers[bind_data.index]; - ASSERT(bind_data.index < Regs::MaxConstBuffers); + auto& buffer = shader.const_buffers[bind_data.index]; buffer.enabled = bind_data.valid.Value() != 0; - buffer.index = bind_data.index; buffer.address = regs.const_buffer.BufferAddress(); buffer.size = regs.const_buffer.cb_size; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f342c78e6..13e314944 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" @@ -1112,13 +1113,6 @@ public: static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); struct State { - struct ConstBufferInfo { - GPUVAddr address; - u32 index; - u32 size; - bool enabled; - }; - struct ShaderStageInfo { std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers; }; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e83f25fa1..ffb3ec3e0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1663,6 +1663,7 @@ private: INST("111000100100----", Id::BRA, Type::Flow, "BRA"), INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), INST("111000110100---", Id::BRK, Type::Flow, "BRK"), + INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), @@ -1686,7 +1687,6 @@ private: INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1e2ff46b0..3f0939ec9 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -75,7 +75,7 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPus void ThreadManager::SubmitList(Tegra::CommandList&& entries) { const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; + const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 113f9d8f3..43a84bd52 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -163,8 +163,8 @@ private: static constexpr u64 page_size{1 << page_bits}; static constexpr u64 page_mask{page_size - 1}; - /// Address space in bits, this is fairly arbitrary but sufficiently large. - static constexpr u32 address_space_width{39}; + /// Address space in bits, according to Tegra X1 TRM + static constexpr u32 address_space_width{40}; /// Start address for mapping, this is fairly arbitrary but must be non-zero. static constexpr GPUVAddr address_space_base{0x100000}; /// End of address space, based on address space in bits. diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 25652e794..48b86f3bd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -71,16 +71,6 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s return uploaded_offset; } -std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { - AlignBuffer(alignment); - u8* const uploaded_ptr = buffer_ptr; - const GLintptr uploaded_offset = buffer_offset; - - buffer_ptr += size; - buffer_offset += size; - return std::make_tuple(uploaded_ptr, uploaded_offset); -} - bool OGLBufferCache::Map(std::size_t max_size) { bool invalidate; std::tie(buffer_ptr, buffer_offset_base, invalidate) = diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f9247a40e..f2347581b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -61,9 +61,6 @@ public: /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); - /// Reserves memory to be used by host's CPU. Returns mapped address and offset. - std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); - bool Map(std::size_t max_size); void Unmap(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1d1581f49..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,11 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstddef> #include <glad/glad.h> #include "common/logging/log.h" +#include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { @@ -24,6 +27,7 @@ Device::Device() { max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); + has_component_indexing_bug = TestComponentIndexingBug(); } Device::Device(std::nullptr_t) { @@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_variable_aoffi = true; + has_component_indexing_bug = false; } bool Device::TestVariableAoffi() { @@ -52,4 +57,53 @@ void main() { return supported; } +bool Device::TestComponentIndexingBug() { + constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; + const GLchar* COMPONENT_TEST = R"(#version 430 core +layout (std430, binding = 0) buffer OutputBuffer { + uint output_value; +}; +layout (std140, binding = 0) uniform InputBuffer { + uvec4 input_value[4096]; +}; +layout (location = 0) uniform uint idx; +void main() { + output_value = input_value[idx >> 2][idx & 3]; +})"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; + SCOPE_EXIT({ glDeleteProgram(shader); }); + glUseProgram(shader); + + OGLVertexArray vao; + vao.Create(); + glBindVertexArray(vao.handle); + + constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; + OGLBuffer ubo; + ubo.Create(); + glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); + + OGLBuffer ssbo; + ssbo.Create(); + glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); + + for (GLuint index = 4; index < 8; ++index) { + glInvalidateBufferData(ssbo.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); + + glProgramUniform1ui(shader, 0, index); + glDrawArrays(GL_POINTS, 0, 1); + + GLuint result; + glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); + if (result != values.at(index)) { + LOG_INFO(Render_OpenGL, log_message, true); + return true; + } + } + LOG_INFO(Render_OpenGL, log_message, false); + return false; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de8490682..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,13 +30,19 @@ public: return has_variable_aoffi; } + bool HasComponentIndexingBug() const { + return has_component_indexing_bug; + } + private: static bool TestVariableAoffi(); + static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; + bool has_component_indexing_bug{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp deleted file mode 100644 index c3e94d917..000000000 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_primitive_assembler.h" - -namespace OpenGL { - -constexpr u32 TRIANGLES_PER_QUAD = 6; -constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3}; - -PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {} - -PrimitiveAssembler::~PrimitiveAssembler() = default; - -std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const { - ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4"); - return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint); -} - -GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { - const std::size_t size{CalculateQuadSize(count)}; - auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size); - - for (u32 primitive = 0; primitive < count / 4; ++primitive) { - for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) { - const u32 index = first + primitive * 4 + QUAD_MAP[i]; - std::memcpy(dst_pointer, &index, sizeof(index)); - dst_pointer += sizeof(index); - } - } - - return index_offset; -} - -GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) { - const std::size_t map_size{CalculateQuadSize(count)}; - auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); - - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const u8* source{memory_manager.GetPointer(gpu_addr)}; - - for (u32 primitive = 0; primitive < count / 4; ++primitive) { - for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { - const u32 index = primitive * 4 + QUAD_MAP[i]; - const u8* src_offset = source + (index * index_size); - - std::memcpy(dst_pointer, src_offset, index_size); - dst_pointer += index_size; - } - } - - return index_offset; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h deleted file mode 100644 index 4e87ce4d6..000000000 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <glad/glad.h> - -#include "common/common_types.h" - -namespace OpenGL { - -class OGLBufferCache; - -class PrimitiveAssembler { -public: - explicit PrimitiveAssembler(OGLBufferCache& buffer_cache); - ~PrimitiveAssembler(); - - /// Calculates the size required by MakeQuadArray and MakeQuadIndexed. - std::size_t CalculateQuadSize(u32 count) const; - - GLintptr MakeQuadArray(u32 first, u32 count); - - GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count); - -private: - OGLBufferCache& buffer_cache; -}; - -} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f9b6dfeea..d77426067 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -246,29 +246,6 @@ DrawParameters RasterizerOpenGL::SetupDraw() { DrawParameters params{}; params.current_instance = gpu.state.current_instance; - if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { - MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly); - - params.use_indexed = true; - params.primitive_mode = GL_TRIANGLES; - - if (is_indexed) { - params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - params.count = (regs.index_array.count / 4) * 6; - params.index_buffer_offset = primitive_assembler.MakeQuadIndexed( - regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(), - regs.index_array.count); - params.base_vertex = static_cast<GLint>(regs.vb_element_base); - } else { - // MakeQuadArray always generates u32 indexes - params.index_format = GL_UNSIGNED_INT; - params.count = (regs.vertex_buffer.count / 4) * 6; - params.index_buffer_offset = primitive_assembler.MakeQuadArray( - regs.vertex_buffer.first, regs.vertex_buffer.count); - } - return params; - } - params.use_indexed = is_indexed; params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); @@ -345,9 +322,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); - SetupConstBuffers(stage_enum, shader, program_handle, base_bindings); - SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings); - SetupTextures(stage_enum, shader, program_handle, base_bindings); + SetupDrawConstBuffers(stage_enum, shader); + SetupGlobalRegions(stage_enum, shader); + SetupTextures(stage_enum, shader, base_bindings); // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -686,30 +663,19 @@ void RasterizerOpenGL::DrawArrays() { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(state); - // Alpha Testing is synced on shaders. SyncTransformFeedback(); SyncPointState(); - CheckAlphaTests(); SyncPolygonOffset(); - // TODO(bunnei): Sync framebuffer_scale uniform here - // TODO(bunnei): Sync scissorbox uniform(s) here + SyncAlphaTest(); // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; std::size_t buffer_size = CalculateVertexArraysSize(); - // Add space for index buffer (keeping in mind non-core primitives) - switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: - buffer_size = Common::AlignUp(buffer_size, 4) + - primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); - break; - default: - if (is_indexed) { - buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); - } - break; + // Add space for index buffer + if (is_indexed) { + buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); } // Uniform space for the 5 shader stages @@ -810,57 +776,55 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLuint program_handle, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& gpu = system.GPU(); - const auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; + const auto stage_index = static_cast<std::size_t>(stage); + const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; const auto& entries = shader->GetShaderEntries().const_buffers; // Upload only the enabled buffers from the 16 constbuffers of each shader stage for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& used_buffer = entries[bindpoint]; - const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; - - if (!buffer.enabled) { - // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(0, 0, 0); - continue; - } + const auto& entry = entries[bindpoint]; + SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); + } +} - std::size_t size = 0; +void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { + if (!buffer.enabled) { + // Set values to zero to unbind buffers + bind_ubo_pushbuffer.Push(0, 0, 0); + return; + } - if (used_buffer.IsIndirect()) { - // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size; + std::size_t size; + if (entry.IsIndirect()) { + // Buffer is accessed indirectly, so upload the entire thing + size = buffer.size; - if (size > MaxConstbufferSize) { - LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); - size = MaxConstbufferSize; - } - } else { - // Buffer is accessed directly, upload just what we use - size = used_buffer.GetSize(); + if (size > MaxConstbufferSize) { + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); + size = MaxConstbufferSize; } + } else { + // Buffer is accessed directly, upload just what we use + size = entry.GetSize(); + } - // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 - // UBO alignment requirements. - size = Common::AlignUp(size, sizeof(GLvec4)); - ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - - const GLintptr const_buffer_offset = - buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 + // UBO alignment requirements. + size = Common::AlignUp(size, sizeof(GLvec4)); + ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); - } + const std::size_t alignment = device.GetUniformBufferAlignment(); + const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLenum primitive_mode, - BaseBindings base_bindings) { + const Shader& shader) { const auto& entries = shader->GetShaderEntries().global_memory_entries; for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; @@ -874,7 +838,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade } void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings) { + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); @@ -1152,10 +1116,17 @@ void RasterizerOpenGL::SyncPolygonOffset() { state.polygon_offset.clamp = regs.polygon_offset_clamp; } -void RasterizerOpenGL::CheckAlphaTests() { +void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = system.GPU().Maxwell3D().regs; UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, "Alpha Testing is enabled with more than one rendertarget"); + + state.alpha_test.enabled = regs.alpha_test_enabled; + if (!state.alpha_test.enabled) { + return; + } + state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); + state.alpha_test.ref = regs.alpha_test_ref; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d78094138..f7671ff5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,17 +17,18 @@ #include <glad/glad.h> #include "common/common_types.h" +#include "video_core/engines/const_buffer_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_global_cache.h" -#include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/utils.h" @@ -106,17 +107,20 @@ private: bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); /// Configures the current constbuffers to use for the draw command. - void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings); + void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader); + + /// Configures a constant buffer. + void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLenum primitive_mode, - BaseBindings base_bindings); + const Shader& shader); /// Configures the current textures to use for the draw command. void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings); + BaseBindings base_bindings); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); @@ -167,8 +171,8 @@ private: /// Syncs the polygon offsets void SyncPolygonOffset(); - /// Check asserts for alpha testing. - void CheckAlphaTests(); + /// Syncs the alpha test state to match the guest state + void SyncAlphaTest(); /// Check for extension that are not strictly required /// but are needed for correct emulation @@ -197,7 +201,6 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - PrimitiveAssembler primitive_assembler{buffer_cache}; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d66252224..ac8a9e6b7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -35,8 +35,8 @@ struct UnspecializedShader { namespace { /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { - const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; +GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; return gpu.regs.code_address.CodeAddress() + shader_config.offset; } @@ -350,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device) - : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} + : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, + disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -546,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { - return last_shaders[static_cast<u32>(program)]; + if (!system.GPU().Maxwell3D().dirty_flags.shaders) { + return last_shaders[static_cast<std::size_t>(program)]; } - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(program)}; + auto& memory_manager{system.GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(program_addr)}; Shader shader{TryGet(host_ptr)}; + if (shader) { + return last_shaders[static_cast<std::size_t>(program)] = shader; + } - if (!shader) { - // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); - const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found != precompiled_shaders.end()) { - shader = - std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second, host_ptr); - } else { - shader = std::make_shared<CachedShader>( - device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b), host_ptr); - } - Register(shader); + // No shader found - create a new one + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; + ProgramCode program_code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); + } + + const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); + const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; + const auto found = precompiled_shaders.find(unique_identifier); + if (found != precompiled_shaders.end()) { + // Create a shader from the cache + shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, + precompiled_programs, found->second, host_ptr); + } else { + // Create a shader from guest memory + shader = std::make_shared<CachedShader>( + device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, + std::move(program_code), std::move(program_code_b), host_ptr); } + Register(shader); - return last_shaders[static_cast<u32>(program)] = shader; + return last_shaders[static_cast<std::size_t>(program)] = shader; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 64e5a5594..09bd0761d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -137,6 +137,7 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; ShaderDiskCacheOpenGL disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e9f8d40db..7dc2e0560 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,7 +45,6 @@ struct TextureAoffi {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureAoffi, TextureArgument>; -enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); @@ -124,8 +123,8 @@ bool IsPrecise(Operation operand) { return false; } -bool IsPrecise(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { +bool IsPrecise(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { return IsPrecise(*operation); } return false; @@ -144,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) { return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); } +constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { + switch (stack) { + case MetaStackClass::Ssy: + return "ssy"; + case MetaStackClass::Pbk: + return "pbk"; + } + return {}; +} + +std::string FlowStackName(MetaStackClass stack) { + return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); +} + +std::string FlowStackTopName(MetaStackClass stack) { + return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -174,8 +191,10 @@ public: // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); - code.AddLine("uint flow_stack_top = 0u;"); + for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { + code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); + code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + } code.AddLine("while (true) {{"); ++code.scope; @@ -247,6 +266,12 @@ private: code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); + code.AddLine("in gl_PerVertex {{"); + ++code.scope; + code.AddLine("vec4 gl_Position;"); + --code.scope; + code.AddLine("}} gl_in[];"); + DeclareVertexRedeclarations(); } @@ -349,7 +374,7 @@ private: } void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 generic_index{GetGenericAttributeIndex(index)}; + const u32 location{GetGenericAttributeIndex(index)}; std::string name{GetInputAttribute(index)}; if (stage == ShaderStage::Geometry) { @@ -358,19 +383,13 @@ private: std::string suffix; if (stage == ShaderStage::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + const auto input_mode{header.ps.GetAttributeUse(location)}; if (skip_unused && input_mode == AttributeUse::Unused) { return; } suffix = GetInputFlags(input_mode); } - u32 location = generic_index; - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - location += GENERIC_VARYING_START_LOCATION; - } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } @@ -395,7 +414,7 @@ private: } void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + const u32 location{GetGenericAttributeIndex(index)}; code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } @@ -498,15 +517,15 @@ private: } void VisitBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { code.AddLine(expr); } } } - std::string Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + std::string Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); @@ -520,7 +539,7 @@ private: return (this->*decompiler)(*operation); } - if (const auto gpr = std::get_if<GprNode>(node)) { + if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "0"; @@ -528,7 +547,7 @@ private: return GetRegister(index); } - if (const auto immediate = std::get_if<ImmediateNode>(node)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits @@ -537,7 +556,7 @@ private: return fmt::format("utof(0x{:x}u)", immediate->GetValue()); } - if (const auto predicate = std::get_if<PredicateNode>(node)) { + if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> std::string { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -554,7 +573,7 @@ private: return value; } - if (const auto abuf = std::get_if<AbufNode>(node)) { + if (const auto abuf = std::get_if<AbufNode>(&*node)) { UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { @@ -564,9 +583,9 @@ private: return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } - if (const auto cbuf = std::get_if<CbufNode>(node)) { + if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); @@ -577,30 +596,47 @@ private: if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); - return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + + if (!device.HasComponentIndexingBug()) { + return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset); + } + + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("float {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, + pack, GetSwizzle(swizzle)); + } + return result; } UNREACHABLE_MSG("Unmanaged offset node type"); } - if (const auto gmem = std::get_if<GmemNode>(node)) { + if (const auto gmem = std::get_if<GmemNode>(&*node)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); } - if (const auto lmem = std::get_if<LmemNode>(node)) { + if (const auto lmem = std::get_if<LmemNode>(&*node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } - if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { return GetInternalFlag(internal_flag->GetFlag()); } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); ++code.scope; @@ -612,7 +648,7 @@ private: return {}; } - if (const auto comment = std::get_if<CommentNode>(node)) { + if (const auto comment = std::get_if<CommentNode>(&*node)) { return "// " + comment->GetText(); } @@ -620,7 +656,7 @@ private: return {}; } - std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { const auto GeometryPass = [&](std::string_view name) { if (stage == ShaderStage::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games @@ -633,10 +669,14 @@ private: switch (attribute) { case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { + switch (stage) { + case ShaderStage::Geometry: + return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), + GetSwizzle(element)); + case ShaderStage::Fragment: return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); + default: + UNREACHABLE(); } case Attribute::Index::PointCoord: switch (element) { @@ -852,7 +892,7 @@ private: std::string expr = ", "; switch (type) { case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (some extra arguments are // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -884,7 +924,7 @@ private: for (std::size_t index = 0; index < aoffi.size(); ++index) { const auto operand{aoffi.at(index)}; - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (AOFFI arguments are required // to be constant by the standard). expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -905,23 +945,23 @@ private: } std::string Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = GetRegister(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: - return "position"s + GetSwizzle(abuf->GetElement()); + return "gl_Position"s + GetSwizzle(abuf->GetElement()); case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: @@ -937,9 +977,9 @@ private: return "0"; } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); - } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); @@ -1216,12 +1256,12 @@ private: } std::string LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -1232,7 +1272,7 @@ private: return {}; } target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = GetInternalFlag(flag->GetFlag()); } @@ -1409,7 +1449,7 @@ private: } std::string Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); @@ -1418,15 +1458,18 @@ private: } std::string PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); + code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), + target->GetValue()); return {}; } std::string PopFlowStack(Operation operation) { - code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); code.AddLine("break;"); return {}; } @@ -1447,27 +1490,9 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - code.AddLine("if (alpha_test[0] != 0) {{"); - ++code.scope; - // We start on the register containing the alpha value in the first RT. - u32 current_reg = 3; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when - // multiple render targets are used. - if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || - header.ps.IsColorComponentOutputEnabled(render_target, 1) || - header.ps.IsColorComponentOutputEnabled(render_target, 2) || - header.ps.IsColorComponentOutputEnabled(render_target, 3)) { - code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)); - current_reg += 4; - } - } - --code.scope; - code.AddLine("}}"); - // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. - current_reg = 0; + u32 current_reg = 0; for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { @@ -1506,9 +1531,7 @@ private: // If a geometry shader is attached, it will always flip (it's the last stage before // fragment). For more info about flipping, refer to gl_shader_gen.cpp. - code.AddLine("position.xy *= viewport_flip.xy;"); - code.AddLine("gl_Position = position;"); - code.AddLine("position.w = 1.0;"); + code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1746,8 +1769,7 @@ private: } u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, - Maxwell::NumVaryings); + return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index d2bb705a9..9148629ec 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -23,12 +23,9 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu out += GetCommonDeclarations(); out += R"( -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; )"; @@ -48,7 +45,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { out += R"( void main() { - position = vec4(0.0, 0.0, 0.0, 0.0); execute_vertex(); )"; @@ -59,19 +55,12 @@ void main() { out += R"( // Set Position Y direction - position.y *= utof(config_pack[2]); + gl_Position.y *= utof(config_pack[2]); // Check if the flip stage is VertexB // Config pack's second value is flip_stage if (config_pack[1] == 1) { // Viewport can be flipped, which is unsupported by glViewport - position.xy *= viewport_flip.xy; - } - gl_Position = position; - - // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 - // For now, this is here to bring order in lieu of proper emulation - if (config_pack[1] == 1) { - position.w = 1.0; + gl_Position.xy *= viewport_flip.xy; } })"; @@ -85,13 +74,9 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se out += GetCommonDeclarations(); out += R"( -layout (location = 0) in vec4 gs_position[]; -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; )"; @@ -124,38 +109,11 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in noperspective vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; -bool AlphaFunc(in float value) { - float ref = uintBitsToFloat(alpha_test[2]); - switch (alpha_test[1]) { - case 1: - return false; - case 2: - return value < ref; - case 3: - return value == ref; - case 4: - return value <= ref; - case 5: - return value > ref; - case 6: - return value != ref; - case 7: - return value >= ref; - case 8: - return true; - default: - return false; - } -} - )"; const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 05ab01dcb..b05f90f20 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -48,17 +48,6 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - auto func{static_cast<u32>(regs.alpha_test_func)}; - // Normalize the gl variants of opCompare to be the same as the normal variants - const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); - if (func >= op_gl_variant_base) { - func = func - op_gl_variant_base + 1U; - } - - alpha_test.enabled = regs.alpha_test_enabled; - alpha_test.func = func; - alpha_test.ref = regs.alpha_test_ref; - instance_id = state.current_instance; // Assign in which stage the position has to be flipped diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index cec18a832..6961e702a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -27,14 +27,8 @@ struct MaxwellUniformData { GLuint flip_stage; GLfloat y_direction; }; - struct alignas(16) { - GLuint enabled; - GLuint func; - GLfloat ref; - GLuint padding; - } alpha_test; }; -static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 7425fbe5d..d86e137ac 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -156,6 +156,10 @@ OpenGLState::OpenGLState() { polygon_offset.factor = 0.0f; polygon_offset.units = 0.0f; polygon_offset.clamp = 0.0f; + + alpha_test.enabled = false; + alpha_test.func = GL_ALWAYS; + alpha_test.ref = 0.0f; } void OpenGLState::ApplyDefaultState() { @@ -461,6 +465,14 @@ void OpenGLState::ApplyPolygonOffset() const { } } +void OpenGLState::ApplyAlphaTest() const { + Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); + if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), + std::tie(alpha_test.func, alpha_test.ref))) { + glAlphaFunc(alpha_test.func, alpha_test.ref); + } +} + void OpenGLState::ApplyTextures() const { bool has_delta{}; std::size_t first{}; @@ -533,6 +545,7 @@ void OpenGLState::Apply() const { ApplyTextures(); ApplySamplers(); ApplyPolygonOffset(); + ApplyAlphaTest(); } void OpenGLState::EmulateViewportWithScissor() { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 41418a7b8..b0140495d 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -172,6 +172,12 @@ public: GLfloat clamp; } polygon_offset; + struct { + bool enabled; // GL_ALPHA_TEST + GLenum func; // GL_ALPHA_TEST_FUNC + GLfloat ref; // GL_ALPHA_TEST_REF + } alpha_test; + std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE OpenGLState(); @@ -215,6 +221,7 @@ public: void ApplySamplers() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; + void ApplyAlphaTest() const; /// Set the initial OpenGL state static void ApplyDefaultState(); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ed7b5cff0..ea77dd211 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -128,6 +128,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_TRIANGLE_STRIP; case Maxwell::PrimitiveTopology::TriangleFan: return GL_TRIANGLE_FAN; + case Maxwell::PrimitiveTopology::Quads: + return GL_QUADS; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); @@ -173,11 +175,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { return GL_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::Border: return GL_CLAMP_TO_BORDER; - case Tegra::Texture::WrapMode::ClampOGL: - // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use - // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to - // manually mix them. However the shader part of this is not yet implemented. - return GL_CLAMP_TO_BORDER; + case Tegra::Texture::WrapMode::Clamp: + return GL_CLAMP; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: return GL_MIRROR_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceBorder: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 3451d321d..aafd6f31b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -18,7 +18,6 @@ #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" -#include "core/tracer/recorder.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/renderer_opengl.h" diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 9fe1e3280..0bbbf6851 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -52,7 +52,7 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { return vk::SamplerAddressMode::eClampToEdge; case Tegra::Texture::WrapMode::Border: return vk::SamplerAddressMode::eClampToBorder; - case Tegra::Texture::WrapMode::ClampOGL: + case Tegra::Texture::WrapMode::Clamp: // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use // eClampToBorder to get the border color of the texture, and then sample the edge to // manually mix them. However the shader part of this is not yet implemented. diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 00242ecbe..3b966ddc3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; +constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; } // namespace Alternatives @@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { SetupFamilies(dldi, surface); SetupProperties(dldi); + SetupFeatures(dldi); } VKDevice::~VKDevice() = default; bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { - const auto queue_cis = GetDeviceQueueCreateInfos(); - vk::PhysicalDeviceFeatures device_features{}; + vk::PhysicalDeviceFeatures device_features; + device_features.vertexPipelineStoresAndAtomics = true; + device_features.independentBlend = true; + device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; - const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const auto queue_cis = GetDeviceQueueCreateInfos(); + const std::vector<const char*> extensions = LoadExtensions(dldi); const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, nullptr, static_cast<u32>(extensions.size()), extensions.data(), &device_features); @@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, LOG_CRITICAL(Render_Vulkan, "Format={} with usage={} and type={} has no defined alternatives and host " "hardware does not support it", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + vk::to_string(wanted_format), vk::to_string(wanted_usage), static_cast<u32>(format_type)); UNREACHABLE(); return wanted_format; @@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, return wanted_format; } +bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const { + if (!features.textureCompressionASTC_LDR) { + return false; + } + const auto format_feature_usage{ + vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eTransferDst}; + constexpr std::array<vk::Format, 9> astc_formats = { + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; + for (const auto format : astc_formats) { + const auto format_properties{physical.getFormatProperties(format, dldi)}; + if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { + return false; + } + } + return true; +} + bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface) { - const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; - bool has_swapchain{}; for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - has_swapchain |= prop.extensionName == swapchain_extension; + has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); } if (!has_swapchain) { // The device doesn't support creating swapchains. @@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. - const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); - if (props.limits.maxUniformBufferRange < 65536) { + const auto properties{physical.getProperties(dldi)}; + const auto limits{properties.limits}; + if (limits.maxUniformBufferRange < 65536) { + return false; + } + + const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; + if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { return false; } @@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return true; } +std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { + std::vector<const char*> extensions; + extensions.reserve(2); + extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + + const auto Test = [&](const vk::ExtensionProperties& extension, + std::optional<std::reference_wrapper<bool>> status, const char* name, + u32 revision) { + if (extension.extensionName != std::string(name)) { + return; + } + extensions.push_back(name); + if (status) { + status->get() = true; + } + }; + + for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); + } + + return extensions; +} + void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; @@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); device_type = props.deviceType; uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); + max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); +} + +void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { + const auto supported_features{physical.getFeatures(dldi)}; + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { - static const float QUEUE_PRIORITY = 1.f; + static const float QUEUE_PRIORITY = 1.0f; std::set<u32> unique_queue_families = {graphics_family, present_family}; std::vector<vk::DeviceQueueCreateInfo> queue_cis; @@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; std::map<vk::Format, vk::FormatProperties> format_properties; - - const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi)); - }; - AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); - AddFormatQuery(vk::Format::eB5G6R5UnormPack16); - AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); - AddFormatQuery(vk::Format::eR8G8B8A8Srgb); - AddFormatQuery(vk::Format::eR8Unorm); - AddFormatQuery(vk::Format::eD32Sfloat); - AddFormatQuery(vk::Format::eD16Unorm); - AddFormatQuery(vk::Format::eD16UnormS8Uint); - AddFormatQuery(vk::Format::eD24UnormS8Uint); - AddFormatQuery(vk::Format::eD32SfloatS8Uint); - AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); - AddFormatQuery(vk::Format::eBc2UnormBlock); - AddFormatQuery(vk::Format::eBc3UnormBlock); - AddFormatQuery(vk::Format::eBc4UnormBlock); - + } return format_properties; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index e87c7a508..537825d8b 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -11,7 +11,7 @@ namespace Vulkan { -/// Format usage descriptor +/// Format usage descriptor. enum class FormatType { Linear, Optimal, Buffer }; /// Handles data specific to a physical device. @@ -34,12 +34,12 @@ public: vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; - /// Returns the dispatch loader with direct function pointers of the device + /// Returns the dispatch loader with direct function pointers of the device. const vk::DispatchLoaderDynamic& GetDispatchLoader() const { return dld; } - /// Returns the logical device + /// Returns the logical device. vk::Device GetLogical() const { return logical.get(); } @@ -69,30 +69,55 @@ public: return present_family; } - /// Returns if the device is integrated with the host CPU + /// Returns if the device is integrated with the host CPU. bool IsIntegrated() const { return device_type == vk::PhysicalDeviceType::eIntegratedGpu; } - /// Returns uniform buffer alignment requeriment + /// Returns uniform buffer alignment requeriment. u64 GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + /// Returns the maximum range for storage buffers. + u64 GetMaxStorageBufferRange() const { + return max_storage_buffer_range; + } + + /// Returns true if ASTC is natively supported. + bool IsOptimalAstcSupported() const { + return is_optimal_astc_supported; + } + + /// Returns true if the device supports VK_EXT_scalar_block_layout. + bool IsExtScalarBlockLayoutSupported() const { + return ext_scalar_block_layout; + } + /// Checks if the physical device is suitable. static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface); private: + /// Loads extensions into a vector and stores available ones in this object. + std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + /// Sets up queue families. void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); /// Sets up device properties. void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + /// Sets up device features. + void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + /// Returns a list of queue initialization descriptors. std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + /// Returns true if ASTC textures are natively supported. + bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const; + /// Returns true if a format is supported. bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; @@ -101,16 +126,19 @@ private: static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device - vk::DispatchLoaderDynamic dld; ///< Device function pointers - UniqueDevice logical; ///< Logical device - vk::Queue graphics_queue; ///< Main graphics queue - vk::Queue present_queue; ///< Main present queue - u32 graphics_family{}; ///< Main graphics queue family index - u32 present_family{}; ///< Main present queue family index - vk::PhysicalDeviceType device_type; ///< Physical device type - u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment - std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::PhysicalDeviceType device_type; ///< Physical device type. + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. + u64 max_storage_buffer_range{}; ///< Max storage buffer size. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. + std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a5b25aeff..33ad9764a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -17,6 +17,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; // TODO(Rodrigo): Use rasterizer's value -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; +constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; constexpr u32 STAGE_BINDING_STRIDE = 0x100; enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler : public Sirit::Module { public: - explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) - : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { + explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) + : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { AddCapability(spv::Capability::Shader); AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_variable_pointers"); @@ -130,20 +132,16 @@ public: branch_labels.push_back(label); } - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), spv::StorageClass::Function, Constant(t_uint, first_address))); - flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), - spv::StorageClass::Function, ConstantNull(flow_stack_type))); - flow_stack_top = - Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0))); + std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); + std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); Name(jmp_to, "jmp_to"); - Name(flow_stack, "flow_stack"); - Name(flow_stack_top, "flow_stack_top"); + Name(ssy_flow_stack, "ssy_flow_stack"); + Name(ssy_flow_stack_top, "ssy_flow_stack_top"); + Name(pbk_flow_stack, "pbk_flow_stack"); + Name(pbk_flow_stack_top, "pbk_flow_stack_top"); Emit(OpBranch(loop_label)); Emit(loop_label); @@ -195,7 +193,9 @@ public: entries.samplers.emplace_back(sampler); } for (const auto& attribute : ir.GetInputAttributes()) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); + if (IsGenericAttribute(attribute)) { + entries.attributes.insert(GetGenericAttributeLocation(attribute)); + } } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -210,7 +210,6 @@ private: std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); - static constexpr u32 CBUF_STRIDE = 16; void AllocateBindings() { const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; @@ -315,6 +314,7 @@ private: constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", "overflow"}; for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { + const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -374,7 +374,9 @@ private: u32 binding = const_buffers_base_binding; for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); + const Id type = + device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; + const Id id = OpVariable(type, spv::StorageClass::Uniform); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); Decorate(id, spv::Decoration::Binding, binding++); @@ -475,13 +477,13 @@ private: } void VisitBasicBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { static_cast<void>(Visit(node)); } } - Id Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + Id Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { @@ -489,17 +491,17 @@ private: } return (this->*decompiler)(*operation); - } else if (const auto gpr = std::get_if<GprNode>(node)) { + } else if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return Constant(t_float, 0.0f); } return Emit(OpLoad(t_float, registers.at(index))); - } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { + } else if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue())); - } else if (const auto predicate = std::get_if<PredicateNode>(node)) { + } else if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> Id { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -515,7 +517,7 @@ private: } return value; - } else if (const auto abuf = std::get_if<AbufNode>(node)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*node)) { const auto attribute = abuf->GetIndex(); const auto element = abuf->GetElement(); @@ -565,40 +567,42 @@ private: } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); - } else if (const auto cbuf = std::get_if<CbufNode>(node)) { - const Node offset = cbuf->GetOffset(); + } else if (const auto cbuf = std::get_if<CbufNode>(&*node)) { + const Node& offset = cbuf->GetOffset(); const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - Id buffer_index{}; - Id buffer_element{}; - - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - - } else if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which - // emits sub-optimal code on GLSL from my testing). - const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); - const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); - const Id final_offset = Emit( - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); - buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); - buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); - + Id pointer{}; + if (device.IsExtScalarBlockLayoutSupported()) { + const Id buffer_offset = Emit(OpShiftRightLogical( + t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); + pointer = Emit( + OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); } else { - UNREACHABLE_MSG("Unmanaged offset node type"); + Id buffer_index{}; + Id buffer_element{}; + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + // Direct access + const u32 offset_imm = immediate->GetValue(); + ASSERT(offset_imm % 4 == 0); + buffer_index = Constant(t_uint, offset_imm / 16); + buffer_element = Constant(t_uint, (offset_imm / 4) % 4); + } else if (std::holds_alternative<OperationNode>(*offset)) { + // Indirect access + const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); + const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); + const Id final_offset = Emit(OpUMod( + t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); + buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); + buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); + } else { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), + buffer_index, buffer_element)); } - - const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), - buffer_index, buffer_element)); return Emit(OpLoad(t_float, pointer)); - } else if (const auto gmem = std::get_if<GmemNode>(node)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*node)) { const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress())); const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress())); @@ -608,11 +612,13 @@ private: return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0u), offset)))); - } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead const Id true_label = OpLabel(); const Id skip_label = OpLabel(); - Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); + const Id condition = Visit(conditional->GetCondition()); + Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); + Emit(OpBranchConditional(condition, true_label, skip_label)); Emit(true_label); VisitBasicBlock(conditional->GetCode()); @@ -621,7 +627,7 @@ private: Emit(skip_label); return {}; - } else if (const auto comment = std::get_if<CommentNode>(node)) { + } else if (const auto comment = std::get_if<CommentNode>(&*node)) { Name(Emit(OpUndef(t_void)), comment->GetText()); return {}; } @@ -689,18 +695,18 @@ private: } Id Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; Id target{}; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = registers.at(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { target = [&]() -> Id { switch (const auto attribute = abuf->GetIndex(); attribute) { case Attribute::Index::Position: @@ -725,7 +731,7 @@ private: } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress())); address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4))); target = Emit(OpAccessChain(t_prv_float, local_memory, {address})); @@ -771,11 +777,11 @@ private: } Id LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; Id target{}; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -787,7 +793,7 @@ private: } target = predicates.at(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = internal_flags.at(static_cast<u32>(flag->GetFlag())); } @@ -873,7 +879,7 @@ private: } else { u32 component_value = 0; if (meta->component) { - const auto component = std::get_if<ImmediateNode>(meta->component); + const auto component = std::get_if<ImmediateNode>(&*meta->component); ASSERT_MSG(component, "Component is not an immediate value"); component_value = component->GetValue(); } @@ -930,7 +936,7 @@ private: } Id Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue()))); @@ -939,9 +945,10 @@ private: } Id PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); ASSERT(target); + const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); const Id current = Emit(OpLoad(t_uint, flow_stack_top)); const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); @@ -952,6 +959,7 @@ private: } Id PopFlowStack(Operation operation) { + const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); const Id current = Emit(OpLoad(t_uint, flow_stack_top)); const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); @@ -968,11 +976,11 @@ private: case ShaderStage::Vertex: { // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. - const Id position = AccessElement(t_float4, per_vertex, position_index); - Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); + const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); + Id depth = Emit(OpLoad(t_float, z_pointer)); depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); - Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); + Emit(OpStore(z_pointer, depth)); break; } case ShaderStage::Fragment: { @@ -1162,6 +1170,31 @@ private: Emit(skip_label); } + std::tuple<Id, Id> CreateFlowStack() { + // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely + // that shaders will use 20 nested SSYs and PBKs. + constexpr u32 FLOW_STACK_SIZE = 20; + constexpr auto storage_class = spv::StorageClass::Function; + + const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); + const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, + ConstantNull(flow_stack_type))); + const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0))); + return std::tie(stack, top); + } + + std::pair<Id, Id> GetFlowStack(Operation operation) { + const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); + switch (stack_class) { + case MetaStackClass::Ssy: + return {ssy_flow_stack, ssy_flow_stack_top}; + case MetaStackClass::Pbk: + return {pbk_flow_stack, pbk_flow_stack_top}; + } + UNREACHABLE(); + return {}; + } + static constexpr OperationDecompilersArray operation_decompilers = { &SPIRVDecompiler::Assign, @@ -1311,6 +1344,7 @@ private: &SPIRVDecompiler::WorkGroupId<2>, }; + const VKDevice& device; const ShaderIR& ir; const ShaderStage stage; const Tegra::Shader::Header header; @@ -1349,12 +1383,18 @@ private: const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_array = - Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), - spv::Decoration::ArrayStride, CBUF_STRIDE); - const Id t_cbuf_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); + const Id t_cbuf_std140 = Decorate( + Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), + spv::Decoration::ArrayStride, 16u); + const Id t_cbuf_scalar = Decorate( + Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), + spv::Decoration::ArrayStride, 4u); + const Id t_cbuf_std140_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_scalar_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); + const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); const Id t_gmem_array = @@ -1397,14 +1437,17 @@ private: Id execute_function{}; Id jmp_to{}; - Id flow_stack_top{}; - Id flow_stack{}; + Id ssy_flow_stack_top{}; + Id pbk_flow_stack_top{}; + Id ssy_flow_stack{}; + Id pbk_flow_stack{}; Id continue_label{}; std::map<u32, Id> labels; }; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { - auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage) { + auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); decompiler->Decompile(); return {std::move(decompiler), decompiler->GetShaderEntries()}; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { class ShaderIR; } +namespace Vulkan { +class VKDevice; +} + namespace Vulkan::VKShader { using Maxwell = Tegra::Engines::Maxwell3D::Regs; - using SamplerEntry = VideoCommon::Shader::Sampler; constexpr u32 DESCRIPTOR_SET = 0; @@ -75,6 +78,7 @@ struct ShaderEntries { using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage); } // namespace Vulkan::VKShader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2da595c0d..a0554c97e 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index b4859bc1e..87d8fecaa 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 3a29c4a46..b06cbe441 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 5341e460f..7bcf38f23 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 3095f2fd4..f1875967c 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 9fd4b273e..c8c1a7f40 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 679ac0d4e..73880db0e 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 1ae192c6a..e02bcd097 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 0b12a0d08..8be1119df 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index b5ec9a6f5..4221f0c58 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a1d04c6e5..29be25ca3 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index cc522f1de..f5013e44a 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 9d2322a1d..2323052b0 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 755f2ec44..48ca7a4af 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index fba44d714..d59d15bd8 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index a425f9eb7..c3bcf1ae9 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index a4cdaf74d..46e3d5905 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -4,6 +4,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index a6a1fb632..dd20775d7 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e6a010a7d..80fc0ccfc 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -169,7 +170,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); SetTemporal(bb, i, gmem); } @@ -262,7 +263,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); } @@ -298,9 +299,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB const Node base_address{ TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; - const auto cbuf = std::get_if<CbufNode>(base_address); + const auto cbuf = std::get_if<CbufNode>(&*base_address); ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index a6c123573..d46a8ab82 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -108,22 +109,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer flow is not supported"); - // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the - // target of the jump that the SYNC instruction will make. The SSY opcode has a similar - // structure to the BRA opcode. + // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); + bb.push_back( + Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); break; } case OpCode::Id::PBK: { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer PBK is not supported"); - // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but - // using SYNC on a PBK address will kill the shader execution. We don't emulate this because - // it's very unlikely a driver will emit such invalid shader. + // PBK pushes to a stack the address where BRK will jump to. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); + bb.push_back( + Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); break; } case OpCode::Id::SYNC: { @@ -132,7 +131,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { static_cast<u32>(cc)); // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack)); + bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); break; } case OpCode::Id::BRK: { @@ -141,7 +140,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { static_cast<u32>(cc)); // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack)); + bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); break; } case OpCode::Id::IPA: { diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 71844c42b..9290d22eb 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 387491bd3..febbfeb50 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index f8659e48e..e6c9d287e 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 44ae87ece..2ac16eeb0 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 5b033126d..4a356dbd4 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -291,8 +292,8 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, const Node sampler_register = GetRegister(reg); const Node base_sampler = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_sampler); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + const auto cbuf = std::get_if<CbufNode>(&*base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); const auto cbuf_index = cbuf->GetIndex(); @@ -388,8 +389,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const bool is_array = array; - const bool is_shadow = depth_compare; + const auto is_array = static_cast<bool>(array); + const auto is_shadow = static_cast<bool>(depth_compare); const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index cb9ab72b1..97fc6f9b1 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 04a776398..93dee77d1 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h new file mode 100644 index 000000000..3cfb911bb --- /dev/null +++ b/src/video_core/shader/node.h @@ -0,0 +1,519 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstddef> +#include <memory> +#include <string> +#include <tuple> +#include <utility> +#include <variant> +#include <vector> + +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" + +namespace VideoCommon::Shader { + +enum class OperationCode { + Assign, /// (float& dest, float src) -> void + + Select, /// (MetaArithmetic, bool pred, float a, float b) -> float + + FAdd, /// (MetaArithmetic, float a, float b) -> float + FMul, /// (MetaArithmetic, float a, float b) -> float + FDiv, /// (MetaArithmetic, float a, float b) -> float + FFma, /// (MetaArithmetic, float a, float b, float c) -> float + FNegate, /// (MetaArithmetic, float a) -> float + FAbsolute, /// (MetaArithmetic, float a) -> float + FClamp, /// (MetaArithmetic, float value, float min, float max) -> float + FMin, /// (MetaArithmetic, float a, float b) -> float + FMax, /// (MetaArithmetic, float a, float b) -> float + FCos, /// (MetaArithmetic, float a) -> float + FSin, /// (MetaArithmetic, float a) -> float + FExp2, /// (MetaArithmetic, float a) -> float + FLog2, /// (MetaArithmetic, float a) -> float + FInverseSqrt, /// (MetaArithmetic, float a) -> float + FSqrt, /// (MetaArithmetic, float a) -> float + FRoundEven, /// (MetaArithmetic, float a) -> float + FFloor, /// (MetaArithmetic, float a) -> float + FCeil, /// (MetaArithmetic, float a) -> float + FTrunc, /// (MetaArithmetic, float a) -> float + FCastInteger, /// (MetaArithmetic, int a) -> float + FCastUInteger, /// (MetaArithmetic, uint a) -> float + + IAdd, /// (MetaArithmetic, int a, int b) -> int + IMul, /// (MetaArithmetic, int a, int b) -> int + IDiv, /// (MetaArithmetic, int a, int b) -> int + INegate, /// (MetaArithmetic, int a) -> int + IAbsolute, /// (MetaArithmetic, int a) -> int + IMin, /// (MetaArithmetic, int a, int b) -> int + IMax, /// (MetaArithmetic, int a, int b) -> int + ICastFloat, /// (MetaArithmetic, float a) -> int + ICastUnsigned, /// (MetaArithmetic, uint a) -> int + ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int + ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int + IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int + IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int + IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int + IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int + IBitwiseNot, /// (MetaArithmetic, int a) -> int + IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int + IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int + IBitCount, /// (MetaArithmetic, int) -> int + + UAdd, /// (MetaArithmetic, uint a, uint b) -> uint + UMul, /// (MetaArithmetic, uint a, uint b) -> uint + UDiv, /// (MetaArithmetic, uint a, uint b) -> uint + UMin, /// (MetaArithmetic, uint a, uint b) -> uint + UMax, /// (MetaArithmetic, uint a, uint b) -> uint + UCastFloat, /// (MetaArithmetic, float a) -> uint + UCastSigned, /// (MetaArithmetic, int a) -> uint + ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint + ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint + UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseNot, /// (MetaArithmetic, uint a) -> uint + UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint + UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint + UBitCount, /// (MetaArithmetic, uint) -> uint + + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAbsolute, /// (f16vec2 a) -> f16vec2 + HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 + HMergeF32, /// (f16vec2 src) -> float + HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HPack2, /// (float a, float b) -> f16vec2 + + LogicalAssign, /// (bool& dst, bool src) -> void + LogicalAnd, /// (bool a, bool b) -> bool + LogicalOr, /// (bool a, bool b) -> bool + LogicalXor, /// (bool a, bool b) -> bool + LogicalNegate, /// (bool a) -> bool + LogicalPick2, /// (bool2 pair, uint index) -> bool + LogicalAll2, /// (bool2 a) -> bool + LogicalAny2, /// (bool2 a) -> bool + + LogicalFLessThan, /// (float a, float b) -> bool + LogicalFEqual, /// (float a, float b) -> bool + LogicalFLessEqual, /// (float a, float b) -> bool + LogicalFGreaterThan, /// (float a, float b) -> bool + LogicalFNotEqual, /// (float a, float b) -> bool + LogicalFGreaterEqual, /// (float a, float b) -> bool + LogicalFIsNan, /// (float a) -> bool + + LogicalILessThan, /// (int a, int b) -> bool + LogicalIEqual, /// (int a, int b) -> bool + LogicalILessEqual, /// (int a, int b) -> bool + LogicalIGreaterThan, /// (int a, int b) -> bool + LogicalINotEqual, /// (int a, int b) -> bool + LogicalIGreaterEqual, /// (int a, int b) -> bool + + LogicalULessThan, /// (uint a, uint b) -> bool + LogicalUEqual, /// (uint a, uint b) -> bool + LogicalULessEqual, /// (uint a, uint b) -> bool + LogicalUGreaterThan, /// (uint a, uint b) -> bool + LogicalUNotEqual, /// (uint a, uint b) -> bool + LogicalUGreaterEqual, /// (uint a, uint b) -> bool + + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + + Texture, /// (MetaTexture, float[N] coords) -> float4 + TextureLod, /// (MetaTexture, float[N] coords) -> float4 + TextureGather, /// (MetaTexture, float[N] coords) -> float4 + TextureQueryDimensions, /// (MetaTexture, float a) -> float4 + TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 + TexelFetch, /// (MetaTexture, int[N], int) -> float4 + + Branch, /// (uint branch_target) -> void + PushFlowStack, /// (uint branch_target) -> void + PopFlowStack, /// () -> void + Exit, /// () -> void + Discard, /// () -> void + + EmitVertex, /// () -> void + EndPrimitive, /// () -> void + + YNegate, /// () -> float + LocalInvocationIdX, /// () -> uint + LocalInvocationIdY, /// () -> uint + LocalInvocationIdZ, /// () -> uint + WorkGroupIdX, /// () -> uint + WorkGroupIdY, /// () -> uint + WorkGroupIdZ, /// () -> uint + + Amount, +}; + +enum class InternalFlag { + Zero = 0, + Sign = 1, + Carry = 2, + Overflow = 3, + Amount = 4, +}; + +enum class MetaStackClass { + Ssy, + Pbk, +}; + +class OperationNode; +class ConditionalNode; +class GprNode; +class ImmediateNode; +class InternalFlagNode; +class PredicateNode; +class AbufNode; +class CbufNode; +class LmemNode; +class GmemNode; +class CommentNode; + +using NodeData = + std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, + PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; +using Node = std::shared_ptr<NodeData>; +using Node4 = std::array<Node, 4>; +using NodeBlock = std::vector<Node>; + +class Sampler { +public: + /// This constructor is for bound samplers + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + /// This constructor is for bindless samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} + + /// This constructor is for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} + + std::size_t GetOffset() const { + return offset; + } + + std::size_t GetIndex() const { + return index; + } + + Tegra::Shader::TextureType GetType() const { + return type; + } + + bool IsArray() const { + return is_array; + } + + bool IsShadow() const { + return is_shadow; + } + + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + + bool operator<(const Sampler& rhs) const { + return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < + std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, + rhs.is_bindless); + } + +private: + /// Offset in TSC memory from which to read the sampler object, as specified by the sampling + /// instruction. + std::size_t offset{}; + std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. + Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. +}; + +struct GlobalMemoryBase { + u32 cbuf_index{}; + u32 cbuf_offset{}; + + bool operator<(const GlobalMemoryBase& rhs) const { + return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); + } +}; + +/// Parameters describing an arithmetic operation +struct MetaArithmetic { + bool precise{}; ///< Whether the operation can be constraint or not +}; + +/// Parameters describing a texture sampler +struct MetaTexture { + const Sampler& sampler; + Node array; + Node depth_compare; + std::vector<Node> aoffi; + Node bias; + Node lod; + Node component{}; + u32 element{}; +}; + +/// Parameters that modify an operation but are not part of any particular operand +using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; + +/// Holds any kind of operation that can be done in the IR +class OperationNode final { +public: + explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} + + explicit OperationNode(OperationCode code, Meta meta) + : OperationNode(code, meta, std::vector<Node>{}) {} + + explicit OperationNode(OperationCode code, std::vector<Node> operands) + : OperationNode(code, Meta{}, std::move(operands)) {} + + explicit OperationNode(OperationCode code, Meta meta, std::vector<Node> operands) + : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {} + + template <typename... Args> + explicit OperationNode(OperationCode code, Meta meta, Args&&... operands) + : code{code}, meta{std::move(meta)}, operands{operands...} {} + + OperationCode GetCode() const { + return code; + } + + const Meta& GetMeta() const { + return meta; + } + + std::size_t GetOperandsCount() const { + return operands.size(); + } + + const Node& operator[](std::size_t operand_index) const { + return operands.at(operand_index); + } + +private: + OperationCode code{}; + Meta meta{}; + std::vector<Node> operands; +}; + +/// Encloses inside any kind of node that returns a boolean conditionally-executed code +class ConditionalNode final { +public: + explicit ConditionalNode(Node condition, std::vector<Node>&& code) + : condition{std::move(condition)}, code{std::move(code)} {} + + const Node& GetCondition() const { + return condition; + } + + const std::vector<Node>& GetCode() const { + return code; + } + +private: + Node condition; ///< Condition to be satisfied + std::vector<Node> code; ///< Code to execute +}; + +/// A general purpose register +class GprNode final { +public: + explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} + + u32 GetIndex() const { + return static_cast<u32>(index); + } + +private: + Tegra::Shader::Register index{}; +}; + +/// A 32-bits value that represents an immediate value +class ImmediateNode final { +public: + explicit constexpr ImmediateNode(u32 value) : value{value} {} + + u32 GetValue() const { + return value; + } + +private: + u32 value{}; +}; + +/// One of Maxwell's internal flags +class InternalFlagNode final { +public: + explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} + + InternalFlag GetFlag() const { + return flag; + } + +private: + InternalFlag flag{}; +}; + +/// A predicate register, it can be negated without additional nodes +class PredicateNode final { +public: + explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) + : index{index}, negated{negated} {} + + Tegra::Shader::Pred GetIndex() const { + return index; + } + + bool IsNegated() const { + return negated; + } + +private: + Tegra::Shader::Pred index{}; + bool negated{}; +}; + +/// Attribute buffer memory (known as attributes or varyings in GLSL terms) +class AbufNode final { +public: + // Initialize for standard attributes (index is explicit). + explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {}) + : buffer{std::move(buffer)}, index{index}, element{element} {} + + // Initialize for physical attributes (index is a variable value). + explicit AbufNode(Node physical_address, Node buffer = {}) + : physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {} + + Tegra::Shader::Attribute::Index GetIndex() const { + return index; + } + + u32 GetElement() const { + return element; + } + + const Node& GetBuffer() const { + return buffer; + } + + bool IsPhysicalBuffer() const { + return static_cast<bool>(physical_address); + } + + const Node& GetPhysicalAddress() const { + return physical_address; + } + +private: + Node physical_address; + Node buffer; + Tegra::Shader::Attribute::Index index{}; + u32 element{}; +}; + +/// Constant buffer node, usually mapped to uniform buffers in GLSL +class CbufNode final { +public: + explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {} + + u32 GetIndex() const { + return index; + } + + const Node& GetOffset() const { + return offset; + } + +private: + u32 index{}; + Node offset; +}; + +/// Local memory node +class LmemNode final { +public: + explicit LmemNode(Node address) : address{std::move(address)} {} + + const Node& GetAddress() const { + return address; + } + +private: + Node address; +}; + +/// Global memory node +class GmemNode final { +public: + explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor) + : real_address{std::move(real_address)}, base_address{std::move(base_address)}, + descriptor{descriptor} {} + + const Node& GetRealAddress() const { + return real_address; + } + + const Node& GetBaseAddress() const { + return base_address; + } + + const GlobalMemoryBase& GetDescriptor() const { + return descriptor; + } + +private: + Node real_address; + Node base_address; + GlobalMemoryBase descriptor; +}; + +/// Commentary, can be dropped +class CommentNode final { +public: + explicit CommentNode(std::string text) : text{std::move(text)} {} + + const std::string& GetText() const { + return text; + } + +private: + std::string text; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp new file mode 100644 index 000000000..6fccbbba3 --- /dev/null +++ b/src/video_core/shader/node_helper.cpp @@ -0,0 +1,99 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <vector> + +#include "common/common_types.h" +#include "video_core/shader/node_helper.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +Node Conditional(Node condition, std::vector<Node> code) { + return MakeNode<ConditionalNode>(condition, std::move(code)); +} + +Node Comment(std::string text) { + return MakeNode<CommentNode>(std::move(text)); +} + +Node Immediate(u32 value) { + return MakeNode<ImmediateNode>(value); +} + +Node Immediate(s32 value) { + return Immediate(static_cast<u32>(value)); +} + +Node Immediate(f32 value) { + u32 integral; + std::memcpy(&integral, &value, sizeof(u32)); + return Immediate(integral); +} + +OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { + if (is_signed) { + return operation_code; + } + switch (operation_code) { + case OperationCode::FCastInteger: + return OperationCode::FCastUInteger; + case OperationCode::IAdd: + return OperationCode::UAdd; + case OperationCode::IMul: + return OperationCode::UMul; + case OperationCode::IDiv: + return OperationCode::UDiv; + case OperationCode::IMin: + return OperationCode::UMin; + case OperationCode::IMax: + return OperationCode::UMax; + case OperationCode::ICastFloat: + return OperationCode::UCastFloat; + case OperationCode::ICastUnsigned: + return OperationCode::UCastSigned; + case OperationCode::ILogicalShiftLeft: + return OperationCode::ULogicalShiftLeft; + case OperationCode::ILogicalShiftRight: + return OperationCode::ULogicalShiftRight; + case OperationCode::IArithmeticShiftRight: + return OperationCode::UArithmeticShiftRight; + case OperationCode::IBitwiseAnd: + return OperationCode::UBitwiseAnd; + case OperationCode::IBitwiseOr: + return OperationCode::UBitwiseOr; + case OperationCode::IBitwiseXor: + return OperationCode::UBitwiseXor; + case OperationCode::IBitwiseNot: + return OperationCode::UBitwiseNot; + case OperationCode::IBitfieldInsert: + return OperationCode::UBitfieldInsert; + case OperationCode::IBitCount: + return OperationCode::UBitCount; + case OperationCode::LogicalILessThan: + return OperationCode::LogicalULessThan; + case OperationCode::LogicalIEqual: + return OperationCode::LogicalUEqual; + case OperationCode::LogicalILessEqual: + return OperationCode::LogicalULessEqual; + case OperationCode::LogicalIGreaterThan: + return OperationCode::LogicalUGreaterThan; + case OperationCode::LogicalINotEqual: + return OperationCode::LogicalUNotEqual; + case OperationCode::LogicalIGreaterEqual: + return OperationCode::LogicalUGreaterEqual; + case OperationCode::INegate: + UNREACHABLE_MSG("Can't negate an unsigned integer"); + return {}; + case OperationCode::IAbsolute: + UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); + return {}; + default: + UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); + return {}; + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h new file mode 100644 index 000000000..0c2aa749b --- /dev/null +++ b/src/video_core/shader/node_helper.h @@ -0,0 +1,65 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <string> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/shader/node.h" + +namespace VideoCommon::Shader { + +/// This arithmetic operation cannot be constraint +inline constexpr MetaArithmetic PRECISE = {true}; +/// This arithmetic operation can be optimized away +inline constexpr MetaArithmetic NO_PRECISE = {false}; + +/// Creates a conditional node +Node Conditional(Node condition, std::vector<Node> code); + +/// Creates a commentary node +Node Comment(std::string text); + +/// Creates an u32 immediate +Node Immediate(u32 value); + +/// Creates a s32 immediate +Node Immediate(s32 value); + +/// Creates a f32 immediate +Node Immediate(f32 value); + +/// Converts an signed operation code to an unsigned operation code +OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); + +template <typename T, typename... Args> +Node MakeNode(Args&&... args) { + static_assert(std::is_convertible_v<T, NodeData>); + return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); +} + +template <typename... Args> +Node Operation(OperationCode code, Args&&... args) { + if constexpr (sizeof...(args) == 0) { + return MakeNode<OperationNode>(code); + } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, + Meta>) { + return MakeNode<OperationNode>(code, std::forward<Args>(args)...); + } else { + return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); + } +} + +template <typename... Args> +Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { + return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8a6ee5cf5..11b545cca 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -28,30 +29,11 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) ShaderIR::~ShaderIR() = default; -Node ShaderIR::StoreNode(NodeData&& node_data) { - auto store = std::make_unique<NodeData>(node_data); - const Node node = store.get(); - stored_nodes.push_back(std::move(store)); - return node; -} - -Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) { - return StoreNode(ConditionalNode(condition, std::move(code))); -} - -Node ShaderIR::Comment(std::string text) { - return StoreNode(CommentNode(std::move(text))); -} - -Node ShaderIR::Immediate(u32 value) { - return StoreNode(ImmediateNode(value)); -} - Node ShaderIR::GetRegister(Register reg) { if (reg != Register::ZeroIndex) { used_registers.insert(static_cast<u32>(reg)); } - return StoreNode(GprNode(reg)); + return MakeNode<GprNode>(reg); } Node ShaderIR::GetImmediate19(Instruction instr) { @@ -69,7 +51,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto [entry, is_new] = used_cbufs.try_emplace(index); entry->second.MarkAsUsed(offset); - return StoreNode(CbufNode(index, Immediate(offset))); + return MakeNode<CbufNode>(index, Immediate(offset)); } Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { @@ -80,7 +62,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { entry->second.MarkAsUsedIndirect(); const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); - return StoreNode(CbufNode(index, final_offset)); + return MakeNode<CbufNode>(index, final_offset); } Node ShaderIR::GetPredicate(u64 pred_, bool negated) { @@ -89,7 +71,7 @@ Node ShaderIR::GetPredicate(u64 pred_, bool negated) { used_predicates.insert(pred); } - return StoreNode(PredicateNode(pred, negated)); + return MakeNode<PredicateNode>(pred, negated); } Node ShaderIR::GetPredicate(bool immediate) { @@ -98,12 +80,12 @@ Node ShaderIR::GetPredicate(bool immediate) { Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { used_input_attributes.emplace(index); - return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); + return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); } Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { uses_physical_attributes = true; - return StoreNode(AbufNode(GetRegister(physical_address), buffer)); + return MakeNode<AbufNode>(GetRegister(physical_address), buffer); } Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { @@ -115,11 +97,11 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff } used_output_attributes.insert(index); - return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); + return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); } Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { - const Node node = StoreNode(InternalFlagNode(flag)); + const Node node = MakeNode<InternalFlagNode>(flag); if (negated) { return Operation(OperationCode::LogicalNegate, node); } @@ -127,7 +109,7 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { } Node ShaderIR::GetLocalMemory(Node address) { - return StoreNode(LmemNode(address)); + return MakeNode<LmemNode>(address); } Node ShaderIR::GetTemporal(u32 id) { @@ -393,68 +375,4 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { Immediate(bits)); } -/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code, - bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); - return {}; - } -} - } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ff7472e30..edcf2288e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,188 +18,14 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/node.h" namespace VideoCommon::Shader { -class OperationNode; -class ConditionalNode; -class GprNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; ///< Attribute buffer -class CbufNode; ///< Constant buffer -class LmemNode; ///< Local memory -class GmemNode; ///< Global memory -class CommentNode; - using ProgramCode = std::vector<u64>; -using NodeData = - std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, - PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; -using Node = const NodeData*; -using Node4 = std::array<Node, 4>; -using NodeBlock = std::vector<Node>; - constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAll2, /// (bool2 a) -> bool - LogicalAny2, /// (bool2 a) -> bool - - LogicalFLessThan, /// (float a, float b) -> bool - LogicalFEqual, /// (float a, float b) -> bool - LogicalFLessEqual, /// (float a, float b) -> bool - LogicalFGreaterThan, /// (float a, float b) -> bool - LogicalFNotEqual, /// (float a, float b) -> bool - LogicalFGreaterEqual, /// (float a, float b) -> bool - LogicalFIsNan, /// (float a) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - - Branch, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - /// Describes the behaviour of code path of a given entry point and a return point. enum class ExitMethod { Undetermined, ///< Internal value. Only occur when analyzing JMP loop. @@ -208,71 +34,6 @@ enum class ExitMethod { AlwaysEnd, ///< All code paths reach a END instruction. }; -class Sampler { -public: - // Use this constructor for bounded Samplers - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{false} {} - - // Use this constructor for bindless Samplers - explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} - - // Use this only for serialization/deserialization - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_bindless) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{is_bindless} {} - - std::size_t GetOffset() const { - return offset; - } - - std::size_t GetIndex() const { - return index; - } - - Tegra::Shader::TextureType GetType() const { - return type; - } - - bool IsArray() const { - return is_array; - } - - bool IsShadow() const { - return is_shadow; - } - - bool IsBindless() const { - return is_bindless; - } - - std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - bool operator<(const Sampler& rhs) const { - return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < - std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, - rhs.is_bindless); - } - -private: - /// Offset in TSC memory from which to read the sampler object, as specified by the sampling - /// instruction. - std::size_t offset{}; - std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. - bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. -}; - class ConstBuffer { public: explicit ConstBuffer(u32 max_offset, bool is_indirect) @@ -305,268 +66,11 @@ private: bool is_indirect{}; }; -struct GlobalMemoryBase { - u32 cbuf_index{}; - u32 cbuf_offset{}; - - bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - struct GlobalMemoryUsage { bool is_read{}; bool is_written{}; }; -struct MetaArithmetic { - bool precise{}; -}; - -struct MetaTexture { - const Sampler& sampler; - Node array{}; - Node depth_compare{}; - std::vector<Node> aoffi; - Node bias{}; - Node lod{}; - Node component{}; - u32 element{}; -}; - -constexpr MetaArithmetic PRECISE = {true}; -constexpr MetaArithmetic NO_PRECISE = {false}; - -using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final { -public: - explicit OperationNode(OperationCode code) : code{code} {} - - explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {} - - template <typename... T> - explicit OperationNode(OperationCode code, const T*... operands) - : OperationNode(code, {}, operands...) {} - - template <typename... T> - explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_) - : code{code}, meta{std::move(meta)}, operands{operands_...} {} - - explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) - : code{code}, meta{meta}, operands{std::move(operands)} {} - - explicit OperationNode(OperationCode code, std::vector<Node>&& operands) - : code{code}, operands{std::move(operands)} {} - - OperationCode GetCode() const { - return code; - } - - const Meta& GetMeta() const { - return meta; - } - - std::size_t GetOperandsCount() const { - return operands.size(); - } - - Node operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - const OperationCode code; - const Meta meta; - std::vector<Node> operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final { -public: - explicit ConditionalNode(Node condition, std::vector<Node>&& code) - : condition{condition}, code{std::move(code)} {} - - Node GetCondition() const { - return condition; - } - - const std::vector<Node>& GetCode() const { - return code; - } - -private: - const Node condition; ///< Condition to be satisfied - std::vector<Node> code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} - - u32 GetIndex() const { - return static_cast<u32>(index); - } - -private: - const Tegra::Shader::Register index; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value) : value{value} {} - - u32 GetValue() const { - return value; - } - -private: - const u32 value; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} - - InternalFlag GetFlag() const { - return flag; - } - -private: - const InternalFlag flag; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) - : index{index}, negated{negated} {} - - Tegra::Shader::Pred GetIndex() const { - return index; - } - - bool IsNegated() const { - return negated; - } - -private: - const Tegra::Shader::Pred index; - const bool negated; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, - Node buffer = {}) - : buffer{buffer}, index{index}, element{element} {} - - // Initialize for physical attributes (index is a variable value). - explicit constexpr AbufNode(Node physical_address, Node buffer = {}) - : physical_address{physical_address}, buffer{buffer} {} - - Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - u32 GetElement() const { - return element; - } - - Node GetBuffer() const { - return buffer; - } - - bool IsPhysicalBuffer() const { - return physical_address != nullptr; - } - - Node GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address{}; - Node buffer{}; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {} - - u32 GetIndex() const { - return index; - } - - Node GetOffset() const { - return offset; - } - -private: - const u32 index; - const Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit constexpr LmemNode(Node address) : address{address} {} - - Node GetAddress() const { - return address; - } - -private: - const Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit constexpr GmemNode(Node real_address, Node base_address, - const GlobalMemoryBase& descriptor) - : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {} - - Node GetRealAddress() const { - return real_address; - } - - Node GetBaseAddress() const { - return base_address; - } - - const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - const Node real_address; - const Node base_address; - const GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text) : text{std::move(text)} {} - - const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); @@ -663,26 +167,6 @@ private: u32 DecodeXmad(NodeBlock& bb, u32 pc); u32 DecodeOther(NodeBlock& bb, u32 pc); - /// Internalizes node's data and returns a managed pointer to a clone of that node - Node StoreNode(NodeData&& node_data); - - /// Creates a conditional node - Node Conditional(Node condition, std::vector<Node>&& code); - /// Creates a commentary - Node Comment(std::string text); - /// Creates an u32 immediate - Node Immediate(u32 value); - /// Creates a s32 immediate - Node Immediate(s32 value) { - return Immediate(static_cast<u32>(value)); - } - /// Creates a f32 immediate - Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); - } - /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); /// Generates a node representing a 19-bit immediate value @@ -827,37 +311,6 @@ private: std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); - template <typename... T> - Node Operation(OperationCode code, const T*... operands) { - return StoreNode(OperationNode(code, operands...)); - } - - template <typename... T> - Node Operation(OperationCode code, Meta&& meta, const T*... operands) { - return StoreNode(OperationNode(code, std::move(meta), operands...)); - } - - Node Operation(OperationCode code, std::vector<Node>&& operands) { - return StoreNode(OperationNode(code, std::move(operands))); - } - - Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { - return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); - } - - template <typename... T> - Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) { - return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...)); - } - - template <typename... T> - Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) { - return StoreNode( - OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...)); - } - - static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - const ProgramCode& program_code; const u32 main_offset; @@ -868,8 +321,6 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; - std::vector<std::unique_ptr<NodeData>> stored_nodes; - std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; std::set<Tegra::Shader::Attribute::Index> used_input_attributes; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 19ede1eb9..fc957d980 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -16,12 +16,12 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { const Node node = code.at(cursor); - if (const auto operation = std::get_if<OperationNode>(node)) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { if (operation->GetCode() == operation_code) { return {node, cursor}; } } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); const auto [found, internal_cursor] = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); @@ -35,11 +35,11 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } // namespace Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { - if (const auto cbuf = std::get_if<CbufNode>(tracked)) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; } - if (const auto gpr = std::get_if<GprNode>(tracked)) { + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return nullptr; } @@ -51,7 +51,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const } return TrackCbuf(source, code, new_cursor); } - if (const auto operation = std::get_if<OperationNode>(tracked)) { + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { // Cbuf found in operand @@ -60,7 +60,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const } return nullptr; } - if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { const auto& conditional_code = conditional->GetCode(); return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); } @@ -75,7 +75,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, if (!found) { return {}; } - if (const auto immediate = std::get_if<ImmediateNode>(found)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { return immediate->GetValue(); } return {}; @@ -88,11 +88,11 @@ std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeB if (!found_node) { return {}; } - const auto operation = std::get_if<OperationNode>(found_node); + const auto operation = std::get_if<OperationNode>(&*found_node); ASSERT(operation); const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if<GprNode>(target)) { + if (const auto gpr_target = std::get_if<GprNode>(&*target)) { if (gpr_target->GetIndex() == tracked->GetIndex()) { return {(*operation)[1], new_cursor}; } diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bea0d5bc2..219bfd559 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -251,7 +251,7 @@ enum class WrapMode : u32 { Mirror = 1, ClampToEdge = 2, Border = 3, - ClampOGL = 4, + Clamp = 4, MirrorOnceClampToEdge = 5, MirrorOnceBorder = 6, MirrorOnceClampOGL = 7, |