diff options
Diffstat (limited to 'src/video_core')
20 files changed, 1365 insertions, 490 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a710c4bc5..281810357 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -9,6 +9,7 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_compute.cpp engines/maxwell_compute.h + engines/shader_bytecode.h gpu.cpp gpu.h macro_interpreter.cpp @@ -27,6 +28,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_gen.cpp renderer_opengl/gl_shader_gen.h + renderer_opengl/gl_shader_manager.cpp + renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state.cpp diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 98b39b2ff..9c6236c39 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -427,14 +427,11 @@ public: BitField<0, 1, u32> enable; BitField<4, 4, ShaderProgram> program; }; - u32 start_id; - INSERT_PADDING_WORDS(1); - u32 gpr_alloc; - ShaderStage type; - INSERT_PADDING_WORDS(9); + u32 offset; + INSERT_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x8C); + INSERT_PADDING_WORDS(0x80); struct { u32 cb_size; @@ -507,6 +504,7 @@ public: }; State state{}; + MemoryManager& memory_manager; /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -521,8 +519,6 @@ public: std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; private: - MemoryManager& memory_manager; - std::unordered_map<u32, std::vector<u32>> uploaded_macros; /// Macro method that is currently being executed / being fed parameters. diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h new file mode 100644 index 000000000..eff0c35a1 --- /dev/null +++ b/src/video_core/engines/shader_bytecode.h @@ -0,0 +1,327 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <string> +#include "common/bit_field.h" + +namespace Tegra { +namespace Shader { + +struct Register { + Register() = default; + + constexpr Register(u64 value) : value(value) {} + + constexpr u64 GetIndex() const { + return value; + } + + constexpr operator u64() const { + return value; + } + + template <typename T> + constexpr u64 operator-(const T& oth) const { + return value - oth; + } + + template <typename T> + constexpr u64 operator&(const T& oth) const { + return value & oth; + } + + constexpr u64 operator&(const Register& oth) const { + return value & oth.value; + } + + constexpr u64 operator~() const { + return ~value; + } + +private: + u64 value; +}; + +union Attribute { + Attribute() = default; + + constexpr Attribute(u64 value) : value(value) {} + + enum class Index : u64 { + Position = 7, + Attribute_0 = 8, + }; + + union { + BitField<22, 2, u64> element; + BitField<24, 6, Index> index; + BitField<47, 3, u64> size; + } fmt20; + + union { + BitField<30, 2, u64> element; + BitField<32, 6, Index> index; + } fmt28; + + BitField<39, 8, u64> reg; + u64 value; +}; + +union Uniform { + BitField<20, 14, u64> offset; + BitField<34, 5, u64> index; +}; + +union OpCode { + enum class Id : u64 { + TEXS = 0x6C, + IPA = 0xE0, + FFMA_IMM = 0x65, + FFMA_CR = 0x93, + FFMA_RC = 0xA3, + FFMA_RR = 0xB3, + + FADD_C = 0x98B, + FMUL_C = 0x98D, + MUFU = 0xA10, + FADD_R = 0xB8B, + FMUL_R = 0xB8D, + LD_A = 0x1DFB, + ST_A = 0x1DFE, + + FSETP_R = 0x5BB, + FSETP_C = 0x4BB, + EXIT = 0xE30, + KIL = 0xE33, + + FMUL_IMM = 0x70D, + FMUL_IMM_x = 0x72D, + FADD_IMM = 0x70B, + FADD_IMM_x = 0x72B, + }; + + enum class Type { + Trivial, + Arithmetic, + Ffma, + Flow, + Memory, + Unknown, + }; + + struct Info { + Type type; + std::string name; + }; + + OpCode() = default; + + constexpr OpCode(Id value) : value(static_cast<u64>(value)) {} + + constexpr OpCode(u64 value) : value{value} {} + + constexpr Id EffectiveOpCode() const { + switch (op1) { + case Id::TEXS: + return op1; + } + + switch (op2) { + case Id::IPA: + return op2; + } + + switch (op3) { + case Id::FFMA_IMM: + case Id::FFMA_CR: + case Id::FFMA_RC: + case Id::FFMA_RR: + return op3; + } + + switch (op4) { + case Id::EXIT: + case Id::FSETP_R: + case Id::FSETP_C: + case Id::KIL: + return op4; + } + + switch (op5) { + case Id::MUFU: + case Id::LD_A: + case Id::ST_A: + case Id::FADD_R: + case Id::FADD_C: + case Id::FMUL_R: + case Id::FMUL_C: + return op5; + + case Id::FMUL_IMM: + case Id::FMUL_IMM_x: + return Id::FMUL_IMM; + + case Id::FADD_IMM: + case Id::FADD_IMM_x: + return Id::FADD_IMM; + } + + return static_cast<Id>(value); + } + + static const Info& GetInfo(const OpCode& opcode) { + static const std::map<Id, Info> info_table{BuildInfoTable()}; + const auto& search{info_table.find(opcode.EffectiveOpCode())}; + if (search != info_table.end()) { + return search->second; + } + + static const Info unknown{Type::Unknown, "UNK"}; + return unknown; + } + + constexpr operator Id() const { + return static_cast<Id>(value); + } + + constexpr OpCode operator<<(size_t bits) const { + return value << bits; + } + + constexpr OpCode operator>>(size_t bits) const { + return value >> bits; + } + + template <typename T> + constexpr u64 operator-(const T& oth) const { + return value - oth; + } + + constexpr u64 operator&(const OpCode& oth) const { + return value & oth.value; + } + + constexpr u64 operator~() const { + return ~value; + } + + static std::map<Id, Info> BuildInfoTable() { + std::map<Id, Info> info_table; + info_table[Id::TEXS] = {Type::Memory, "texs"}; + info_table[Id::LD_A] = {Type::Memory, "ld_a"}; + info_table[Id::ST_A] = {Type::Memory, "st_a"}; + info_table[Id::MUFU] = {Type::Arithmetic, "mufu"}; + info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"}; + info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"}; + info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"}; + info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"}; + info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"}; + info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"}; + info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"}; + info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; + info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; + info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; + info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; + info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; + info_table[Id::EXIT] = {Type::Trivial, "exit"}; + info_table[Id::IPA] = {Type::Trivial, "ipa"}; + info_table[Id::KIL] = {Type::Flow, "kil"}; + return info_table; + } + + BitField<57, 7, Id> op1; + BitField<56, 8, Id> op2; + BitField<55, 9, Id> op3; + BitField<52, 12, Id> op4; + BitField<51, 13, Id> op5; + u64 value; +}; +static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); + +} // namespace Shader +} // namespace Tegra + +namespace std { + +// TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330. +template <> +struct make_unsigned<Tegra::Shader::Attribute> { + using type = Tegra::Shader::Attribute; +}; + +template <> +struct make_unsigned<Tegra::Shader::Register> { + using type = Tegra::Shader::Register; +}; + +template <> +struct make_unsigned<Tegra::Shader::OpCode> { + using type = Tegra::Shader::OpCode; +}; + +} // namespace std + +namespace Tegra { +namespace Shader { + +enum class Pred : u64 { + UnusedIndex = 0x7, + NeverExecute = 0xf, +}; + +enum class SubOp : u64 { + Cos = 0x0, + Sin = 0x1, + Ex2 = 0x2, + Lg2 = 0x3, + Rcp = 0x4, + Rsq = 0x5, +}; + +union Instruction { + Instruction& operator=(const Instruction& instr) { + hex = instr.hex; + return *this; + } + + OpCode opcode; + BitField<0, 8, Register> gpr0; + BitField<8, 8, Register> gpr8; + BitField<16, 4, Pred> pred; + BitField<20, 8, Register> gpr20; + BitField<20, 7, SubOp> sub_op; + BitField<28, 8, Register> gpr28; + BitField<36, 13, u64> imm36; + BitField<39, 8, Register> gpr39; + + union { + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> abs_d; + } alu; + + union { + BitField<48, 1, u64> negate_b; + BitField<49, 1, u64> negate_c; + } ffma; + + BitField<60, 1, u64> is_b_gpr; + BitField<59, 1, u64> is_c_gpr; + + Attribute attribute; + Uniform uniform; + + u64 hex; +}; +static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); +static_assert(std::is_standard_layout<Instruction>::value, + "Structure does not have standard layout"); + +} // namespace Shader +} // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f217a265b..f75d4c658 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -enum class UniformBindings : GLuint { Common, VS, FS }; - -static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, - size_t expected_size) { - GLuint ub_index = glGetUniformBlockIndex(shader, name); - if (ub_index != GL_INVALID_INDEX) { - GLint ub_size = 0; - glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(ub_size == expected_size, - "Uniform block size did not match! Got %d, expected %zu", - static_cast<int>(ub_size), expected_size); - glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); - } -} - -static void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, - sizeof(RasterizerOpenGL::UniformData)); - SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, - sizeof(RasterizerOpenGL::VSUniformData)); - SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, - sizeof(RasterizerOpenGL::FSUniformData)); -} - RasterizerOpenGL::RasterizerOpenGL() { - shader_dirty = true; - has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() { } } + ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -102,36 +78,31 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); - - uniform_block_data.dirty = true; - // Create render framebuffer framebuffer.Create(); - if (has_ARB_separate_shader_objects) { - hw_vao.Create(); - hw_vao_enabled_attributes.fill(false); + hw_vao.Create(); + hw_vao_enabled_attributes.fill(false); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); + stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); + state.draw.vertex_buffer = stream_buffer->GetHandle(); - pipeline.Create(); - state.draw.program_pipeline = pipeline.handle; - state.draw.shader_program = 0; - state.draw.vertex_array = hw_vao.handle; - state.Apply(); + shader_program_manager = std::make_unique<GLShader::ProgramManager>(); + + state.draw.shader_program = 0; + state.draw.vertex_array = hw_vao.handle; + state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - vs_uniform_buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); - } else { - UNREACHABLE(); + for (unsigned index = 0; index < uniform_buffers.size(); ++index) { + auto& buffer = uniform_buffers[index]; + buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, + GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); } accelerate_draw = AccelDraw::Disabled; @@ -200,26 +171,74 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { buffer_offset += data_size; } -void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_VS); - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); -} +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { + // Helper function for uploading uniform data + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; -void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_FS); - UNREACHABLE(); -} + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); -bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { - if (!has_ARB_separate_shader_objects) { - UNREACHABLE(); - return false; + for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + ptr_pos += sizeof(GLShader::MaxwellUniformData); + + auto& shader_config = gpu.regs.shader_config[index]; + const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; + + // VertexB program is always enabled, despite bit setting + const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB}; + + // Skip stages that are not enabled + if (!is_enabled) { + continue; + } + + // Upload uniform data as one UBO per stage + const auto& stage = index - 1; // Stage indices are 0 - 5 + const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + copy_buffer(uniform_buffers[stage].handle, ubo_offset, + sizeof(GLShader::MaxwellUniformData)); + GLShader::MaxwellUniformData* ub_ptr = + reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); + ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; + Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + GLShader::ShaderSetup setup{std::move(program_code)}; + + switch (program) { + case Maxwell::ShaderProgram::VertexB: { + GLShader::MaxwellVSConfig vs_config{setup}; + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::Fragment: { + GLShader::MaxwellFSConfig fs_config{setup}; + shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, + shader_config.enable.Value(), shader_config.offset); + UNREACHABLE(); + } } + shader_program_manager->UseTrivialGeometryShader(); +} + +bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); - return true; } @@ -280,18 +299,6 @@ void RasterizerOpenGL::DrawArrays() { // Sync and bind the texture surfaces BindTextures(); - // Sync and bind the shader - if (shader_dirty) { - SetShader(); - shader_dirty = false; - } - - // Sync the uniform data - if (uniform_block_data.dirty) { - glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); - uniform_block_data.dirty = false; - } - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region state.scissor.enabled = true; @@ -311,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() { if (is_indexed) { UNREACHABLE(); } - buffer_size += sizeof(VSUniformData); + + // Uniform space for the 5 shader stages + buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; size_t ptr_pos = 0; u8* buffer_ptr; @@ -327,25 +336,12 @@ void RasterizerOpenGL::DrawArrays() { UNREACHABLE(); } - SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), - buffer_offset + static_cast<GLintptr>(ptr_pos)); - const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); - ptr_pos += sizeof(VSUniformData); + SetupShaders(buffer_ptr, buffer_offset, ptr_pos); stream_buffer->Unmap(); - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; - - copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); - - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + shader_program_manager->ApplyTo(state); + state.Apply(); if (is_indexed) { UNREACHABLE(); @@ -531,72 +527,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -void RasterizerOpenGL::SetShader() { - // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to - // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell - // shaders. - - static constexpr char vertex_shader[] = R"( -#version 150 core - -in vec2 vert_position; -in vec2 vert_tex_coord; -out vec2 frag_tex_coord; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - - static constexpr char fragment_shader[] = R"( -#version 150 core - -in vec2 frag_tex_coord; -out vec4 color; - -uniform sampler2D tex[32]; - -void main() { - color = texture(tex[0], frag_tex_coord); -} -)"; - - if (current_shader) { - return; - } - - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - - current_shader = &test_shader; - if (has_ARB_separate_shader_objects) { - test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); - glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); - } else { - UNREACHABLE(); - } - - state.draw.shader_program = test_shader.shader.handle; - state.Apply(); - - for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { - // Set the texture samplers to correspond to different texture units - std::string uniform_name = "tex[" + std::to_string(texture) + "]"; - GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); - } - } - - if (has_ARB_separate_shader_objects) { - state.draw.shader_program = 0; - state.Apply(); - } -} - void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, bool has_stencil) { state.draw.draw_framebuffer = framebuffer.handle; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d868bf421..71c21c69b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -15,10 +15,12 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/vector_math.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -45,7 +47,7 @@ public: /// OpenGL shader generated for a given Maxwell register state struct MaxwellShader { /// OpenGL shader resource - OGLShader shader; + OGLProgram shader; }; struct VertexShader { @@ -56,34 +58,6 @@ public: OGLShader shader; }; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned - // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at - // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. - // Not following that rule will cause problems on some AMD drivers. - struct UniformData {}; - - // static_assert( - // sizeof(UniformData) == 0x460, - // "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - - struct VSUniformData {}; - // static_assert( - // sizeof(VSUniformData) == 1856, - // "The size of the VSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(VSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - - struct FSUniformData {}; - // static_assert( - // sizeof(FSUniformData) == 1856, - // "The size of the FSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(FSUniformData) < 16384, - "FSUniformData structure must be less than 16kb as per the OpenGL spec"); - private: class SamplerInfo { public: @@ -122,9 +96,6 @@ private: /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); - /// Sets the OpenGL shader in accordance with the current guest state - void SetShader(); - /// Syncs the cull mode to match the guest state void SyncCullMode(); @@ -152,23 +123,12 @@ private: RasterizerCacheOpenGL res_cache; - /// Shader used for test renderering - to be removed once we have emulated shaders - MaxwellShader test_shader{}; - - const MaxwellShader* current_shader{}; - bool shader_dirty{}; - - struct { - UniformData data; - bool dirty; - } uniform_block_data = {}; - - OGLPipeline pipeline; + std::unique_ptr<GLShader::ProgramManager> shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; std::array<bool, 16> hw_vao_enabled_attributes; - std::array<SamplerInfo, 32> texture_samplers; + std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr<OGLStreamBuffer> vertex_buffer; OGLBuffer uniform_buffer; @@ -182,19 +142,9 @@ private: void AnalyzeVertexArray(bool is_indexed); void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); - OGLBuffer vs_uniform_buffer; - std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; - std::unordered_map<std::string, VertexShader> vs_shader_cache; - OGLShader vs_default_shader; - - void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); - - OGLBuffer fs_uniform_buffer; - std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; - std::unordered_map<std::string, FragmentShader> fs_shader_cache; - OGLShader fs_default_shader; + std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; - void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); + void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5cbafa2e7..213b20a21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -818,7 +818,7 @@ void main() { color = texelFetch(tbo, tbo_offset).rabg; } )"; - d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); + d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); OpenGLState state = OpenGLState::GetCurState(); GLuint old_program = state.draw.shader_program; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 06524fc59..e7ce506cf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -334,7 +334,7 @@ private: OGLVertexArray attributeless_vao; OGLBuffer d24s8_abgr_buffer; GLsizeiptr d24s8_abgr_buffer_size; - OGLShader d24s8_abgr_shader; + OGLProgram d24s8_abgr_shader; GLint d24s8_abgr_tbo_size_u_id; GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 7da5e74d1..2f0e7ac1a 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -13,14 +13,16 @@ class OGLTexture : private NonCopyable { public: OGLTexture() = default; - OGLTexture(OGLTexture&& o) { - std::swap(handle, o.handle); - } + + OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLTexture() { Release(); } + OGLTexture& operator=(OGLTexture&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -46,14 +48,16 @@ public: class OGLSampler : private NonCopyable { public: OGLSampler() = default; - OGLSampler(OGLSampler&& o) { - std::swap(handle, o.handle); - } + + OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLSampler() { Release(); } + OGLSampler& operator=(OGLSampler&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -79,25 +83,71 @@ public: class OGLShader : private NonCopyable { public: OGLShader() = default; - OGLShader(OGLShader&& o) { - std::swap(handle, o.handle); - } + + OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLShader() { Release(); } + OGLShader& operator=(OGLShader&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } - /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, - const std::vector<const char*>& feedback_vars = {}, - bool separable_program = false) { + void Create(const char* source, GLenum type) { + if (handle != 0) + return; + if (source == nullptr) + return; + handle = GLShader::LoadShader(source, type); + } + + void Release() { + if (handle == 0) + return; + glDeleteShader(handle); + handle = 0; + } + + GLuint handle = 0; +}; + +class OGLProgram : private NonCopyable { +public: + OGLProgram() = default; + + OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {} + + ~OGLProgram() { + Release(); + } + + OGLProgram& operator=(OGLProgram&& o) { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + template <typename... T> + void Create(bool separable_program, T... shaders) { if (handle != 0) return; - handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, - separable_program); + handle = GLShader::LoadProgram(separable_program, shaders...); + } + + /// Creates a new internal OpenGL resource and stores the handle + void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, + bool separable_program = false) { + OGLShader vert, geo, frag; + if (vert_shader) + vert.Create(vert_shader, GL_VERTEX_SHADER); + if (geo_shader) + geo.Create(geo_shader, GL_GEOMETRY_SHADER); + if (frag_shader) + frag.Create(frag_shader, GL_FRAGMENT_SHADER); + Create(separable_program, vert.handle, geo.handle, frag.handle); } /// Deletes the internal OpenGL resource @@ -148,14 +198,16 @@ public: class OGLBuffer : private NonCopyable { public: OGLBuffer() = default; - OGLBuffer(OGLBuffer&& o) { - std::swap(handle, o.handle); - } + + OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLBuffer() { Release(); } + OGLBuffer& operator=(OGLBuffer&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -214,14 +266,16 @@ public: class OGLVertexArray : private NonCopyable { public: OGLVertexArray() = default; - OGLVertexArray(OGLVertexArray&& o) { - std::swap(handle, o.handle); - } + + OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLVertexArray() { Release(); } + OGLVertexArray& operator=(OGLVertexArray&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -247,14 +301,16 @@ public: class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; - OGLFramebuffer(OGLFramebuffer&& o) { - std::swap(handle, o.handle); - } + + OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLFramebuffer() { Release(); } + OGLFramebuffer& operator=(OGLFramebuffer&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 564ea8f9e..1290fa4cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2,57 +2,499 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <map> +#include <set> #include <string> -#include <queue> #include "common/assert.h" #include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -namespace Maxwell3D { -namespace Shader { +namespace GLShader { namespace Decompiler { +using Tegra::Shader::Attribute; +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Register; +using Tegra::Shader::SubOp; +using Tegra::Shader::Uniform; + constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -class Impl { +class DecompileFail : public std::runtime_error { public: - Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, - const std::string& emit_cb, const std::string& setemit_cb) - : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), - inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), - sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} - - std::string Decompile() { - UNREACHABLE(); - return {}; + using std::runtime_error::runtime_error; +}; + +/// Describes the behaviour of code path of a given entry point and a return point. +enum class ExitMethod { + Undetermined, ///< Internal value. Only occur when analyzing JMP loop. + AlwaysReturn, ///< All code paths reach the return point. + Conditional, ///< Code path reaches the return point or an END instruction conditionally. + AlwaysEnd, ///< All code paths reach a END instruction. +}; + +/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. +struct Subroutine { + /// Generates a name suitable for GLSL source code. + std::string GetName() const { + return "sub_" + std::to_string(begin) + "_" + std::to_string(end); + } + + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set<u32> labels; ///< Addresses refereced by JMP instructions. + + bool operator<(const Subroutine& rhs) const { + return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); + } +}; + +/// Analyzes shader code and produces a set of subroutines. +class ControlFlowAnalyzer { +public: + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + : program_code(program_code) { + + // Recursively finds all subroutines. + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + if (program_main.exit_method != ExitMethod::AlwaysEnd) + throw DecompileFail("Program does not always end"); + } + + std::set<Subroutine> GetSubroutines() { + return std::move(subroutines); } private: - const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; - u32 main_offset; - const std::function<std::string(u32)>& inputreg_getter; - const std::function<std::string(u32)>& outputreg_getter; - bool sanitize_mul; - const std::string& emit_cb; - const std::string& setemit_cb; + const ProgramCode& program_code; + std::set<Subroutine> subroutines; + std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; + + /// Adds and analyzes a new subroutine if it is not added yet. + const Subroutine& AddSubroutine(u32 begin, u32 end) { + auto iter = subroutines.find(Subroutine{begin, end}); + if (iter != subroutines.end()) + return *iter; + + Subroutine subroutine{begin, end}; + subroutine.exit_method = Scan(begin, end, subroutine.labels); + if (subroutine.exit_method == ExitMethod::Undetermined) + throw DecompileFail("Recursive function detected"); + return *subroutines.insert(std::move(subroutine)).first; + } + + /// Scans a range of code for labels and determines the exit method. + ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { + auto [iter, inserted] = + exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); + ExitMethod& exit_method = iter->second; + if (!inserted) + return exit_method; + + for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { + const Instruction instr = {program_code[offset]}; + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::EXIT: { + return exit_method = ExitMethod::AlwaysEnd; + } + } + } + return exit_method = ExitMethod::AlwaysReturn; + } }; -std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, - u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb, - const std::string& setemit_cb) { - Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, - sanitize_mul, emit_cb, setemit_cb); - return impl.Decompile(); +class ShaderWriter { +public: + void AddLine(const std::string& text) { + DEBUG_ASSERT(scope >= 0); + if (!text.empty()) { + shader_source += std::string(static_cast<size_t>(scope) * 4, ' '); + } + shader_source += text + '\n'; + } + + std::string GetResult() { + return std::move(shader_source); + } + + int scope = 0; + +private: + std::string shader_source; +}; + +class GLSLGenerator { +public: + GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, + u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + : subroutines(subroutines), program_code(program_code), main_offset(main_offset), + stage(stage) { + + Generate(); + } + + std::string GetShaderCode() { + return declarations.GetResult() + shader.GetResult(); + } + +private: + /// Gets the Subroutine object corresponding to the specified address. + const Subroutine& GetSubroutine(u32 begin, u32 end) const { + auto iter = subroutines.find(Subroutine{begin, end}); + ASSERT(iter != subroutines.end()); + return *iter; + } + + /// Generates code representing an input attribute register. + std::string GetInputAttribute(Attribute::Index attribute) { + declr_input_attribute.insert(attribute); + + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + return "input_attribute_" + std::to_string(index); + } + + LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); + UNREACHABLE(); + } + + /// Generates code representing an output attribute register. + std::string GetOutputAttribute(Attribute::Index attribute) { + switch (attribute) { + case Attribute::Index::Position: + return "gl_Position"; + default: + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_output_attribute.insert(attribute); + return "output_attribute_" + std::to_string(index); + } + + LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); + UNREACHABLE(); + } + } + + /// Generates code representing a temporary (GPR) register. + std::string GetRegister(const Register& reg) { + return *declr_register.insert("register_" + std::to_string(reg)).first; + } + + /// Generates code representing a uniform (C buffer) register. + std::string GetUniform(const Uniform& reg) const { + std::string index = std::to_string(reg.index); + return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" + + std::to_string(reg.offset & 3) + "]"; + } + + /** + * Adds code that calls a subroutine. + * @param subroutine the subroutine to call. + */ + void CallSubroutine(const Subroutine& subroutine) { + if (subroutine.exit_method == ExitMethod::AlwaysEnd) { + shader.AddLine(subroutine.GetName() + "();"); + shader.AddLine("return true;"); + } else if (subroutine.exit_method == ExitMethod::Conditional) { + shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); + } else { + shader.AddLine(subroutine.GetName() + "();"); + } + } + + /** + * Writes code that does an assignment operation. + * @param reg the destination register code. + * @param value the code representing the value to assign. + */ + void SetDest(u64 elem, const std::string& reg, const std::string& value, + u64 dest_num_components, u64 value_num_components) { + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + + std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); + std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); + + shader.AddLine(dest + " = " + src + ";"); + } + + /** + * Compiles a single instruction from Tegra to GLSL. + * @param offset the offset of the Tegra shader instruction. + * @return the offset of the next instruction to execute. Usually it is the current offset + * + 1. If the current instruction always terminates the program, returns PROGRAM_END. + */ + u32 CompileInstr(u32 offset) { + const Instruction instr = {program_code[offset]}; + + shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); + + switch (OpCode::GetInfo(instr.opcode).type) { + case OpCode::Type::Arithmetic: { + ASSERT(!instr.alu.abs_d); + + std::string dest = GetRegister(instr.gpr0); + std::string op_a = instr.alu.negate_a ? "-" : ""; + op_a += GetRegister(instr.gpr8); + if (instr.alu.abs_a) { + op_a = "abs(" + op_a + ")"; + } + + std::string op_b = instr.alu.negate_b ? "-" : ""; + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } + if (instr.alu.abs_b) { + op_b = "abs(" + op_b + ")"; + } + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::FMUL_C: + case OpCode::Id::FMUL_R: { + SetDest(0, dest, op_a + " * " + op_b, 1, 1); + break; + } + case OpCode::Id::FADD_C: + case OpCode::Id::FADD_R: { + SetDest(0, dest, op_a + " + " + op_b, 1, 1); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + case OpCode::Type::Ffma: { + ASSERT_MSG(!instr.ffma.negate_b, "untested"); + ASSERT_MSG(!instr.ffma.negate_c, "untested"); + + std::string dest = GetRegister(instr.gpr0); + std::string op_a = GetRegister(instr.gpr8); + + std::string op_b = instr.ffma.negate_b ? "-" : ""; + op_b += GetUniform(instr.uniform); + + std::string op_c = instr.ffma.negate_c ? "-" : ""; + op_c += GetRegister(instr.gpr39); + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::FFMA_CR: { + SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); + break; + } + + default: { + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + case OpCode::Type::Memory: { + std::string gpr0 = GetRegister(instr.gpr0); + const Attribute::Index attribute = instr.attribute.fmt20.index; + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::LD_A: { + ASSERT(instr.attribute.fmt20.size == 0); + SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); + break; + } + case OpCode::Id::ST_A: { + ASSERT(instr.attribute.fmt20.size == 0); + SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + + default: { + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::EXIT: { + shader.AddLine("return true;"); + offset = PROGRAM_END - 1; + break; + } + + default: { + LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + + break; + } + } + + return offset + 1; + } + + /** + * Compiles a range of instructions from Tegra to GLSL. + * @param begin the offset of the starting instruction. + * @param end the offset where the compilation should stop (exclusive). + * @return the offset of the next instruction to compile. PROGRAM_END if the program + * terminates. + */ + u32 CompileRange(u32 begin, u32 end) { + u32 program_counter; + for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { + program_counter = CompileInstr(program_counter); + } + return program_counter; + } + + void Generate() { + // Add declarations for all subroutines + for (const auto& subroutine : subroutines) { + shader.AddLine("bool " + subroutine.GetName() + "();"); + } + shader.AddLine(""); + + // Add the main entry point + shader.AddLine("bool exec_shader() {"); + ++shader.scope; + CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); + --shader.scope; + shader.AddLine("}\n"); + + // Add definitions for all subroutines + for (const auto& subroutine : subroutines) { + std::set<u32> labels = subroutine.labels; + + shader.AddLine("bool " + subroutine.GetName() + "() {"); + ++shader.scope; + + if (labels.empty()) { + if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { + shader.AddLine("return false;"); + } + } else { + labels.insert(subroutine.begin); + shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("while (true) {"); + ++shader.scope; + + shader.AddLine("switch (jmp_to) {"); + + for (auto label : labels) { + shader.AddLine("case " + std::to_string(label) + "u: {"); + ++shader.scope; + + auto next_it = labels.lower_bound(label + 1); + u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; + + u32 compile_end = CompileRange(label, next_label); + if (compile_end > next_label && compile_end != PROGRAM_END) { + // This happens only when there is a label inside a IF/LOOP block + shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + labels.emplace(compile_end); + } + + --shader.scope; + shader.AddLine("}"); + } + + shader.AddLine("default: return false;"); + shader.AddLine("}"); + + --shader.scope; + shader.AddLine("}"); + + shader.AddLine("return false;"); + } + + --shader.scope; + shader.AddLine("}\n"); + + DEBUG_ASSERT(shader.scope == 0); + } + + GenerateDeclarations(); + } + + /// Add declarations for registers + void GenerateDeclarations() { + for (const auto& reg : declr_register) { + declarations.AddLine("float " + reg + " = 0.0;"); + } + declarations.AddLine(""); + + for (const auto& index : declr_input_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine("layout(location = " + + std::to_string(static_cast<u32>(index) - + static_cast<u32>(Attribute::Index::Attribute_0)) + + ") in vec4 " + GetInputAttribute(index) + ";"); + } + declarations.AddLine(""); + + for (const auto& index : declr_output_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine("layout(location = " + + std::to_string(static_cast<u32>(index) - + static_cast<u32>(Attribute::Index::Attribute_0)) + + ") out vec4 " + GetOutputAttribute(index) + ";"); + } + declarations.AddLine(""); + } + +private: + const std::set<Subroutine>& subroutines; + const ProgramCode& program_code; + const u32 main_offset; + Maxwell3D::Regs::ShaderStage stage; + + ShaderWriter shader; + ShaderWriter declarations; + + // Declarations + std::set<std::string> declr_register; + std::set<Attribute::Index> declr_input_attribute; + std::set<Attribute::Index> declr_output_attribute; +}; // namespace Decompiler + +std::string GetCommonDeclarations() { + return "bool exec_shader();"; +} + +boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage) { + try { + auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset, stage); + return generator.GetShaderCode(); + } catch (const DecompileFail& exception) { + LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); + } + return boost::none; } } // namespace Decompiler -} // namespace Shader -} // namespace Maxwell3D +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02ebfcbe8..2f4047d87 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,23 +5,20 @@ #include <array> #include <functional> #include <string> +#include <boost/optional.hpp> #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" -namespace Maxwell3D { -namespace Shader { +namespace GLShader { namespace Decompiler { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; -constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; +using Tegra::Engines::Maxwell3D; -std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, - u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb = "", - const std::string& setemit_cb = ""); +std::string GetCommonDeclarations(); + +boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage); } // namespace Decompiler -} // namespace Shader -} // namespace Maxwell3D +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8f3c98800..524c2cfb5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -7,12 +7,12 @@ namespace GLShader { -std::string GenerateVertexShader(const MaxwellVSConfig& config) { +std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { UNREACHABLE(); return {}; } -std::string GenerateFragmentShader(const MaxwellFSConfig& config) { +std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 5101e7d30..925e66ee4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,46 +4,67 @@ #pragma once -#include <cstring> +#include <array> #include <string> #include <type_traits> +#include "common/common_types.h" #include "common/hash.h" namespace GLShader { -enum Attributes { - ATTRIBUTE_POSITION, - ATTRIBUTE_COLOR, - ATTRIBUTE_TEXCOORD0, - ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, - ATTRIBUTE_TEXCOORD0_W, - ATTRIBUTE_NORMQUAT, - ATTRIBUTE_VIEW, -}; +constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; -struct MaxwellShaderConfigCommon { - explicit MaxwellShaderConfigCommon(){}; +using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>; + +struct ShaderSetup { + ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + + ProgramCode program_code; + bool program_code_hash_dirty = true; + + u64 GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; + } + +private: + u64 program_code_hash{}; }; -struct MaxwellVSConfig : MaxwellShaderConfigCommon { - explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} +struct MaxwellShaderConfigCommon { + void Init(ShaderSetup& setup) { + program_hash = setup.GetProgramCodeHash(); + } - bool operator==(const MaxwellVSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; - }; + u64 program_hash; }; -struct MaxwellFSConfig : MaxwellShaderConfigCommon { - explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} +struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { + explicit MaxwellVSConfig(ShaderSetup& setup) { + state.Init(setup); + } +}; - bool operator==(const MaxwellFSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; - }; +struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { + explicit MaxwellFSConfig(ShaderSetup& setup) { + state.Init(setup); + } }; -std::string GenerateVertexShader(const MaxwellVSConfig& config); -std::string GenerateFragmentShader(const MaxwellFSConfig& config); +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code + */ +std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); + +/** + * Generates the GLSL fragment shader program source code for the given FS program + * @returns String of the shader source code + */ +std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); } // namespace GLShader @@ -52,14 +73,14 @@ namespace std { template <> struct hash<GLShader::MaxwellVSConfig> { size_t operator()(const GLShader::MaxwellVSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); + return k.Hash(); } }; template <> struct hash<GLShader::MaxwellFSConfig> { size_t operator()(const GLShader::MaxwellFSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); + return k.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 000000000..7fceedce8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -0,0 +1,65 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/hle/kernel/process.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace GLShader { + +namespace Impl { +void SetShaderUniformBlockBinding(GLuint shader, const char* name, + Maxwell3D::Regs::ShaderStage binding, size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index != GL_INVALID_INDEX) { + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, + "Uniform block size did not match! Got %d, expected %zu", + static_cast<int>(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); + } +} + +void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, + sizeof(MaxwellUniformData)); +} + +void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + for (u32 texture = 0; texture < NumTextureSamplers; ++texture) { + // Set the texture samplers to correspond to different texture units + std::string uniform_name = "tex[" + std::to_string(texture) + "]"; + GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); + } + } + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +} // namespace Impl + +void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) { + const auto& const_buffer = shader_stage.const_buffers[index]; + const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address); + Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer)); + } +} + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 000000000..5c8560cf5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -0,0 +1,151 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <tuple> +#include <unordered_map> +#include <boost/functional/hash.hpp> +#include <glad/glad.h> +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace GLShader { + +/// Number of OpenGL texture samplers that can be used in the fragment shader +static constexpr size_t NumTextureSamplers = 32; + +using Tegra::Engines::Maxwell3D; + +namespace Impl { +void SetShaderUniformBlockBindings(GLuint shader); +void SetShaderSamplerBindings(GLuint shader); +} // namespace Impl + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers. +struct MaxwellUniformData { + void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); + + using ConstBuffer = std::array<GLvec4, 4>; + alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers; +}; +static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) < 16384, + "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); + +class OGLShaderStage { +public: + OGLShaderStage() = default; + + void Create(const char* source, GLenum type) { + OGLShader shader; + shader.Create(source, type); + program.Create(true, shader.handle); + Impl::SetShaderUniformBlockBindings(program.handle); + Impl::SetShaderSamplerBindings(program.handle); + } + GLuint GetHandle() const { + return program.handle; + } + +private: + OGLProgram program; +}; + +// TODO(wwylele): beautify this doc +// This is a shader cache designed for translating PICA shader to GLSL shader. +// The double cache is needed because diffent KeyConfigType, which includes a hash of the code +// region (including its leftover unused code) can generate the same GLSL code. +template <typename KeyConfigType, + std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType> +class ShaderCache { +public: + ShaderCache() = default; + + GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) { + auto map_it = shader_map.find(key); + if (map_it == shader_map.end()) { + std::string program = CodeGenerator(setup, key); + + auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{}); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + cached_shader.Create(program.c_str(), ShaderType); + } + shader_map[key] = &cached_shader; + return cached_shader.GetHandle(); + } else { + return map_it->second->GetHandle(); + } + } + +private: + std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; + std::unordered_map<std::string, OGLShaderStage> shader_cache; +}; + +using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>; + +using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>; + +class ProgramManager { +public: + ProgramManager() { + pipeline.Create(); + } + + void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) { + current.vs = vertex_shaders.Get(config, setup); + } + + void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) { + current.fs = fragment_shaders.Get(config, setup); + } + + void UseTrivialGeometryShader() { + current.gs = 0; + } + + void ApplyTo(OpenGLState& state) { + // Workaround for AMD bug + glUseProgramStages(pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, + 0); + + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; + } + +private: + struct ShaderTuple { + GLuint vs = 0, gs = 0, fs = 0; + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + struct Hash { + std::size_t operator()(const ShaderTuple& tuple) const { + std::size_t hash = 0; + boost::hash_combine(hash, tuple.vs); + boost::hash_combine(hash, tuple.gs); + boost::hash_combine(hash, tuple.fs); + return hash; + } + }; + }; + ShaderTuple current; + VertexShaders vertex_shaders; + FragmentShaders fragment_shaders; + + std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; + OGLPipeline pipeline; +}; + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a6c6204d5..8568fface 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,156 +10,41 @@ namespace GLShader { -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector<const char*>& feedback_vars, - bool separable_program) { - // Create the shaders - GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; - GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; - GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; +GLuint LoadShader(const char* source, GLenum type) { + const char* debug_type; + switch (type) { + case GL_VERTEX_SHADER: + debug_type = "vertex"; + break; + case GL_GEOMETRY_SHADER: + debug_type = "geometry"; + break; + case GL_FRAGMENT_SHADER: + debug_type = "fragment"; + break; + default: + UNREACHABLE(); + } + GLuint shader_id = glCreateShader(type); + glShaderSource(shader_id, 1, &source, nullptr); + NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); + glCompileShader(shader_id); GLint result = GL_FALSE; - int info_log_length; - - if (vertex_shader) { - // Compile Vertex Shader - LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); - - glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); - glCompileShader(vertex_shader_id); - - // Check Vertex Shader - glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> vertex_shader_error(info_log_length); - glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); - } - } - } - - if (geometry_shader) { - // Compile Geometry Shader - LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); - - glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); - glCompileShader(geometry_shader_id); - - // Check Geometry Shader - glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> geometry_shader_error(info_log_length); - glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, - &geometry_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); - } - } - } - - if (fragment_shader) { - // Compile Fragment Shader - LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); - - glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); - glCompileShader(fragment_shader_id); - - // Check Fragment Shader - glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> fragment_shader_error(info_log_length); - glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, - &fragment_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); - } - } - } - - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - if (vertex_shader) { - glAttachShader(program_id, vertex_shader_id); - } - if (geometry_shader) { - glAttachShader(program_id, geometry_shader_id); - } - if (fragment_shader) { - glAttachShader(program_id, fragment_shader_id); - } - - if (!feedback_vars.empty()) { - auto varyings = feedback_vars; - glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), - &varyings[0], GL_INTERLEAVED_ATTRIBS); - } - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint info_log_length; + glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); if (info_log_length > 1) { - std::vector<char> program_error(info_log_length); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + std::string shader_error(info_log_length, ' '); + glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); + NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); } else { - LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); } } - - // If the program linking failed at least one of the shaders was probably bad - if (result == GL_FALSE) { - if (vertex_shader) { - LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); - } - if (geometry_shader) { - LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); - } - if (fragment_shader) { - LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); - } - } - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - - if (vertex_shader) { - glDetachShader(program_id, vertex_shader_id); - glDeleteShader(vertex_shader_id); - } - if (geometry_shader) { - glDetachShader(program_id, geometry_shader_id); - glDeleteShader(geometry_shader_id); - } - if (fragment_shader) { - glDetachShader(program_id, fragment_shader_id); - glDeleteShader(fragment_shader_id); - } - - return program_id; + return shader_id; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index fc7b5e080..a1fa9e814 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -6,18 +6,60 @@ #include <vector> #include <glad/glad.h> +#include "common/assert.h" +#include "common/logging/log.h" namespace GLShader { /** + * Utility function to create and compile an OpenGL GLSL shader + * @param source String of the GLSL shader program + * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) + */ +GLuint LoadShader(const char* source, GLenum type); + +/** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param vertex_shader String of the GLSL vertex shader program - * @param geometry_shader String of the GLSL geometry shader program - * @param fragment_shader String of the GLSL fragment shader program - * @returns Handle of the newly created OpenGL shader object + * @param separable_program whether to create a separable program + * @param shaders ID of shaders to attach to the program + * @returns Handle of the newly created OpenGL program object */ -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, - bool separable_program = false); +template <typename... T> +GLuint LoadProgram(bool separable_program, T... shaders) { + // Link the program + NGLOG_DEBUG(Render_OpenGL, "Linking program..."); + + GLuint program_id = glCreateProgram(); + + ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); + + if (separable_program) { + glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); + } + + glLinkProgram(program_id); + + // Check the program + GLint result = GL_FALSE; + GLint info_log_length; + glGetProgramiv(program_id, GL_LINK_STATUS, &result); + glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::string program_error(info_log_length, ' '); + glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + if (result == GL_TRUE) { + NGLOG_DEBUG(Render_OpenGL, "{}", program_error); + } else { + NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); + } + } + + ASSERT_MSG(result == GL_TRUE, "Shader not linked"); + + ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); + + return program_id; +} } // namespace GLShader diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 48ee80125..7909dcfc3 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -10,6 +10,14 @@ #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" +using GLvec2 = std::array<GLfloat, 2>; +using GLvec3 = std::array<GLfloat, 3>; +using GLvec4 = std::array<GLfloat, 4>; + +using GLuvec2 = std::array<GLuint, 2>; +using GLuvec3 = std::array<GLuint, 3>; +using GLuvec4 = std::array<GLuint, 4>; + namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Triangles: + return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 78b50b227..5e78723a2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -57,7 +57,7 @@ uniform sampler2D color_texture; void main() { // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to // support more framebuffer pixel formats. - color = texture(color_texture, frag_tex_coord).abgr; + color = texture(color_texture, frag_tex_coord); } )"; @@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() { 0.0f); // Link shaders and get variable locations - shader.Create(vertex_shader, nullptr, fragment_shader); + shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); state.draw.shader_program = shader.handle; state.Apply(); uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); @@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, } std::array<ScreenRectVertex, 4> vertices = {{ - ScreenRectVertex(x, y, texcoords.top, right), - ScreenRectVertex(x + w, y, texcoords.bottom, right), - ScreenRectVertex(x, y + h, texcoords.top, left), - ScreenRectVertex(x + w, y + h, texcoords.bottom, left), + ScreenRectVertex(x, y, texcoords.top, left), + ScreenRectVertex(x + w, y, texcoords.bottom, left), + ScreenRectVertex(x, y + h, texcoords.top, right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, right), }}; state.texture_units[0].texture_2d = screen_info.display_texture; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c52f40037..2cc6d9a00 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -72,7 +72,7 @@ private: // OpenGL object IDs OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLShader shader; + OGLProgram shader; /// Display information for Switch screen ScreenInfo screen_info; diff --git a/src/video_core/utils.h b/src/video_core/utils.h index be0f7e22b..e0a14d48f 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe const u32 coarse_y = y & ~127; u32 morton_offset = GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; data_ptrs[morton_to_gl] = morton_data + morton_offset; data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; |