diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
19 files changed, 1915 insertions, 649 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f217a265b..2d4a0d6db 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <memory> #include <string> #include <tuple> @@ -13,7 +14,6 @@ #include "common/math_util.h" #include "common/microprofile.h" #include "common/scope_exit.h" -#include "common/vector_math.h" #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/settings.h" @@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -enum class UniformBindings : GLuint { Common, VS, FS }; - -static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, - size_t expected_size) { - GLuint ub_index = glGetUniformBlockIndex(shader, name); - if (ub_index != GL_INVALID_INDEX) { - GLint ub_size = 0; - glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(ub_size == expected_size, - "Uniform block size did not match! Got %d, expected %zu", - static_cast<int>(ub_size), expected_size); - glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); - } -} - -static void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, - sizeof(RasterizerOpenGL::UniformData)); - SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, - sizeof(RasterizerOpenGL::VSUniformData)); - SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, - sizeof(RasterizerOpenGL::FSUniformData)); -} - RasterizerOpenGL::RasterizerOpenGL() { - shader_dirty = true; - has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -72,6 +46,14 @@ RasterizerOpenGL::RasterizerOpenGL() { state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } + // Create SSBOs + for (size_t stage = 0; stage < ssbos.size(); ++stage) { + for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { + ssbos[stage][buffer].Create(); + state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; + } + } + GLint ext_num; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); for (GLint i = 0; i < ext_num; i++) { @@ -88,6 +70,8 @@ RasterizerOpenGL::RasterizerOpenGL() { } } + ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -102,36 +86,30 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); - - uniform_block_data.dirty = true; - // Create render framebuffer framebuffer.Create(); - if (has_ARB_separate_shader_objects) { - hw_vao.Create(); - hw_vao_enabled_attributes.fill(false); + hw_vao.Create(); + hw_vao_enabled_attributes.fill(false); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); + stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); + state.draw.vertex_buffer = stream_buffer->GetHandle(); - pipeline.Create(); - state.draw.program_pipeline = pipeline.handle; - state.draw.shader_program = 0; - state.draw.vertex_array = hw_vao.handle; - state.Apply(); + shader_program_manager = std::make_unique<GLShader::ProgramManager>(); + state.draw.shader_program = 0; + state.draw.vertex_array = hw_vao.handle; + state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - vs_uniform_buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); - } else { - UNREACHABLE(); + for (unsigned index = 0; index < uniform_buffers.size(); ++index) { + auto& buffer = uniform_buffers[index]; + buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, + GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); } accelerate_draw = AccelDraw::Disabled; @@ -149,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - - if (is_indexed) { - UNREACHABLE(); - } - - // TODO(bunnei): Add support for 1+ vertex arrays - vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride; -} - void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; @@ -171,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // TODO(bunnei): Add support for 1+ vertex arrays const auto& vertex_array{regs.vertex_array[0]}; + const auto& vertex_array_limit{regs.vertex_array_limit[0]}; ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { @@ -183,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // to avoid OpenGL errors. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; + NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), + attrib.offset.Value(), attrib.IsNormalized()); + glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); @@ -191,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { } // Copy vertex array data - const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; + const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; res_cache.FlushRegion(data_addr, data_size, nullptr); Memory::ReadBlock(data_addr, array_ptr, data_size); @@ -200,26 +172,89 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { buffer_offset += data_size; } -void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_VS); - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); -} +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { + // Helper function for uploading uniform data + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; -void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_FS); - UNREACHABLE(); -} + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); -bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { - if (!has_ARB_separate_shader_objects) { - UNREACHABLE(); - return false; + // Next available bindpoint to use when uploading the const buffers to the GLSL shaders. + u32 current_constbuffer_bindpoint = 0; + + for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + ptr_pos += sizeof(GLShader::MaxwellUniformData); + + auto& shader_config = gpu.regs.shader_config[index]; + const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; + + const auto& stage = index - 1; // Stage indices are 0 - 5 + + const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage)); + + // Skip stages that are not enabled + if (!is_enabled) { + continue; + } + + // Upload uniform data as one UBO per stage + const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + copy_buffer(uniform_buffers[stage].handle, ubo_offset, + sizeof(GLShader::MaxwellUniformData)); + GLShader::MaxwellUniformData* ub_ptr = + reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); + ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; + Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + GLShader::ShaderSetup setup{std::move(program_code)}; + + GLShader::ShaderEntries shader_resources; + + switch (program) { + case Maxwell::ShaderProgram::VertexB: { + GLShader::MaxwellVSConfig vs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::Fragment: { + GLShader::MaxwellFSConfig fs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, + shader_config.enable.Value(), shader_config.offset); + UNREACHABLE(); + } + + GLuint gl_stage_program = shader_program_manager->GetCurrentProgramStage( + static_cast<Maxwell::ShaderStage>(stage)); + + // Configure the const buffers for this shader stage. + current_constbuffer_bindpoint = + SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, + current_constbuffer_bindpoint, shader_resources.const_buffer_entries); } + shader_program_manager->UseTrivialGeometryShader(); +} + +bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); - return true; } @@ -255,18 +290,18 @@ void RasterizerOpenGL::DrawArrays() { : (depth_surface == nullptr ? 1u : depth_surface->res_scale); MathUtil::Rectangle<u32> draw_rect{ - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + - viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + - viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + static_cast<u32>( + std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>( + std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>( + std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + + viewport_rect.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); @@ -280,18 +315,6 @@ void RasterizerOpenGL::DrawArrays() { // Sync and bind the texture surfaces BindTextures(); - // Sync and bind the shader - if (shader_dirty) { - SetShader(); - shader_dirty = false; - } - - // Sync the uniform data - if (uniform_block_data.dirty) { - glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); - uniform_block_data.dirty = false; - } - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region state.scissor.enabled = true; @@ -303,15 +326,22 @@ void RasterizerOpenGL::DrawArrays() { // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - AnalyzeVertexArray(is_indexed); + const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; + const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; + + // TODO(bunnei): Add support for 1+ vertex arrays + vs_input_size = vertex_num * regs.vertex_array[0].stride; + state.draw.vertex_buffer = stream_buffer->GetHandle(); state.Apply(); size_t buffer_size = static_cast<size_t>(vs_input_size); if (is_indexed) { - UNREACHABLE(); + buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; } - buffer_size += sizeof(VSUniformData); + + // Uniform space for the 5 shader stages + buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; size_t ptr_pos = 0; u8* buffer_ptr; @@ -322,36 +352,37 @@ void RasterizerOpenGL::DrawArrays() { SetupVertexArray(buffer_ptr, buffer_offset); ptr_pos += vs_input_size; + // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - UNREACHABLE(); - } + ptr_pos = Common::AlignUp(ptr_pos, 4); - SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), - buffer_offset + static_cast<GLintptr>(ptr_pos)); - const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); - ptr_pos += sizeof(VSUniformData); + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const VAddr index_data_addr{ + memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; + Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); - stream_buffer->Unmap(); + index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + ptr_pos += index_buffer_size; + } - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; + SetupShaders(buffer_ptr, buffer_offset, ptr_pos); - copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); + stream_buffer->Unmap(); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + shader_program_manager->ApplyTo(state); + state.Apply(); + const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; if (is_indexed) { - UNREACHABLE(); + const GLint index_min{static_cast<GLint>(regs.index_array.first)}; + const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)}; + glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count, + MaxwellToGL::IndexFormat(regs.index_array.format), + reinterpret_cast<const void*>(index_buffer_offset), + -index_min); } else { - glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0, - regs.vertex_buffer.count); + glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count); } // Disable scissor test @@ -384,7 +415,7 @@ void RasterizerOpenGL::DrawArrays() { void RasterizerOpenGL::BindTextures() { using Regs = Tegra::Engines::Maxwell3D::Regs; - auto maxwell3d = Core::System::GetInstance().GPU().Get3DEngine(); + auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine(); // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a // certain number in OpenGL. We try to only use the minimum amount of host textures by not @@ -415,7 +446,32 @@ void RasterizerOpenGL::BindTextures() { } } -void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} +void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + switch (method) { + case MAXWELL3D_REG_INDEX(blend.separate_alpha): + ASSERT_MSG(false, "unimplemented"); + break; + case MAXWELL3D_REG_INDEX(blend.equation_rgb): + state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.factor_source_rgb): + state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb): + state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.equation_a): + state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a); + break; + case MAXWELL3D_REG_INDEX(blend.factor_source_a): + state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a); + break; + case MAXWELL3D_REG_INDEX(blend.factor_dest_a): + state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a); + break; + } +} void RasterizerOpenGL::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); @@ -467,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; - src_params.is_tiled = false; + src_params.is_tiled = true; + src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + src_params.component_type = + SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); src_params.UpdateParams(); MathUtil::Rectangle<u32> src_rect; @@ -531,70 +590,53 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -void RasterizerOpenGL::SetShader() { - // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to - // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell - // shaders. - - static constexpr char vertex_shader[] = R"( -#version 150 core - -in vec2 vert_position; -in vec2 vert_tex_coord; -out vec2 frag_tex_coord; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - - static constexpr char fragment_shader[] = R"( -#version 150 core - -in vec2 frag_tex_coord; -out vec4 color; - -uniform sampler2D tex[32]; +u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, + u32 current_bindpoint, + const std::vector<GLShader::ConstBufferEntry>& entries) { + auto& gpu = Core::System::GetInstance().GPU(); + auto& maxwell3d = gpu.Get3DEngine(); -void main() { - color = texture(tex[0], frag_tex_coord); -} -)"; + ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), + "Attempted to upload constbuffer of disabled shader stage"); - if (current_shader) { - return; + // Reset all buffer draw state for this stage. + for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { + buffer.bindpoint = 0; + buffer.enabled = false; } - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - - current_shader = &test_shader; - if (has_ARB_separate_shader_objects) { - test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); - glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); - } else { - UNREACHABLE(); + // Upload only the enabled buffers from the 16 constbuffers of each shader stage + auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; + + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& used_buffer = entries[bindpoint]; + const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; + auto& buffer_draw_state = + state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; + + ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); + buffer_draw_state.enabled = true; + buffer_draw_state.bindpoint = current_bindpoint + bindpoint; + + VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address); + std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); + Memory::ReadBlock(addr, data.data(), data.size()); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); + glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + + // Now configure the bindpoint of the buffer inside the shader + std::string buffer_name = used_buffer.GetName(); + GLuint index = + glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str()); + if (index != -1) + glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint); } - state.draw.shader_program = test_shader.shader.handle; state.Apply(); - for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { - // Set the texture samplers to correspond to different texture units - std::string uniform_name = "tex[" + std::to_string(texture) + "]"; - GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); - } - } - - if (has_ARB_separate_shader_objects) { - state.draw.shader_program = 0; - state.Apply(); - } + return current_bindpoint + entries.size(); } void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d868bf421..03e02b52a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -6,19 +6,16 @@ #include <array> #include <cstddef> -#include <cstring> #include <memory> -#include <unordered_map> #include <vector> #include <glad/glad.h> -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/hash.h" -#include "common/vector_math.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -30,7 +27,7 @@ public: ~RasterizerOpenGL() override; void DrawArrays() override; - void NotifyMaxwellRegisterChanged(u32 id) override; + void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; @@ -45,7 +42,7 @@ public: /// OpenGL shader generated for a given Maxwell register state struct MaxwellShader { /// OpenGL shader resource - OGLShader shader; + OGLProgram shader; }; struct VertexShader { @@ -56,34 +53,6 @@ public: OGLShader shader; }; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned - // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at - // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. - // Not following that rule will cause problems on some AMD drivers. - struct UniformData {}; - - // static_assert( - // sizeof(UniformData) == 0x460, - // "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - - struct VSUniformData {}; - // static_assert( - // sizeof(VSUniformData) == 1856, - // "The size of the VSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(VSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - - struct FSUniformData {}; - // static_assert( - // sizeof(FSUniformData) == 1856, - // "The size of the FSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(FSUniformData) < 16384, - "FSUniformData structure must be less than 16kb as per the OpenGL spec"); - private: class SamplerInfo { public: @@ -113,6 +82,18 @@ private: /// Binds the required textures to OpenGL before drawing a batch. void BindTextures(); + /* + * Configures the current constbuffers to use for the draw command. + * @param stage The shader stage to configure buffers for. + * @param program The OpenGL program object that contains the specified stage. + * @param current_bindpoint The offset at which to start counting new buffer bindpoints. + * @param entries Vector describing the buffers that are actually used in the guest shader. + * @returns The next available bindpoint for use in the next shader stage. + */ + u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, + u32 current_bindpoint, + const std::vector<GLShader::ConstBufferEntry>& entries); + /// Syncs the viewport to match the guest state void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); @@ -122,9 +103,6 @@ private: /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); - /// Sets the OpenGL shader in accordance with the current guest state - void SetShader(); - /// Syncs the cull mode to match the guest state void SyncCullMode(); @@ -152,23 +130,16 @@ private: RasterizerCacheOpenGL res_cache; - /// Shader used for test renderering - to be removed once we have emulated shaders - MaxwellShader test_shader{}; - - const MaxwellShader* current_shader{}; - bool shader_dirty{}; - - struct { - UniformData data; - bool dirty; - } uniform_block_data = {}; - - OGLPipeline pipeline; + std::unique_ptr<GLShader::ProgramManager> shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; std::array<bool, 16> hw_vao_enabled_attributes; - std::array<SamplerInfo, 32> texture_samplers; + std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; + std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>, + Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> + ssbos; + static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr<OGLStreamBuffer> vertex_buffer; OGLBuffer uniform_buffer; @@ -179,22 +150,11 @@ private: GLsizeiptr vs_input_size; - void AnalyzeVertexArray(bool is_indexed); void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); - OGLBuffer vs_uniform_buffer; - std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; - std::unordered_map<std::string, VertexShader> vs_shader_cache; - OGLShader vs_default_shader; - - void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); - - OGLBuffer fs_uniform_buffer; - std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; - std::unordered_map<std::string, FragmentShader> fs_shader_cache; - OGLShader fs_default_shader; + std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; - void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); + void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5cbafa2e7..ced2b8247 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -7,7 +7,6 @@ #include <cstring> #include <iterator> #include <memory> -#include <unordered_set> #include <utility> #include <vector> #include <boost/optional.hpp> @@ -20,7 +19,6 @@ #include "common/math_util.h" #include "common/microprofile.h" #include "common/scope_exit.h" -#include "common/vector_math.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" @@ -36,6 +34,7 @@ using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; +using ComponentType = SurfaceParams::ComponentType; struct FormatTuple { GLint internal_format; @@ -47,26 +46,24 @@ struct FormatTuple { u32 compression_factor; }; -static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8 +static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5 + {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45 }}; -static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8 - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 -}}; - -static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); - if (type == SurfaceType::Color) { - ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size()); - return fb_format_tuples[static_cast<unsigned int>(pixel_format)]; + if (type == SurfaceType::ColorTexture) { + ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); + // For now only UNORM components are supported + ASSERT(component_type == ComponentType::UNorm); + return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats ASSERT_MSG(false, "Unimplemented"); - } else if (type == SurfaceType::Texture) { - ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } UNREACHABLE(); @@ -85,56 +82,42 @@ static u16 GetResolutionScaleFactor() { } template <bool morton_to_gl, PixelFormat format> -static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - for (u32 y = 0; y < 8; ++y) { - for (u32 x = 0; x < 8; ++x) { - u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; - u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; - if (morton_to_gl) { - std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); - } else { - std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); - } - } - } -} - -template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, + VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(base), gl_buffer, morton_to_gl); -} - -template <> -void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base, - VAddr start, VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1); - - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - auto data = - Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height); - std::memcpy(gl_buffer, data.data(), data.size()); + if (morton_to_gl) { + auto data = Tegra::Texture::UnswizzleTexture( + base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, + block_height); + std::memcpy(gl_buffer, data.data(), data.size()); + } else { + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); + } } -static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::RGBA8>, - MortonCopy<true, PixelFormat::DXT1>, +static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), + SurfaceParams::MaxPixelFormat> + morton_to_gl_fns = { + MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, }; -static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::RGBA8>, - MortonCopy<false, PixelFormat::DXT1>, +static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), + SurfaceParams::MaxPixelFormat> + gl_to_morton_fns = { + MortonCopy<false, PixelFormat::ABGR8>, + MortonCopy<false, PixelFormat::B5G6R5>, + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported + nullptr, + nullptr, + nullptr, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -183,7 +166,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec u32 buffers = 0; - if (type == SurfaceType::Color || type == SurfaceType::Texture) { + if (type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -311,15 +294,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == - std::tie(addr, width, height, stride, pixel_format, is_tiled) && + other_surface.stride, other_surface.block_height, other_surface.pixel_format, + other_surface.component_type, + other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, + pixel_format, component_type, is_tiled) && pixel_format != PixelFormat::Invalid; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { return sub_surface.addr >= addr && sub_surface.end <= end && sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && + sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && + sub_surface.component_type == component_type && (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && GetSubRect(sub_surface).left + sub_surface.width <= stride; @@ -328,7 +314,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && + component_type == expanded_surface.component_type && stride == expanded_surface.stride && (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % BytesInPixels(stride * (is_tiled ? 8 : 1)) == 0; @@ -339,6 +326,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { end < texcopy_params.end) { return false; } + if (texcopy_params.block_height != block_height || + texcopy_params.component_type != component_type) + return false; + if (texcopy_params.width != texcopy_params.stride) { const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && @@ -481,18 +472,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { const u64 start_offset = load_start - addr; if (!is_tiled) { - ASSERT(type == SurfaceType::Color); const u32 bytes_per_pixel{GetFormatBpp() >> 3}; - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check - // the configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, - texture_src_data + start_offset, &gl_buffer[start_offset], - true); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + bytes_per_pixel * width * height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, load_start, load_end); } } @@ -533,11 +519,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { if (backup_bytes) std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); } else if (!is_tiled) { - ASSERT(type == SurfaceType::Color); std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); } else { - gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - flush_start, flush_end); + gl_to_morton_fns[static_cast<size_t>(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); } } @@ -556,7 +541,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint GLint y0 = static_cast<GLint>(rect.bottom); size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format); + const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); GLuint target_tex = texture.handle; // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in @@ -629,7 +614,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format); + const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); @@ -662,7 +647,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui state.draw.read_framebuffer = read_fb_handle; state.Apply(); - if (type == SurfaceType::Color || type == SurfaceType::Texture) { + if (type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, @@ -685,7 +670,8 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui glPixelStorei(GL_PACK_ROW_LENGTH, 0); } -enum MatchFlags { +enum class MatchFlags { + None = 0, Invalid = 1, // Flag that can be applied to other match types, invalid matches require // validation before they can be used Exact = 1 << 1, // Surfaces perfectly match @@ -699,6 +685,10 @@ constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); } +constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { + return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); +} + /// Get the best surface match (and its match type) for the given flags template <MatchFlags find_flags> Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, @@ -716,15 +706,15 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params : (params.res_scale <= surface->res_scale); // validity will be checked in GetCopyableInterval bool is_valid = - find_flags & MatchFlags::Copy + (find_flags & MatchFlags::Copy) != MatchFlags::None ? true : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - if (!(find_flags & MatchFlags::Invalid) && !is_valid) + if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) continue; auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if (!(find_flags & check_type)) + if ((find_flags & check_type) == MatchFlags::None) return; bool matched; @@ -818,7 +808,7 @@ void main() { color = texelFetch(tbo, tbo_offset).rabg; } )"; - d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); + d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); OpenGLState state = OpenGLState::GetCurState(); GLuint old_program = state.draw.shader_program; @@ -1041,9 +1031,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.height = config.tic.Height(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + + // TODO(Subv): Different types per component are not supported. + ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && + config.tic.r_type.Value() == config.tic.b_type.Value() && + config.tic.r_type.Value() == config.tic.a_type.Value()); + + params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); + + if (config.tic.IsTiled()) { + params.block_height = config.tic.BlockHeight(); + } else { + // Use the texture-provided stride value if the texture isn't tiled. + params.stride = params.PixelsInBytes(config.tic.Pitch()); + } + params.UpdateParams(); - if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) { + if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 || + params.stride != params.width) { Surface src_surface; MathUtil::Rectangle<u32> rect; std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); @@ -1083,10 +1089,10 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( } MathUtil::Rectangle<u32> viewport_clamped{ - static_cast<u32>(MathUtil::Clamp(viewport.left, 0, static_cast<s32>(config.width))), - static_cast<u32>(MathUtil::Clamp(viewport.top, 0, static_cast<s32>(config.height))), - static_cast<u32>(MathUtil::Clamp(viewport.right, 0, static_cast<s32>(config.width))), - static_cast<u32>(MathUtil::Clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; + static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), + static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), + static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))), + static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; // get color and depth surfaces SurfaceParams color_params; @@ -1094,10 +1100,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.res_scale = resolution_scale_factor; color_params.width = config.width; color_params.height = config.height; + // TODO(Subv): Can framebuffers use a different block height? + color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; SurfaceParams depth_params = color_params; color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); color_params.UpdateParams(); ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); @@ -1293,7 +1302,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface const SurfaceInterval invalid_interval(addr, addr + size); if (region_owner != nullptr) { - ASSERT(region_owner->type != SurfaceType::Texture); ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); // Surfaces can't have a gap ASSERT(region_owner->width == region_owner->stride); @@ -1355,7 +1363,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { surface->gl_buffer_size = 0; surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), + AllocateSurfaceTexture(surface->texture.handle, + GetFormatTuple(surface->pixel_format, surface->component_type), surface->GetScaledWidth(), surface->GetScaledHeight()); return surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 06524fc59..6861efe16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -52,27 +52,45 @@ enum class ScaleMatch { struct SurfaceParams { enum class PixelFormat { - RGBA8 = 0, - DXT1 = 1, + ABGR8 = 0, + B5G6R5 = 1, + DXT1 = 2, + DXT23 = 3, + DXT45 = 4, + + Max, Invalid = 255, }; + static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); + + enum class ComponentType { + Invalid = 0, + SNorm = 1, + UNorm = 2, + SInt = 3, + UInt = 4, + Float = 5, + }; + enum class SurfaceType { - Color = 0, - Texture = 1, - Depth = 2, - DepthStencil = 3, - Fill = 4, - Invalid = 5 + ColorTexture = 0, + Depth = 1, + DepthStencil = 2, + Fill = 3, + Invalid = 4, }; static constexpr unsigned int GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<unsigned int, 2> bpp_table = { - 32, // RGBA8 - 64, // DXT1 + constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = { + 32, // ABGR8 + 16, // B5G6R5 + 64, // DXT1 + 128, // DXT23 + 128, // DXT45 }; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -85,8 +103,9 @@ struct SurfaceParams { static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -94,8 +113,9 @@ struct SurfaceParams { static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -104,10 +124,69 @@ struct SurfaceParams { // TODO(Subv): Properly implement this switch (format) { case Tegra::Texture::TextureFormat::A8R8G8B8: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; + case Tegra::Texture::TextureFormat::B5G6R5: + return PixelFormat::B5G6R5; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; + case Tegra::Texture::TextureFormat::DXT23: + return PixelFormat::DXT23; + case Tegra::Texture::TextureFormat::DXT45: + return PixelFormat::DXT45; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + + static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { + // TODO(Subv): Properly implement this + switch (format) { + case PixelFormat::ABGR8: + return Tegra::Texture::TextureFormat::A8R8G8B8; + case PixelFormat::B5G6R5: + return Tegra::Texture::TextureFormat::B5G6R5; + case PixelFormat::DXT1: + return Tegra::Texture::TextureFormat::DXT1; + case PixelFormat::DXT23: + return Tegra::Texture::TextureFormat::DXT23; + case PixelFormat::DXT45: + return Tegra::Texture::TextureFormat::DXT45; + default: + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { + // TODO(Subv): Implement more component types + switch (type) { + case Tegra::Texture::ComponentType::UNORM: + return ComponentType::UNorm; default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) { + // TODO(Subv): Implement more render targets + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + case Tegra::RenderTargetFormat::RGB10_A2_UNORM: + return ComponentType::UNorm; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromGPUPixelFormat( + Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return ComponentType::UNorm; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -116,8 +195,7 @@ struct SurfaceParams { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); - if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && - (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { return true; } @@ -133,12 +211,8 @@ struct SurfaceParams { } static SurfaceType GetFormatType(PixelFormat pixel_format) { - if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) { - return SurfaceType::Color; - } - - if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) { - return SurfaceType::Texture; + if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { + return SurfaceType::ColorTexture; } // TODO(Subv): Implement the other formats @@ -210,11 +284,13 @@ struct SurfaceParams { u32 width = 0; u32 height = 0; u32 stride = 0; + u32 block_height = 0; u16 res_scale = 1; bool is_tiled = false; PixelFormat pixel_format = PixelFormat::Invalid; SurfaceType type = SurfaceType::Invalid; + ComponentType component_type = ComponentType::Invalid; }; struct CachedSurface : SurfaceParams { @@ -334,7 +410,7 @@ private: OGLVertexArray attributeless_vao; OGLBuffer d24s8_abgr_buffer; GLsizeiptr d24s8_abgr_buffer_size; - OGLShader d24s8_abgr_shader; + OGLProgram d24s8_abgr_shader; GLint d24s8_abgr_tbo_size_u_id; GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 7da5e74d1..93f9172e7 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -13,14 +13,16 @@ class OGLTexture : private NonCopyable { public: OGLTexture() = default; - OGLTexture(OGLTexture&& o) { - std::swap(handle, o.handle); - } + + OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + ~OGLTexture() { Release(); } - OGLTexture& operator=(OGLTexture&& o) { - std::swap(handle, o.handle); + + OGLTexture& operator=(OGLTexture&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -46,14 +48,16 @@ public: class OGLSampler : private NonCopyable { public: OGLSampler() = default; - OGLSampler(OGLSampler&& o) { - std::swap(handle, o.handle); - } + + OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + ~OGLSampler() { Release(); } - OGLSampler& operator=(OGLSampler&& o) { - std::swap(handle, o.handle); + + OGLSampler& operator=(OGLSampler&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -79,25 +83,71 @@ public: class OGLShader : private NonCopyable { public: OGLShader() = default; - OGLShader(OGLShader&& o) { - std::swap(handle, o.handle); - } + + OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + ~OGLShader() { Release(); } - OGLShader& operator=(OGLShader&& o) { - std::swap(handle, o.handle); + + OGLShader& operator=(OGLShader&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); return *this; } - /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, - const std::vector<const char*>& feedback_vars = {}, - bool separable_program = false) { + void Create(const char* source, GLenum type) { if (handle != 0) return; - handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, - separable_program); + if (source == nullptr) + return; + handle = GLShader::LoadShader(source, type); + } + + void Release() { + if (handle == 0) + return; + glDeleteShader(handle); + handle = 0; + } + + GLuint handle = 0; +}; + +class OGLProgram : private NonCopyable { +public: + OGLProgram() = default; + + OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLProgram() { + Release(); + } + + OGLProgram& operator=(OGLProgram&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + template <typename... T> + void Create(bool separable_program, T... shaders) { + if (handle != 0) + return; + handle = GLShader::LoadProgram(separable_program, shaders...); + } + + /// Creates a new internal OpenGL resource and stores the handle + void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, + bool separable_program = false) { + OGLShader vert, geo, frag; + if (vert_shader) + vert.Create(vert_shader, GL_VERTEX_SHADER); + if (geo_shader) + geo.Create(geo_shader, GL_GEOMETRY_SHADER); + if (frag_shader) + frag.Create(frag_shader, GL_FRAGMENT_SHADER); + Create(separable_program, vert.handle, geo.handle, frag.handle); } /// Deletes the internal OpenGL resource @@ -115,13 +165,12 @@ public: class OGLPipeline : private NonCopyable { public: OGLPipeline() = default; - OGLPipeline(OGLPipeline&& o) { - handle = std::exchange<GLuint>(o.handle, 0); - } + OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {} + ~OGLPipeline() { Release(); } - OGLPipeline& operator=(OGLPipeline&& o) { + OGLPipeline& operator=(OGLPipeline&& o) noexcept { handle = std::exchange<GLuint>(o.handle, 0); return *this; } @@ -148,14 +197,16 @@ public: class OGLBuffer : private NonCopyable { public: OGLBuffer() = default; - OGLBuffer(OGLBuffer&& o) { - std::swap(handle, o.handle); - } + + OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + ~OGLBuffer() { Release(); } - OGLBuffer& operator=(OGLBuffer&& o) { - std::swap(handle, o.handle); + + OGLBuffer& operator=(OGLBuffer&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -182,12 +233,12 @@ class OGLSync : private NonCopyable { public: OGLSync() = default; - OGLSync(OGLSync&& o) : handle(std::exchange(o.handle, nullptr)) {} + OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {} ~OGLSync() { Release(); } - OGLSync& operator=(OGLSync&& o) { + OGLSync& operator=(OGLSync&& o) noexcept { Release(); handle = std::exchange(o.handle, nullptr); return *this; @@ -214,14 +265,16 @@ public: class OGLVertexArray : private NonCopyable { public: OGLVertexArray() = default; - OGLVertexArray(OGLVertexArray&& o) { - std::swap(handle, o.handle); - } + + OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + ~OGLVertexArray() { Release(); } - OGLVertexArray& operator=(OGLVertexArray&& o) { - std::swap(handle, o.handle); + + OGLVertexArray& operator=(OGLVertexArray&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -247,14 +300,16 @@ public: class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; - OGLFramebuffer(OGLFramebuffer&& o) { - std::swap(handle, o.handle); - } + + OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + ~OGLFramebuffer() { Release(); } - OGLFramebuffer& operator=(OGLFramebuffer&& o) { - std::swap(handle, o.handle); + + OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); return *this; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 564ea8f9e..086424395 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2,57 +2,778 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <map> +#include <set> #include <string> -#include <queue> +#include <string_view> #include "common/assert.h" #include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -namespace Maxwell3D { -namespace Shader { +namespace GLShader { namespace Decompiler { +using Tegra::Shader::Attribute; +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Register; +using Tegra::Shader::Sampler; +using Tegra::Shader::SubOp; +using Tegra::Shader::Uniform; + constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -class Impl { +class DecompileFail : public std::runtime_error { +public: + using std::runtime_error::runtime_error; +}; + +/// Describes the behaviour of code path of a given entry point and a return point. +enum class ExitMethod { + Undetermined, ///< Internal value. Only occur when analyzing JMP loop. + AlwaysReturn, ///< All code paths reach the return point. + Conditional, ///< Code path reaches the return point or an END instruction conditionally. + AlwaysEnd, ///< All code paths reach a END instruction. +}; + +/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. +struct Subroutine { + /// Generates a name suitable for GLSL source code. + std::string GetName() const { + return "sub_" + std::to_string(begin) + "_" + std::to_string(end); + } + + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set<u32> labels; ///< Addresses refereced by JMP instructions. + + bool operator<(const Subroutine& rhs) const { + return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); + } +}; + +/// Analyzes shader code and produces a set of subroutines. +class ControlFlowAnalyzer { public: - Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, - const std::string& emit_cb, const std::string& setemit_cb) - : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), - inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), - sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + : program_code(program_code) { + + // Recursively finds all subroutines. + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + if (program_main.exit_method != ExitMethod::AlwaysEnd) + throw DecompileFail("Program does not always end"); + } - std::string Decompile() { - UNREACHABLE(); - return {}; + std::set<Subroutine> GetSubroutines() { + return std::move(subroutines); } private: - const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; - u32 main_offset; - const std::function<std::string(u32)>& inputreg_getter; - const std::function<std::string(u32)>& outputreg_getter; - bool sanitize_mul; - const std::string& emit_cb; - const std::string& setemit_cb; + const ProgramCode& program_code; + std::set<Subroutine> subroutines; + std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; + + /// Adds and analyzes a new subroutine if it is not added yet. + const Subroutine& AddSubroutine(u32 begin, u32 end) { + auto iter = subroutines.find(Subroutine{begin, end}); + if (iter != subroutines.end()) + return *iter; + + Subroutine subroutine{begin, end}; + subroutine.exit_method = Scan(begin, end, subroutine.labels); + if (subroutine.exit_method == ExitMethod::Undetermined) + throw DecompileFail("Recursive function detected"); + return *subroutines.insert(std::move(subroutine)).first; + } + + /// Scans a range of code for labels and determines the exit method. + ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { + auto [iter, inserted] = + exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); + ExitMethod& exit_method = iter->second; + if (!inserted) + return exit_method; + + for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { + if (const auto opcode = OpCode::Decode({program_code[offset]})) { + switch (opcode->GetId()) { + case OpCode::Id::EXIT: { + return exit_method = ExitMethod::AlwaysEnd; + } + } + } + } + return exit_method = ExitMethod::AlwaysReturn; + } +}; + +class ShaderWriter { +public: + void AddLine(std::string_view text) { + DEBUG_ASSERT(scope >= 0); + if (!text.empty()) { + AppendIndentation(); + } + shader_source += text; + AddNewLine(); + } + + void AddLine(char character) { + DEBUG_ASSERT(scope >= 0); + AppendIndentation(); + shader_source += character; + AddNewLine(); + } + + void AddNewLine() { + DEBUG_ASSERT(scope >= 0); + shader_source += '\n'; + } + + std::string GetResult() { + return std::move(shader_source); + } + + int scope = 0; + +private: + void AppendIndentation() { + shader_source.append(static_cast<size_t>(scope) * 4, ' '); + } + + std::string shader_source; }; -std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, - u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb, - const std::string& setemit_cb) { - Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, - sanitize_mul, emit_cb, setemit_cb); - return impl.Decompile(); +class GLSLGenerator { +public: + GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, + u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + : subroutines(subroutines), program_code(program_code), main_offset(main_offset), + stage(stage) { + + Generate(); + } + + std::string GetShaderCode() { + return declarations.GetResult() + shader.GetResult(); + } + + /// Returns entries in the shader that are useful for external functions + ShaderEntries GetEntries() const { + return {GetConstBuffersDeclarations()}; + } + +private: + /// Gets the Subroutine object corresponding to the specified address. + const Subroutine& GetSubroutine(u32 begin, u32 end) const { + auto iter = subroutines.find(Subroutine{begin, end}); + ASSERT(iter != subroutines.end()); + return *iter; + } + + /// Generates code representing an input attribute register. + std::string GetInputAttribute(Attribute::Index attribute) { + switch (attribute) { + case Attribute::Index::Position: + return "position"; + default: + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_input_attribute.insert(attribute); + return "input_attribute_" + std::to_string(index); + } + + NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); + UNREACHABLE(); + } + } + + /// Generates code representing an output attribute register. + std::string GetOutputAttribute(Attribute::Index attribute) { + switch (attribute) { + case Attribute::Index::Position: + return "position"; + default: + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_output_attribute.insert(attribute); + return "output_attribute_" + std::to_string(index); + } + + NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); + UNREACHABLE(); + } + } + + /// Generates code representing a 19-bit immediate value + static std::string GetImmediate19(const Instruction& instr) { + return std::to_string(instr.alu.GetImm20_19()); + } + + /// Generates code representing a 32-bit immediate value + static std::string GetImmediate32(const Instruction& instr) { + return std::to_string(instr.alu.GetImm20_32()); + } + + /// Generates code representing a temporary (GPR) register. + std::string GetRegister(const Register& reg, unsigned elem = 0) { + if (reg == Register::ZeroIndex) + return "0"; + if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) { + // GPRs 0-3 are output color for the fragment shader + return std::string{"color."} + "rgba"[(reg + elem) & 3]; + } + + return *declr_register.insert("register_" + std::to_string(reg + elem)).first; + } + + /// Generates code representing a uniform (C buffer) register. + std::string GetUniform(const Uniform& reg) { + declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index), + static_cast<unsigned>(reg.offset), stage); + return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']'; + } + + /// Generates code representing a texture sampler. + std::string GetSampler(const Sampler& sampler) const { + // TODO(Subv): Support more than just texture sampler 0 + ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported"); + const unsigned index{static_cast<unsigned>(sampler.index.Value()) - + static_cast<unsigned>(Sampler::Index::Sampler_0)}; + return "tex[" + std::to_string(index) + "]"; + } + + /** + * Adds code that calls a subroutine. + * @param subroutine the subroutine to call. + */ + void CallSubroutine(const Subroutine& subroutine) { + if (subroutine.exit_method == ExitMethod::AlwaysEnd) { + shader.AddLine(subroutine.GetName() + "();"); + shader.AddLine("return true;"); + } else if (subroutine.exit_method == ExitMethod::Conditional) { + shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); + } else { + shader.AddLine(subroutine.GetName() + "();"); + } + } + + /** + * Writes code that does an assignment operation. + * @param reg the destination register code. + * @param value the code representing the value to assign. + */ + void SetDest(u64 elem, const std::string& reg, const std::string& value, + u64 dest_num_components, u64 value_num_components, bool is_abs = false) { + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + + std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); + std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); + src = is_abs ? "abs(" + src + ")" : src; + + shader.AddLine(dest + " = " + src + ";"); + } + + /* + * Writes code that assigns a predicate boolean variable. + * @param pred The id of the predicate to write to. + * @param value The expression value to assign to the predicate. + */ + void SetPredicate(u64 pred, const std::string& value) { + using Tegra::Shader::Pred; + // Can't assign to the constant predicate. + ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); + + std::string variable = 'p' + std::to_string(pred); + shader.AddLine(variable + " = " + value + ';'); + declr_predicates.insert(std::move(variable)); + } + + /* + * Returns the condition to use in the 'if' for a predicated instruction. + * @param instr Instruction to generate the if condition for. + * @returns string containing the predicate condition. + */ + std::string GetPredicateCondition(Instruction instr) const { + using Tegra::Shader::Pred; + ASSERT(instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)); + + std::string variable = + 'p' + std::to_string(static_cast<u64>(instr.pred.pred_index.Value())); + + if (instr.negate_pred) { + return "!(" + variable + ')'; + } + + return variable; + } + + /* + * Returns whether the instruction at the specified offset is a 'sched' instruction. + * Sched instructions always appear before a sequence of 3 instructions. + */ + bool IsSchedInstruction(u32 offset) const { + // sched instructions appear once every 4 instructions. + static constexpr size_t SchedPeriod = 4; + u32 absolute_offset = offset - main_offset; + + return (absolute_offset % SchedPeriod) == 0; + } + + /** + * Compiles a single instruction from Tegra to GLSL. + * @param offset the offset of the Tegra shader instruction. + * @return the offset of the next instruction to execute. Usually it is the current offset + * + 1. If the current instruction always terminates the program, returns PROGRAM_END. + */ + u32 CompileInstr(u32 offset) { + // Ignore sched instructions when generating code. + if (IsSchedInstruction(offset)) { + return offset + 1; + } + + const Instruction instr = {program_code[offset]}; + const auto opcode = OpCode::Decode(instr); + + // Decoding failure + if (!opcode) { + NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value); + UNREACHABLE(); + } + + shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName()); + + using Tegra::Shader::Pred; + ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, + "NeverExecute predicate not implemented"); + + if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + shader.AddLine("if (" + GetPredicateCondition(instr) + ')'); + shader.AddLine('{'); + ++shader.scope; + } + + switch (opcode->GetType()) { + case OpCode::Type::Arithmetic: { + std::string dest = GetRegister(instr.gpr0); + std::string op_a = instr.alu.negate_a ? "-" : ""; + op_a += GetRegister(instr.gpr8); + if (instr.alu.abs_a) { + op_a = "abs(" + op_a + ")"; + } + + std::string op_b = instr.alu.negate_b ? "-" : ""; + + if (instr.is_b_imm) { + op_b += GetImmediate19(instr); + } else { + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } + } + + if (instr.alu.abs_b) { + op_b = "abs(" + op_b + ")"; + } + + switch (opcode->GetId()) { + case OpCode::Id::FMUL_C: + case OpCode::Id::FMUL_R: + case OpCode::Id::FMUL_IMM: { + SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); + break; + } + case OpCode::Id::FMUL32_IMM: { + // fmul32i doesn't have abs or neg bits. + SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1); + break; + } + case OpCode::Id::FADD_C: + case OpCode::Id::FADD_R: + case OpCode::Id::FADD_IMM: { + SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); + break; + } + case OpCode::Id::MUFU: { + switch (instr.sub_op) { + case SubOp::Cos: + SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Sin: + SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Ex2: + SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Lg2: + SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Rcp: + SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); + break; + case SubOp::Rsq: + SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Min: + SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d); + break; + default: + NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", + static_cast<unsigned>(instr.sub_op.Value())); + UNREACHABLE(); + } + break; + } + case OpCode::Id::RRO: { + NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction"); + break; + } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + break; + } + case OpCode::Type::Ffma: { + std::string dest = GetRegister(instr.gpr0); + std::string op_a = GetRegister(instr.gpr8); + std::string op_b = instr.ffma.negate_b ? "-" : ""; + std::string op_c = instr.ffma.negate_c ? "-" : ""; + + switch (opcode->GetId()) { + case OpCode::Id::FFMA_CR: { + op_b += GetUniform(instr.uniform); + op_c += GetRegister(instr.gpr39); + break; + } + case OpCode::Id::FFMA_RR: { + op_b += GetRegister(instr.gpr20); + op_c += GetRegister(instr.gpr39); + break; + } + case OpCode::Id::FFMA_RC: { + op_b += GetRegister(instr.gpr39); + op_c += GetUniform(instr.uniform); + break; + } + case OpCode::Id::FFMA_IMM: { + op_b += GetImmediate19(instr); + op_c += GetRegister(instr.gpr39); + break; + } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + + SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); + break; + } + case OpCode::Type::Memory: { + std::string gpr0 = GetRegister(instr.gpr0); + const Attribute::Index attribute = instr.attribute.fmt20.index; + + switch (opcode->GetId()) { + case OpCode::Id::LD_A: { + ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); + SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); + break; + } + case OpCode::Id::ST_A: { + ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); + SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); + break; + } + case OpCode::Id::TEXS: { + ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); + const std::string op_a = GetRegister(instr.gpr8); + const std::string op_b = GetRegister(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + // Add an extra scope and declare the texture coords inside to prevent overwriting + // them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "texture(" + sampler + ", coords)"; + for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { + SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4); + } + --shader.scope; + shader.AddLine("}"); + break; + } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + break; + } + case OpCode::Type::FloatPredicate: { + std::string op_a = instr.fsetp.neg_a ? "-" : ""; + op_a += GetRegister(instr.gpr8); + + if (instr.fsetp.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + std::string op_b{}; + + if (instr.is_b_imm) { + if (instr.fsetp.neg_b) { + // Only the immediate version of fsetp has a neg_b bit. + op_b += '-'; + } + op_b += '(' + GetImmediate19(instr) + ')'; + } else { + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } + } + + if (instr.fsetp.abs_b) { + op_b = "abs(" + op_b + ')'; + } + + using Tegra::Shader::Pred; + ASSERT_MSG(instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex) && + instr.fsetp.pred39 == static_cast<u64>(Pred::UnusedIndex), + "Compound predicates are not implemented"); + + // We can't use the constant predicate as destination. + ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); + + using Tegra::Shader::PredCondition; + switch (instr.fsetp.cond) { + case PredCondition::LessThan: + SetPredicate(instr.fsetp.pred3, '(' + op_a + ") < (" + op_b + ')'); + break; + case PredCondition::Equal: + SetPredicate(instr.fsetp.pred3, '(' + op_a + ") == (" + op_b + ')'); + break; + default: + NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})", + static_cast<unsigned>(instr.fsetp.cond.Value()), op_a, op_b); + UNREACHABLE(); + } + break; + } + default: { + switch (opcode->GetId()) { + case OpCode::Id::EXIT: { + ASSERT_MSG(instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex), + "Predicated exits not implemented"); + shader.AddLine("return true;"); + offset = PROGRAM_END - 1; + break; + } + case OpCode::Id::KIL: { + shader.AddLine("discard;"); + break; + } + case OpCode::Id::IPA: { + const auto& attribute = instr.attribute.fmt28; + std::string dest = GetRegister(instr.gpr0); + SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4); + break; + } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + + break; + } + } + + // Close the predicate condition scope. + if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + --shader.scope; + shader.AddLine('}'); + } + + return offset + 1; + } + + /** + * Compiles a range of instructions from Tegra to GLSL. + * @param begin the offset of the starting instruction. + * @param end the offset where the compilation should stop (exclusive). + * @return the offset of the next instruction to compile. PROGRAM_END if the program + * terminates. + */ + u32 CompileRange(u32 begin, u32 end) { + u32 program_counter; + for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { + program_counter = CompileInstr(program_counter); + } + return program_counter; + } + + void Generate() { + // Add declarations for all subroutines + for (const auto& subroutine : subroutines) { + shader.AddLine("bool " + subroutine.GetName() + "();"); + } + shader.AddNewLine(); + + // Add the main entry point + shader.AddLine("bool exec_shader() {"); + ++shader.scope; + CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); + --shader.scope; + shader.AddLine("}\n"); + + // Add definitions for all subroutines + for (const auto& subroutine : subroutines) { + std::set<u32> labels = subroutine.labels; + + shader.AddLine("bool " + subroutine.GetName() + "() {"); + ++shader.scope; + + if (labels.empty()) { + if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { + shader.AddLine("return false;"); + } + } else { + labels.insert(subroutine.begin); + shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("while (true) {"); + ++shader.scope; + + shader.AddLine("switch (jmp_to) {"); + + for (auto label : labels) { + shader.AddLine("case " + std::to_string(label) + "u: {"); + ++shader.scope; + + auto next_it = labels.lower_bound(label + 1); + u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; + + u32 compile_end = CompileRange(label, next_label); + if (compile_end > next_label && compile_end != PROGRAM_END) { + // This happens only when there is a label inside a IF/LOOP block + shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + labels.emplace(compile_end); + } + + --shader.scope; + shader.AddLine('}'); + } + + shader.AddLine("default: return false;"); + shader.AddLine('}'); + + --shader.scope; + shader.AddLine('}'); + + shader.AddLine("return false;"); + } + + --shader.scope; + shader.AddLine("}\n"); + + DEBUG_ASSERT(shader.scope == 0); + } + + GenerateDeclarations(); + } + + /// Returns a list of constant buffer declarations + std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const { + std::vector<ConstBufferEntry> result; + std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(), + std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); }); + return result; + } + + /// Add declarations for registers + void GenerateDeclarations() { + for (const auto& reg : declr_register) { + declarations.AddLine("float " + reg + " = 0.0;"); + } + declarations.AddNewLine(); + + for (const auto& index : declr_input_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine("layout(location = " + + std::to_string(static_cast<u32>(index) - + static_cast<u32>(Attribute::Index::Attribute_0)) + + ") in vec4 " + GetInputAttribute(index) + ";"); + } + declarations.AddNewLine(); + + for (const auto& index : declr_output_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine("layout(location = " + + std::to_string(static_cast<u32>(index) - + static_cast<u32>(Attribute::Index::Attribute_0)) + + ") out vec4 " + GetOutputAttribute(index) + ";"); + } + declarations.AddNewLine(); + + unsigned const_buffer_layout = 0; + for (const auto& entry : GetConstBuffersDeclarations()) { + declarations.AddLine("layout(std430) buffer " + entry.GetName()); + declarations.AddLine('{'); + declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];"); + declarations.AddLine("};"); + declarations.AddNewLine(); + ++const_buffer_layout; + } + + declarations.AddNewLine(); + for (const auto& pred : declr_predicates) { + declarations.AddLine("bool " + pred + " = false;"); + } + declarations.AddNewLine(); + } + +private: + const std::set<Subroutine>& subroutines; + const ProgramCode& program_code; + const u32 main_offset; + Maxwell3D::Regs::ShaderStage stage; + + ShaderWriter shader; + ShaderWriter declarations; + + // Declarations + std::set<std::string> declr_register; + std::set<std::string> declr_predicates; + std::set<Attribute::Index> declr_input_attribute; + std::set<Attribute::Index> declr_output_attribute; + std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; +}; // namespace Decompiler + +std::string GetCommonDeclarations() { + return "bool exec_shader();"; +} + +boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage) { + try { + auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset, stage); + return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; + } catch (const DecompileFail& exception) { + NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); + } + return boost::none; } } // namespace Decompiler -} // namespace Shader -} // namespace Maxwell3D +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02ebfcbe8..382c76b7a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -2,26 +2,25 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <array> #include <functional> #include <string> +#include <boost/optional.hpp> #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" -namespace Maxwell3D { -namespace Shader { +namespace GLShader { namespace Decompiler { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; -constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; +using Tegra::Engines::Maxwell3D; + +std::string GetCommonDeclarations(); -std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, - u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb = "", - const std::string& setemit_cb = ""); +boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage); } // namespace Decompiler -} // namespace Shader -} // namespace Maxwell3D +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8f3c98800..254f6e2c3 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -3,18 +3,74 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" namespace GLShader { -std::string GenerateVertexShader(const MaxwellVSConfig& config) { - UNREACHABLE(); - return {}; +using Tegra::Engines::Maxwell3D; + +static constexpr u32 PROGRAM_OFFSET{10}; + +ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { + std::string out = "#version 430 core\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + out += Decompiler::GetCommonDeclarations(); + + ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex) + .get_value_or({}); + out += R"( + +out gl_PerVertex { + vec4 gl_Position; +}; + +out vec4 position; + +layout (std140) uniform vs_config { + vec4 viewport_flip; +}; + +void main() { + exec_shader(); + + // Viewport can be flipped, which is unsupported by glViewport + position.xy *= viewport_flip.xy; + gl_Position = position; +} +)"; + out += program.first; + return {out, program.second}; +} + +ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { + std::string out = "#version 430 core\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + out += Decompiler::GetCommonDeclarations(); + + ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Fragment) + .get_value_or({}); + out += R"( + +in vec4 position; +out vec4 color; + +layout (std140) uniform fs_config { + vec4 viewport_flip; +}; + +uniform sampler2D tex[32]; + +void main() { + exec_shader(); } -std::string GenerateFragmentShader(const MaxwellFSConfig& config) { - UNREACHABLE(); - return {}; +)"; + out += program.first; + return {out, program.second}; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 5101e7d30..458032b5c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,46 +4,113 @@ #pragma once -#include <cstring> +#include <array> #include <string> #include <type_traits> +#include <utility> +#include <vector> +#include "common/common_types.h" #include "common/hash.h" namespace GLShader { -enum Attributes { - ATTRIBUTE_POSITION, - ATTRIBUTE_COLOR, - ATTRIBUTE_TEXCOORD0, - ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, - ATTRIBUTE_TEXCOORD0_W, - ATTRIBUTE_NORMQUAT, - ATTRIBUTE_VIEW, +constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; + +using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>; + +class ConstBufferEntry { + using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +public: + void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) { + is_used = true; + this->index = index; + this->stage = stage; + max_offset = std::max(max_offset, offset); + } + + bool IsUsed() const { + return is_used; + } + + unsigned GetIndex() const { + return index; + } + + unsigned GetSize() const { + return max_offset + 1; + } + + std::string GetName() const { + return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index); + } + +private: + static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = { + "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c", + }; + + bool is_used{}; + unsigned index{}; + unsigned max_offset{}; + Maxwell::ShaderStage stage; }; -struct MaxwellShaderConfigCommon { - explicit MaxwellShaderConfigCommon(){}; +struct ShaderEntries { + std::vector<ConstBufferEntry> const_buffer_entries; }; -struct MaxwellVSConfig : MaxwellShaderConfigCommon { - explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} +using ProgramResult = std::pair<std::string, ShaderEntries>; - bool operator==(const MaxwellVSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; - }; +struct ShaderSetup { + ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + + ProgramCode program_code; + bool program_code_hash_dirty = true; + + u64 GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; + } + +private: + u64 program_code_hash{}; }; -struct MaxwellFSConfig : MaxwellShaderConfigCommon { - explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} +struct MaxwellShaderConfigCommon { + void Init(ShaderSetup& setup) { + program_hash = setup.GetProgramCodeHash(); + } - bool operator==(const MaxwellFSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; - }; + u64 program_hash; }; -std::string GenerateVertexShader(const MaxwellVSConfig& config); -std::string GenerateFragmentShader(const MaxwellFSConfig& config); +struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { + explicit MaxwellVSConfig(ShaderSetup& setup) { + state.Init(setup); + } +}; + +struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { + explicit MaxwellFSConfig(ShaderSetup& setup) { + state.Init(setup); + } +}; + +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code + */ +ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); + +/** + * Generates the GLSL fragment shader program source code for the given FS program + * @returns String of the shader source code + */ +ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); } // namespace GLShader @@ -52,14 +119,14 @@ namespace std { template <> struct hash<GLShader::MaxwellVSConfig> { size_t operator()(const GLShader::MaxwellVSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); + return k.Hash(); } }; template <> struct hash<GLShader::MaxwellFSConfig> { size_t operator()(const GLShader::MaxwellFSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); + return k.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 000000000..17b3925a0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -0,0 +1,64 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/hle/kernel/process.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace GLShader { + +namespace Impl { +void SetShaderUniformBlockBinding(GLuint shader, const char* name, + Maxwell3D::Regs::ShaderStage binding, size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index != GL_INVALID_INDEX) { + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, + "Uniform block size did not match! Got %d, expected %zu", + static_cast<int>(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); + } +} + +void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, + sizeof(MaxwellUniformData)); +} + +void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + for (u32 texture = 0; texture < NumTextureSamplers; ++texture) { + // Set the texture samplers to correspond to different texture units + std::string uniform_name = "tex[" + std::to_string(texture) + "]"; + GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); + } + } + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +} // namespace Impl + +void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Support more than one viewport + viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0; + viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0; +} + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 000000000..e963b4b7e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -0,0 +1,175 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <tuple> +#include <unordered_map> +#include <boost/functional/hash.hpp> +#include <glad/glad.h> +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace GLShader { + +/// Number of OpenGL texture samplers that can be used in the fragment shader +static constexpr size_t NumTextureSamplers = 32; + +using Tegra::Engines::Maxwell3D; + +namespace Impl { +void SetShaderUniformBlockBindings(GLuint shader); +void SetShaderSamplerBindings(GLuint shader); +} // namespace Impl + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers. +struct MaxwellUniformData { + void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); + alignas(16) GLvec4 viewport_flip; +}; +static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) < 16384, + "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); + +class OGLShaderStage { +public: + OGLShaderStage() = default; + + void Create(const ProgramResult& program_result, GLenum type) { + OGLShader shader; + shader.Create(program_result.first.c_str(), type); + program.Create(true, shader.handle); + Impl::SetShaderUniformBlockBindings(program.handle); + Impl::SetShaderSamplerBindings(program.handle); + entries = program_result.second; + } + GLuint GetHandle() const { + return program.handle; + } + + ShaderEntries GetEntries() const { + return entries; + } + +private: + OGLProgram program; + ShaderEntries entries; +}; + +// TODO(wwylele): beautify this doc +// This is a shader cache designed for translating PICA shader to GLSL shader. +// The double cache is needed because diffent KeyConfigType, which includes a hash of the code +// region (including its leftover unused code) can generate the same GLSL code. +template <typename KeyConfigType, + ProgramResult (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), + GLenum ShaderType> +class ShaderCache { +public: + ShaderCache() = default; + + using Result = std::pair<GLuint, ShaderEntries>; + + Result Get(const KeyConfigType& key, const ShaderSetup& setup) { + auto map_it = shader_map.find(key); + if (map_it == shader_map.end()) { + ProgramResult program = CodeGenerator(setup, key); + + auto [iter, new_shader] = shader_cache.emplace(program.first, OGLShaderStage{}); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + cached_shader.Create(program, ShaderType); + } + shader_map[key] = &cached_shader; + return {cached_shader.GetHandle(), program.second}; + } else { + return {map_it->second->GetHandle(), map_it->second->GetEntries()}; + } + } + +private: + std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; + std::unordered_map<std::string, OGLShaderStage> shader_cache; +}; + +using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>; + +using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>; + +class ProgramManager { +public: + ProgramManager() { + pipeline.Create(); + } + + ShaderEntries UseProgrammableVertexShader(const MaxwellVSConfig& config, + const ShaderSetup setup) { + ShaderEntries result; + std::tie(current.vs, result) = vertex_shaders.Get(config, setup); + return result; + } + + ShaderEntries UseProgrammableFragmentShader(const MaxwellFSConfig& config, + const ShaderSetup setup) { + ShaderEntries result; + std::tie(current.fs, result) = fragment_shaders.Get(config, setup); + return result; + } + + GLuint GetCurrentProgramStage(Maxwell3D::Regs::ShaderStage stage) { + switch (stage) { + case Maxwell3D::Regs::ShaderStage::Vertex: + return current.vs; + case Maxwell3D::Regs::ShaderStage::Fragment: + return current.fs; + } + + UNREACHABLE(); + } + + void UseTrivialGeometryShader() { + current.gs = 0; + } + + void ApplyTo(OpenGLState& state) { + // Workaround for AMD bug + glUseProgramStages(pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, + 0); + + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; + } + +private: + struct ShaderTuple { + GLuint vs = 0, gs = 0, fs = 0; + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + struct Hash { + std::size_t operator()(const ShaderTuple& tuple) const { + std::size_t hash = 0; + boost::hash_combine(hash, tuple.vs); + boost::hash_combine(hash, tuple.gs); + boost::hash_combine(hash, tuple.fs); + return hash; + } + }; + }; + ShaderTuple current; + VertexShaders vertex_shaders; + FragmentShaders fragment_shaders; + + std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; + OGLPipeline pipeline; +}; + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a6c6204d5..8568fface 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,156 +10,41 @@ namespace GLShader { -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector<const char*>& feedback_vars, - bool separable_program) { - // Create the shaders - GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; - GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; - GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; +GLuint LoadShader(const char* source, GLenum type) { + const char* debug_type; + switch (type) { + case GL_VERTEX_SHADER: + debug_type = "vertex"; + break; + case GL_GEOMETRY_SHADER: + debug_type = "geometry"; + break; + case GL_FRAGMENT_SHADER: + debug_type = "fragment"; + break; + default: + UNREACHABLE(); + } + GLuint shader_id = glCreateShader(type); + glShaderSource(shader_id, 1, &source, nullptr); + NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); + glCompileShader(shader_id); GLint result = GL_FALSE; - int info_log_length; - - if (vertex_shader) { - // Compile Vertex Shader - LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); - - glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); - glCompileShader(vertex_shader_id); - - // Check Vertex Shader - glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> vertex_shader_error(info_log_length); - glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); - } - } - } - - if (geometry_shader) { - // Compile Geometry Shader - LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); - - glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); - glCompileShader(geometry_shader_id); - - // Check Geometry Shader - glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> geometry_shader_error(info_log_length); - glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, - &geometry_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); - } - } - } - - if (fragment_shader) { - // Compile Fragment Shader - LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); - - glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); - glCompileShader(fragment_shader_id); - - // Check Fragment Shader - glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> fragment_shader_error(info_log_length); - glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, - &fragment_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); - } - } - } - - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - if (vertex_shader) { - glAttachShader(program_id, vertex_shader_id); - } - if (geometry_shader) { - glAttachShader(program_id, geometry_shader_id); - } - if (fragment_shader) { - glAttachShader(program_id, fragment_shader_id); - } - - if (!feedback_vars.empty()) { - auto varyings = feedback_vars; - glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), - &varyings[0], GL_INTERLEAVED_ATTRIBS); - } - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint info_log_length; + glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); if (info_log_length > 1) { - std::vector<char> program_error(info_log_length); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + std::string shader_error(info_log_length, ' '); + glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); + NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); } else { - LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); } } - - // If the program linking failed at least one of the shaders was probably bad - if (result == GL_FALSE) { - if (vertex_shader) { - LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); - } - if (geometry_shader) { - LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); - } - if (fragment_shader) { - LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); - } - } - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - - if (vertex_shader) { - glDetachShader(program_id, vertex_shader_id); - glDeleteShader(vertex_shader_id); - } - if (geometry_shader) { - glDetachShader(program_id, geometry_shader_id); - glDeleteShader(geometry_shader_id); - } - if (fragment_shader) { - glDetachShader(program_id, fragment_shader_id); - glDeleteShader(fragment_shader_id); - } - - return program_id; + return shader_id; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index fc7b5e080..a1fa9e814 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -6,18 +6,60 @@ #include <vector> #include <glad/glad.h> +#include "common/assert.h" +#include "common/logging/log.h" namespace GLShader { /** + * Utility function to create and compile an OpenGL GLSL shader + * @param source String of the GLSL shader program + * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) + */ +GLuint LoadShader(const char* source, GLenum type); + +/** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param vertex_shader String of the GLSL vertex shader program - * @param geometry_shader String of the GLSL geometry shader program - * @param fragment_shader String of the GLSL fragment shader program - * @returns Handle of the newly created OpenGL shader object + * @param separable_program whether to create a separable program + * @param shaders ID of shaders to attach to the program + * @returns Handle of the newly created OpenGL program object */ -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, - bool separable_program = false); +template <typename... T> +GLuint LoadProgram(bool separable_program, T... shaders) { + // Link the program + NGLOG_DEBUG(Render_OpenGL, "Linking program..."); + + GLuint program_id = glCreateProgram(); + + ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); + + if (separable_program) { + glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); + } + + glLinkProgram(program_id); + + // Check the program + GLint result = GL_FALSE; + GLint info_log_length; + glGetProgramiv(program_id, GL_LINK_STATUS, &result); + glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::string program_error(info_log_length, ' '); + glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + if (result == GL_TRUE) { + NGLOG_DEBUG(Render_OpenGL, "{}", program_error); + } else { + NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); + } + } + + ASSERT_MSG(result == GL_TRUE, "Shader not linked"); + + ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); + + return program_id; +} } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 6da3a7781..f91dfe36a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -2,8 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <iterator> #include <glad/glad.h> -#include "common/common_funcs.h" #include "common/logging/log.h" #include "video_core/renderer_opengl/gl_state.h" @@ -192,7 +192,7 @@ void OpenGLState::Apply() const { } // Textures - for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) { + for (size_t i = 0; i < std::size(texture_units); ++i) { if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) { glActiveTexture(TextureUnits::MaxwellTexture(i).Enum()); glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d); @@ -202,6 +202,20 @@ void OpenGLState::Apply() const { } } + // Constbuffers + for (u32 stage = 0; stage < draw.const_buffers.size(); ++stage) { + for (u32 buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) { + auto& current = cur_state.draw.const_buffers[stage][buffer_id]; + auto& new_state = draw.const_buffers[stage][buffer_id]; + if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || + current.ssbo != new_state.ssbo) { + if (new_state.enabled) { + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo); + } + } + } + } + // Lighting LUTs if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { glActiveTexture(TextureUnits::LightingLUT.Enum()); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b18af14bb..75c08e645 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -123,6 +123,12 @@ public: GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING + struct ConstBufferConfig { + bool enabled = false; + GLuint bindpoint; + GLuint ssbo; + }; + std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{}; } draw; struct { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 4bc2f52e0..e78dc5784 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <memory> #include <glad/glad.h> #include "common/common_types.h" diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 48ee80125..a49265b38 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -10,6 +10,14 @@ #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" +using GLvec2 = std::array<GLfloat, 2>; +using GLvec3 = std::array<GLfloat, 3>; +using GLvec4 = std::array<GLfloat, 4>; + +using GLuvec2 = std::array<GLuint, 2>; +using GLuvec3 = std::array<GLuint, 3>; +using GLuvec4 = std::array<GLuint, 4>; + namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -23,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_BYTE; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str()); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); UNREACHABLE(); return {}; } @@ -32,17 +40,33 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str()); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); + UNREACHABLE(); + return {}; +} + +inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { + switch (index_format) { + case Maxwell::IndexFormat::UnsignedByte: + return GL_UNSIGNED_BYTE; + case Maxwell::IndexFormat::UnsignedShort: + return GL_UNSIGNED_SHORT; + case Maxwell::IndexFormat::UnsignedInt: + return GL_UNSIGNED_INT; + } + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); UNREACHABLE(); return {}; } inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Triangles: + return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); return {}; } @@ -54,18 +78,90 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) { case Tegra::Texture::TextureFilter::Nearest: return GL_NEAREST; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u", - static_cast<u32>(filter_mode)); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}", + static_cast<u32>(filter_mode)); UNREACHABLE(); return {}; } inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { switch (wrap_mode) { + case Tegra::Texture::WrapMode::Wrap: + return GL_REPEAT; case Tegra::Texture::WrapMode::ClampToEdge: return GL_CLAMP_TO_EDGE; + case Tegra::Texture::WrapMode::ClampOGL: + // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use + // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to + // manually mix them. However the shader part of this is not yet implemented. + return GL_CLAMP_TO_BORDER; + } + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", + static_cast<u32>(wrap_mode)); + UNREACHABLE(); + return {}; +} + +inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { + switch (equation) { + case Maxwell::Blend::Equation::Add: + return GL_FUNC_ADD; + case Maxwell::Blend::Equation::Subtract: + return GL_FUNC_SUBTRACT; + case Maxwell::Blend::Equation::ReverseSubtract: + return GL_FUNC_REVERSE_SUBTRACT; + case Maxwell::Blend::Equation::Min: + return GL_MIN; + case Maxwell::Blend::Equation::Max: + return GL_MAX; + } + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); + UNREACHABLE(); + return {}; +} + +inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { + switch (factor) { + case Maxwell::Blend::Factor::Zero: + return GL_ZERO; + case Maxwell::Blend::Factor::One: + return GL_ONE; + case Maxwell::Blend::Factor::SourceColor: + return GL_SRC_COLOR; + case Maxwell::Blend::Factor::OneMinusSourceColor: + return GL_ONE_MINUS_SRC_COLOR; + case Maxwell::Blend::Factor::SourceAlpha: + return GL_SRC_ALPHA; + case Maxwell::Blend::Factor::OneMinusSourceAlpha: + return GL_ONE_MINUS_SRC_ALPHA; + case Maxwell::Blend::Factor::DestAlpha: + return GL_DST_ALPHA; + case Maxwell::Blend::Factor::OneMinusDestAlpha: + return GL_ONE_MINUS_DST_ALPHA; + case Maxwell::Blend::Factor::DestColor: + return GL_DST_COLOR; + case Maxwell::Blend::Factor::OneMinusDestColor: + return GL_ONE_MINUS_DST_COLOR; + case Maxwell::Blend::Factor::SourceAlphaSaturate: + return GL_SRC_ALPHA_SATURATE; + case Maxwell::Blend::Factor::Source1Color: + return GL_SRC1_COLOR; + case Maxwell::Blend::Factor::OneMinusSource1Color: + return GL_ONE_MINUS_SRC1_COLOR; + case Maxwell::Blend::Factor::Source1Alpha: + return GL_SRC1_ALPHA; + case Maxwell::Blend::Factor::OneMinusSource1Alpha: + return GL_ONE_MINUS_SRC1_ALPHA; + case Maxwell::Blend::Factor::ConstantColor: + return GL_CONSTANT_COLOR; + case Maxwell::Blend::Factor::OneMinusConstantColor: + return GL_ONE_MINUS_CONSTANT_COLOR; + case Maxwell::Blend::Factor::ConstantAlpha: + return GL_CONSTANT_ALPHA; + case Maxwell::Blend::Factor::OneMinusConstantAlpha: + return GL_ONE_MINUS_CONSTANT_ALPHA; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode)); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 78b50b227..ab0acb20a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -9,13 +9,10 @@ #include <memory> #include <glad/glad.h> #include "common/assert.h" -#include "common/bit_field.h" #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" #include "core/memory.h" #include "core/settings.h" #include "core/tracer/recorder.h" @@ -57,7 +54,7 @@ uniform sampler2D color_texture; void main() { // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to // support more framebuffer pixel formats. - color = texture(color_texture, frag_tex_coord).abgr; + color = texture(color_texture, frag_tex_coord); } )"; @@ -210,7 +207,7 @@ void RendererOpenGL::InitOpenGLObjects() { 0.0f); // Link shaders and get variable locations - shader.Create(vertex_shader, nullptr, fragment_shader); + shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); state.draw.shader_program = shader.handle; state.Apply(); uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); @@ -311,10 +308,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, } std::array<ScreenRectVertex, 4> vertices = {{ - ScreenRectVertex(x, y, texcoords.top, right), - ScreenRectVertex(x + w, y, texcoords.bottom, right), - ScreenRectVertex(x, y + h, texcoords.top, left), - ScreenRectVertex(x + w, y + h, texcoords.bottom, left), + ScreenRectVertex(x, y, texcoords.top, left), + ScreenRectVertex(x + w, y, texcoords.bottom, left), + ScreenRectVertex(x, y + h, texcoords.top, right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, right), }}; state.texture_units[0].texture_2d = screen_info.display_texture; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c52f40037..2cc6d9a00 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -72,7 +72,7 @@ private: // OpenGL object IDs OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLShader shader; + OGLProgram shader; /// Display information for Switch screen ScreenInfo screen_info; |