diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
8 files changed, 153 insertions, 29 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 49147fdf7..b653bb479 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -304,7 +304,8 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { } std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, - bool using_depth_fb) { + bool using_depth_fb, + bool preserve_contents) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { @@ -327,7 +328,7 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c Surface depth_surface; MathUtil::Rectangle<u32> surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents); const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; const MathUtil::Rectangle<u32> draw_rect{ @@ -390,7 +391,7 @@ void RasterizerOpenGL::Clear() { ScopeAcquireGLContext acquire_context{emu_window}; auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(use_color_fb, use_depth_fb); + ConfigureFramebuffers(use_color_fb, use_depth_fb, false); // TODO(Subv): Support clearing only partial colors. glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], @@ -445,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() { ScopeAcquireGLContext acquire_context{emu_window}; auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0); + ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true); SyncDepthTestState(); SyncBlendState(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 5cac8029a..394fc59f1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -87,7 +87,8 @@ private: /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth> /// surfaces if writing was enabled. - std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb); + std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, + bool preserve_contents); /// Binds the framebuffer color and depth surface void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index fb7476fb8..817fa07a8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -686,7 +686,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, - bool using_depth_fb) { + bool using_depth_fb, + bool preserve_contents) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): This is hard corded to use just the first render buffer @@ -708,7 +709,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin MathUtil::Rectangle<u32> color_rect{}; Surface color_surface; if (using_color_fb) { - color_surface = GetSurface(color_params); + color_surface = GetSurface(color_params, preserve_contents); if (color_surface) { color_rect = color_surface->GetSurfaceParams().GetRect(); } @@ -717,7 +718,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin MathUtil::Rectangle<u32> depth_rect{}; Surface depth_surface; if (using_depth_fb) { - depth_surface = GetSurface(depth_params); + depth_surface = GetSurface(depth_params, preserve_contents); if (depth_surface) { depth_rect = depth_surface->GetSurfaceParams().GetRect(); } @@ -752,7 +753,7 @@ void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { surface->FlushGLBuffer(); } -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { if (params.addr == 0 || params.height * params.width == 0) { return {}; } @@ -774,9 +775,13 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { // Use the cached surface as-is return surface; - } else { - // If surface parameters changed, recreate the surface from the old one + } else if (preserve_contents) { + // If surface parameters changed and we care about keeping the previous data, recreate + // the surface from the old one return RecreateSurface(surface, params); + } else { + // Delete the old surface before creating a new one to prevent collisions. + UnregisterSurface(surface); } } @@ -793,12 +798,58 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, // Verify surface is compatible for blitting const auto& params{surface->GetSurfaceParams()}; ASSERT(params.type == new_params.type); + ASSERT_MSG(params.GetCompressionFactor(params.pixel_format) == 1, + "Compressed texture reinterpretation is not supported"); // Create a new surface with the new parameters, and blit the previous surface to it Surface new_surface{std::make_shared<CachedSurface>(new_params)}; - BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, - new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle, - draw_framebuffer.handle); + + auto source_format = GetFormatTuple(params.pixel_format, params.component_type); + auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); + + size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); + + // Use a Pixel Buffer Object to download the previous texture and then upload it to the new one + // using the new format. + OGLBuffer pbo; + pbo.Create(); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle); + glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); + glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type, + params.SizeInBytes(), nullptr); + + // If the new texture is bigger than the previous one, we need to fill in the rest with data + // from the CPU. + if (params.SizeInBytes() < new_params.SizeInBytes()) { + // Upload the rest of the memory. + if (new_params.is_tiled) { + // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest of + // the data in this case. Games like Super Mario Odyssey seem to hit this case when + // drawing, it re-uses the memory of a previous texture as a bigger framebuffer but it + // doesn't clear it beforehand, the texture is already full of zeros. + LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during " + "reinterpretation but the texture is tiled."); + } + size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); + auto address = Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress( + new_params.addr + params.SizeInBytes()); + std::vector<u8> data(remaining_size); + Memory::ReadBlock(*address, data.data(), data.size()); + glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, data.data()); + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + const auto& dest_rect{new_params.GetRect()}; + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle); + glTextureSubImage2D( + new_surface->Texture().handle, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()), + static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, dest_format.type, nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + pbo.Release(); // Update cache accordingly UnregisterSurface(surface); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index fc8b44219..907e7d606 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -722,7 +722,8 @@ public: Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + bool preserve_contents); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); @@ -738,7 +739,7 @@ public: private: void LoadSurface(const Surface& surface); - Surface GetSurface(const SurfaceParams& params); + Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); /// Recreates a surface with new parameters Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ac6ccfec7..aeb908744 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -26,6 +26,7 @@ using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; +constexpr u32 PROGRAM_HEADER_SIZE = 0x50; class DecompileFail : public std::runtime_error { public: @@ -621,6 +622,23 @@ public: } private: + // Shader program header for a Fragment Shader. + struct FragmentHeader { + INSERT_PADDING_WORDS(5); + INSERT_PADDING_WORDS(13); + u32 enabled_color_outputs; + union { + BitField<0, 1, u32> writes_samplemask; + BitField<1, 1, u32> writes_depth; + }; + + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { + u32 bit = render_target * 4 + component; + return enabled_color_outputs & (1 << bit); + } + }; + static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); + /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -894,6 +912,36 @@ private: shader.AddLine('}'); } + /// Writes the output values from a fragment shader to the corresponding GLSL output variables. + void EmitFragmentOutputsWrite() { + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); + FragmentHeader header; + std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); + + ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); + + // Write the color outputs using the data in the shader registers, disabled + // rendertargets/components are skipped in the register assignment. + u32 current_reg = 0; + for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; + ++render_target) { + // TODO(Subv): Figure out how dual-source blending is configured in the Switch. + for (u32 component = 0; component < 4; ++component) { + if (header.IsColorComponentOutputEnabled(render_target, component)) { + shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component, + regs.GetRegisterAsFloat(current_reg))); + ++current_reg; + } + } + } + + if (header.writes_depth) { + // The depth output is always 2 registers after the last color output, and current_reg + // already contains one past the last color register. + shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';'); + } + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -1514,7 +1562,7 @@ private: switch (instr.tex.texture_type) { case Tegra::Shader::TextureType::Texture2D: { std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; break; } @@ -1525,8 +1573,18 @@ private: coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; break; } + case Tegra::Shader::TextureType::TextureCube: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); + ASSERT(instr.gpr20.Value() == Register::ZeroIndex); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } default: - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", + static_cast<u32>(instr.tex.texture_type.Value())); + UNREACHABLE(); } const std::string sampler = @@ -1568,6 +1626,13 @@ private: } break; } + case Tegra::Shader::TextureType::Texture3D: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr20); + std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } case Tegra::Shader::TextureType::TextureCube: { std::string x = regs.GetRegisterAsFloat(instr.gpr8); std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); @@ -1576,7 +1641,9 @@ private: break; } default: - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", + static_cast<u32>(instr.texs.GetTextureType())); + UNREACHABLE(); } const std::string sampler = GetSampler(instr.sampler, instr.texs.GetTextureType(), instr.texs.IsArrayTexture()); @@ -1593,7 +1660,8 @@ private: switch (instr.tlds.GetTextureType()) { case Tegra::Shader::TextureType::Texture2D: { if (instr.tlds.IsArrayTexture()) { - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture"); + UNREACHABLE(); } else { std::string x = regs.GetRegisterAsInteger(instr.gpr8); std::string y = regs.GetRegisterAsInteger(instr.gpr20); @@ -1602,7 +1670,9 @@ private: break; } default: - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", + static_cast<u32>(instr.tlds.GetTextureType())); + UNREACHABLE(); } const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(), instr.tlds.IsArrayTexture()); @@ -1623,7 +1693,9 @@ private: break; } default: - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", + static_cast<u32>(instr.tld4.texture_type.Value())); + UNREACHABLE(); } const std::string sampler = @@ -1960,12 +2032,8 @@ private: default: { switch (opcode->GetId()) { case OpCode::Id::EXIT: { - // Final color output is currently hardcoded to GPR0-3 for fragment shaders if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine("color.r = " + regs.GetRegisterAsFloat(0) + ';'); - shader.AddLine("color.g = " + regs.GetRegisterAsFloat(1) + ';'); - shader.AddLine("color.b = " + regs.GetRegisterAsFloat(2) + ';'); - shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';'); + EmitFragmentOutputsWrite(); } switch (instr.flow.cond) { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 57e0e1726..01c7b9720 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -87,7 +87,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo .get_value_or({}); out += R"( in vec4 position; -out vec4 color; +layout(location = 0) out vec4 color[8]; layout (std140) uniform fs_config { vec4 viewport_flip; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 45592daaf..3b0cdf6bc 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <tuple> #include <glad/glad.h> #include "common/common_types.h" diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 26de614ef..eef13dddc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -438,7 +438,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum break; case GL_DEBUG_SEVERITY_NOTIFICATION: case GL_DEBUG_SEVERITY_LOW: - LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message); + LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); break; } } |