summary refs log tree commit diff stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 962
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 378
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 712
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 221
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 188
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 75
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 28
-rw-r--r-- src/video_core/renderer_opengl/pica_to_gl.h 27
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 149
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h 47
13 files changed, 1813 insertions, 980 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4fdf93a3e..bcd1ae78d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -2,28 +2,28 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cstring>
#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/color.h"
-#include "common/file_util.h"
+#include "common/logging/log.h"
#include "common/math_util.h"
-#include "common/microprofile.h"
-#include "common/profiler.h"
+#include "common/vector_math.h"
-#include "core/memory.h"
-#include "core/settings.h"
#include "core/hw/gpu.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
-#include "video_core/utils.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
+#include "video_core/renderer_opengl/renderer_opengl.h"
static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace &&
@@ -36,10 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
stage.GetAlphaMultiplier() == 1);
}
-RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { }
-RasterizerOpenGL::~RasterizerOpenGL() { }
-
-void RasterizerOpenGL::InitObjects() {
+RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
// Create sampler objects
for (size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -61,6 +58,10 @@ void RasterizerOpenGL::InitObjects() {
uniform_block_data.dirty = true;
+ for (unsigned index = 0; index < lighting_luts.size(); index++) {
+ uniform_block_data.lut_dirty[index] = true;
+ }
+
// Set vertex attributes
glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -75,88 +76,47 @@ void RasterizerOpenGL::InitObjects() {
glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
+ glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
+ glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
+
glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
- SetShader();
-
- // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
- fb_color_texture.texture.Create();
- ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
-
- state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
- state.Apply();
-
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-
- fb_depth_texture.texture.Create();
- ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
-
- state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
- state.Apply();
-
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-
- // Configure OpenGL framebuffer
+ // Create render framebuffer
framebuffer.Create();
- state.draw.framebuffer = framebuffer.handle;
+ // Allocate and bind lighting lut textures
+ for (size_t i = 0; i < lighting_luts.size(); ++i) {
+ lighting_luts[i].Create();
+ state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
+ }
state.Apply();
- glActiveTexture(GL_TEXTURE0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
-
- for (size_t i = 0; i < lighting_lut.size(); ++i) {
- lighting_lut[i].Create();
- state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
-
- glActiveTexture(GL_TEXTURE3 + i);
- glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
-
+ for (size_t i = 0; i < lighting_luts.size(); ++i) {
+ glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i));
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
}
- state.Apply();
- GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
- ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
- "OpenGL rasterizer framebuffer setup failed, status %X", status);
-}
-
-void RasterizerOpenGL::Reset() {
+ // Sync fixed function OpenGL state
SyncCullMode();
- SyncDepthModifiers();
SyncBlendEnabled();
SyncBlendFuncs();
SyncBlendColor();
SyncLogicOp();
SyncStencilTest();
SyncDepthTest();
+ SyncColorWriteMask();
+ SyncStencilWriteMask();
+ SyncDepthWriteMask();
+}
- SetShader();
+RasterizerOpenGL::~RasterizerOpenGL() { // Destructor body is empty after this change; the res_cache.InvalidateAll() call below is a removed line
- res_cache.InvalidateAll();
}
/**
@@ -193,47 +153,98 @@ void RasterizerOpenGL::DrawTriangles() {
if (vertex_batch.empty())
return;
- SyncFramebuffer();
- SyncDrawState();
+ const auto& regs = Pica::g_state.regs;
+
+ // Sync and bind the framebuffer surfaces
+ CachedSurface* color_surface;
+ CachedSurface* depth_surface;
+ MathUtil::Rectangle<int> rect;
+ std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
+
+ state.draw.draw_framebuffer = framebuffer.handle;
+ state.Apply();
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
+ bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
- if (state.draw.shader_dirty) {
+ if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ return;
+ }
+
+ // Sync the viewport
+ // These registers hold half-width and half-height, so must be multiplied by 2
+ GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
+ GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
+
+ glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
+ (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
+ (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
+
+ // Sync and bind the texture surfaces
+ const auto pica_textures = regs.GetTextures();
+ for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+ const auto& texture = pica_textures[texture_index];
+
+ if (texture.enabled) {
+ texture_samplers[texture_index].SyncWithConfig(texture.config);
+ CachedSurface* surface = res_cache.GetTextureSurface(texture);
+ if (surface != nullptr) {
+ state.texture_units[texture_index].texture_2d = surface->texture.handle;
+ } else {
+ // Can occur when texture addr is null or its memory is unmapped/invalid
+ state.texture_units[texture_index].texture_2d = 0;
+ }
+ } else {
+ state.texture_units[texture_index].texture_2d = 0;
+ }
+ }
+
+ // Sync and bind the shader
+ if (shader_dirty) {
SetShader();
- state.draw.shader_dirty = false;
+ shader_dirty = false;
}
- for (unsigned index = 0; index < lighting_lut.size(); index++) {
+ // Sync the lighting luts
+ for (unsigned index = 0; index < lighting_luts.size(); index++) {
if (uniform_block_data.lut_dirty[index]) {
SyncLightingLUT(index);
uniform_block_data.lut_dirty[index] = false;
}
}
+ // Sync the uniform data
if (uniform_block_data.dirty) {
glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
uniform_block_data.dirty = false;
}
+ state.Apply();
+
+ // Draw the vertex batch
glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
- vertex_batch.clear();
-
- // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture
- const auto& regs = Pica::g_state.regs;
-
- u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
- * fb_color_texture.width * fb_color_texture.height;
-
- u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format)
- * fb_depth_texture.width * fb_depth_texture.height;
+ // Mark framebuffer surfaces as dirty
+ // TODO: Restrict invalidation area to the viewport
+ if (color_surface != nullptr) {
+ color_surface->dirty = true;
+ res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
+ }
+ if (depth_surface != nullptr) {
+ depth_surface->dirty = true;
+ res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
+ }
- res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true);
- res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
-}
+ vertex_batch.clear();
-void RasterizerOpenGL::FlushFramebuffer() {
- CommitColorBuffer();
- CommitDepthBuffer();
+ // Unbind textures for potential future use as framebuffer attachments
+ for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+ state.texture_units[texture_index].texture_2d = 0;
+ }
+ state.Apply();
}
void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -247,8 +258,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Depth modifiers
case PICA_REG_INDEX(viewport_depth_range):
- case PICA_REG_INDEX(viewport_depth_far_plane):
- SyncDepthModifiers();
+ SyncDepthScale();
+ break;
+ case PICA_REG_INDEX(viewport_depth_near_plane):
+ SyncDepthOffset();
+ break;
+
+ // Depth buffering
+ case PICA_REG_INDEX(depthmap_enable):
+ shader_dirty = true;
break;
// Blending
@@ -265,18 +283,39 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Alpha test
case PICA_REG_INDEX(output_merger.alpha_test):
SyncAlphaTest();
- state.draw.shader_dirty = true;
+ shader_dirty = true;
break;
- // Stencil test
+ // Sync GL stencil test + stencil write mask
+ // (Pica stencil test function register also contains a stencil write mask)
case PICA_REG_INDEX(output_merger.stencil_test.raw_func):
+ SyncStencilTest();
+ SyncStencilWriteMask();
+ break;
case PICA_REG_INDEX(output_merger.stencil_test.raw_op):
+ case PICA_REG_INDEX(framebuffer.depth_format):
SyncStencilTest();
break;
- // Depth test
+ // Sync GL depth test + depth and color write mask
+ // (Pica depth test function register also contains a depth and color write mask)
case PICA_REG_INDEX(output_merger.depth_test_enable):
SyncDepthTest();
+ SyncDepthWriteMask();
+ SyncColorWriteMask();
+ break;
+
+ // Sync GL depth and stencil write mask
+ // (This is a dedicated combined depth / stencil write-enable register)
+ case PICA_REG_INDEX(framebuffer.allow_depth_stencil_write):
+ SyncDepthWriteMask();
+ SyncStencilWriteMask();
+ break;
+
+ // Sync GL color write mask
+ // (This is a dedicated color write-enable register)
+ case PICA_REG_INDEX(framebuffer.allow_color_write):
+ SyncColorWriteMask();
break;
// Logic op
@@ -284,6 +323,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncLogicOp();
break;
+ // Texture 0 type
+ case PICA_REG_INDEX(texture0.type):
+ shader_dirty = true;
+ break;
+
// TEV stages
case PICA_REG_INDEX(tev_stage0.color_source1):
case PICA_REG_INDEX(tev_stage0.color_modifier1):
@@ -310,7 +354,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
case PICA_REG_INDEX(tev_stage5.color_op):
case PICA_REG_INDEX(tev_stage5.color_scale):
case PICA_REG_INDEX(tev_combiner_buffer_input):
- state.draw.shader_dirty = true;
+ shader_dirty = true;
break;
case PICA_REG_INDEX(tev_stage0.const_r):
SyncTevConstColor(0, regs.tev_stage0);
@@ -497,41 +541,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
}
}
+void RasterizerOpenGL::FlushAll() { // Forwards the request unchanged to the surface cache
+ res_cache.FlushAll(); // NOTE(review): what the cache does on FlushAll is defined in gl_rasterizer_cache.cpp, not visible here
+}
+
void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { // Forwards the [addr, addr + size) request to the surface cache
- const auto& regs = Pica::g_state.regs;
+ res_cache.FlushRegion(addr, size, nullptr, false); // no surface is passed (nullptr); the final flag is false here and true in FlushAndInvalidateRegion - presumably the "invalidate" switch, confirm in gl_rasterizer_cache.h
+}
+
+void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { // Same forwarding as FlushRegion, but with the final flag set
+ res_cache.FlushRegion(addr, size, nullptr, true); // NOTE(review): final argument presumably requests invalidation in addition to the flush - confirm against the cache API
+}
- u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
- * fb_color_texture.width * fb_color_texture.height;
+bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { // Tries to carry out a display transfer as a blit between two cached surfaces; returns true only after TryBlitSurfaces succeeded
+ using PixelFormat = CachedSurface::PixelFormat; // NOTE(review): this alias is not referenced anywhere in the function body
+ using SurfaceType = CachedSurface::SurfaceType; // NOTE(review): this alias is not referenced anywhere in the function body
- u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
- * fb_color_texture.width * fb_color_texture.height;
+ if (config.is_texture_copy) { // texture copies are rejected up front
+ // TODO(tfarley): Try to hardware accelerate this
+ return false;
+ }
+
+ CachedSurface src_params; // describes the surface to look up for the transfer input
+ src_params.addr = config.GetPhysicalInputAddress();
+ src_params.width = config.output_width; // the source extent is taken from the output width/height fields
+ src_params.height = config.output_height;
+ src_params.is_tiled = !config.input_linear; // source is treated as tiled unless input_linear is set
+ src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
+
+ CachedSurface dst_params; // describes the surface to look up for the transfer output
+ dst_params.addr = config.GetPhysicalOutputAddress();
+ dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); // width is halved for every scaling mode other than NoScale
+ dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); // height is halved only for ScaleXY
+ dst_params.is_tiled = config.input_linear != config.dont_swizzle; // destination is tiled exactly when the two flags differ
+ dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
+
+ MathUtil::Rectangle<int> src_rect; // filled in by GetSurfaceRect
+ CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); // NOTE(review): the meaning of the two bool arguments is defined by the cache API, not visible here
+
+ if (src_surface == nullptr) {
+ return false;
+ }
- // If source memory region overlaps 3DS framebuffers, commit them before the copy happens
- if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size))
- CommitColorBuffer();
+ // Require destination surface to have same resolution scale as source to preserve scaling
+ dst_params.res_scale_width = src_surface->res_scale_width;
+ dst_params.res_scale_height = src_surface->res_scale_height;
- if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size))
- CommitDepthBuffer();
+ MathUtil::Rectangle<int> dst_rect; // filled in by GetSurfaceRect
+ CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect); // bool arguments are the reverse of the source lookup above
+
+ if (dst_surface == nullptr) {
+ return false;
+ }
+
+ // Don't accelerate if the src and dst surfaces are the same
+ if (src_surface == dst_surface) {
+ return false;
+ }
+
+ if (config.flip_vertically) {
+ std::swap(dst_rect.top, dst_rect.bottom); // the vertical flip is expressed by exchanging the destination rectangle's top and bottom edges
+ }
+
+ if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
+ return false;
+ }
+
+ u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; // destination size in bytes: width * height * bits-per-pixel / 8
+ dst_surface->dirty = true; // the blit changed the destination surface's contents
+ res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); // passes dst_surface itself to the cache - presumably so it is skipped while overlapping surfaces are flushed/invalidated, confirm against the cache API
+ return true;
}
-void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
- const auto& regs = Pica::g_state.regs;
+/**
+ * Attempts to execute a GPU memory fill as an OpenGL clear of a cached surface.
+ *
+ * The surface returned by the cache is attached to the rasterizer's framebuffer object and
+ * cleared with glClearBuffer*, after which the surface is marked dirty and its region is
+ * passed to the cache's FlushRegion.
+ *
+ * @param config Memory fill registers (fill width flags and the 16/24/32-bit fill value)
+ * @return true if the clear was issued; false if no fill surface was available, the
+ *         framebuffer was incomplete, or the surface format / fill width combination is not
+ *         handled here. Nothing has been cleared when false is returned, and the draw
+ *         framebuffer that was bound on entry is bound again.
+ */
+bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
+    using PixelFormat = CachedSurface::PixelFormat;
+    using SurfaceType = CachedSurface::SurfaceType;
+
+    CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
+
+    if (dst_surface == nullptr) {
+        return false;
+    }
+
+    OpenGLState cur_state = OpenGLState::GetCurState();
+
+    SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
+
+    GLuint old_fb = cur_state.draw.draw_framebuffer;
+    cur_state.draw.draw_framebuffer = framebuffer.handle;
+    // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
+    cur_state.Apply();
+
+    // Rebinds the draw framebuffer that was bound on entry. Used on every bail-out path below so
+    // that a rejected fill leaves the same framebuffer bound as a successful one does. At those
+    // points cur_state differs from the entry state only in the framebuffer binding.
+    auto RestoreDrawFramebuffer = [&cur_state, old_fb]() {
+        cur_state.draw.draw_framebuffer = old_fb;
+        cur_state.Apply();
+    };
+
+    if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+            RestoreDrawFramebuffer();
+            return false;
+        }
+
+        GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+
+        // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
+        // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
+        // Currently only handles formats that are multiples of the fill value size
+
+        if (config.fill_24bit) {
+            switch (dst_surface->pixel_format) {
+            case PixelFormat::RGB8:
+                color_values[0] = config.value_24bit_r / 255.0f;
+                color_values[1] = config.value_24bit_g / 255.0f;
+                color_values[2] = config.value_24bit_b / 255.0f;
+                break;
+            default:
+                RestoreDrawFramebuffer();
+                return false;
+            }
+        } else if (config.fill_32bit) {
+            u32 value = config.value_32bit;
+
+            switch (dst_surface->pixel_format) {
+            case PixelFormat::RGBA8:
+                color_values[0] = (value >> 24) / 255.0f;
+                color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
+                color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
+                color_values[3] = (value & 0xFF) / 255.0f;
+                break;
+            default:
+                RestoreDrawFramebuffer();
+                return false;
+            }
+        } else {
+            // Neither the 24-bit nor the 32-bit flag is set: treat the value as a 16-bit pattern
+            u16 value_16bit = config.value_16bit.Value();
+            Math::Vec4<u8> color;
+
+            // NOTE(review): the divisors below (31/63/15, and the undivided RGB5A1 alpha) assume the
+            // Color::Decode* helpers return channel values at their native bit width. If they
+            // return channels already expanded to 8 bits, every divisor should be 255.0f - confirm
+            // against common/color.h.
+            switch (dst_surface->pixel_format) {
+            case PixelFormat::RGBA8:
+                // A 32-bit pixel filled with a repeating 16-bit pattern: R/B and G/A pair up
+                color_values[0] = (value_16bit >> 8) / 255.0f;
+                color_values[1] = (value_16bit & 0xFF) / 255.0f;
+                color_values[2] = color_values[0];
+                color_values[3] = color_values[1];
+                break;
+            case PixelFormat::RGB5A1:
+                color = Color::DecodeRGB5A1((const u8*)&value_16bit);
+                color_values[0] = color[0] / 31.0f;
+                color_values[1] = color[1] / 31.0f;
+                color_values[2] = color[2] / 31.0f;
+                color_values[3] = color[3];
+                break;
+            case PixelFormat::RGB565:
+                color = Color::DecodeRGB565((const u8*)&value_16bit);
+                color_values[0] = color[0] / 31.0f;
+                color_values[1] = color[1] / 63.0f;
+                color_values[2] = color[2] / 31.0f;
+                break;
+            case PixelFormat::RGBA4:
+                color = Color::DecodeRGBA4((const u8*)&value_16bit);
+                color_values[0] = color[0] / 15.0f;
+                color_values[1] = color[1] / 15.0f;
+                color_values[2] = color[2] / 15.0f;
+                color_values[3] = color[3] / 15.0f;
+                break;
+            case PixelFormat::IA8:
+            case PixelFormat::RG8:
+                color_values[0] = (value_16bit >> 8) / 255.0f;
+                color_values[1] = (value_16bit & 0xFF) / 255.0f;
+                break;
+            default:
+                RestoreDrawFramebuffer();
+                return false;
+            }
+        }
+
+        // Force all channels writable so the clear is not filtered by the current color mask
+        cur_state.color_mask.red_enabled = true;
+        cur_state.color_mask.green_enabled = true;
+        cur_state.color_mask.blue_enabled = true;
+        cur_state.color_mask.alpha_enabled = true;
+        cur_state.Apply();
+        glClearBufferfv(GL_COLOR, 0, color_values);
+    } else if (dst_type == SurfaceType::Depth) {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+            RestoreDrawFramebuffer();
+            return false;
+        }
+
+        GLfloat value_float;
+        if (dst_surface->pixel_format == PixelFormat::D16) {
+            value_float = config.value_32bit / 65535.0f; // 2^16 - 1
+        } else if (dst_surface->pixel_format == PixelFormat::D24) {
+            value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
+        } else {
+            // Any other format would leave value_float uninitialized; refuse the fill instead
+            RestoreDrawFramebuffer();
+            return false;
+        }
+
+        cur_state.depth.write_mask = true;
+        cur_state.Apply();
+        glClearBufferfv(GL_DEPTH, 0, &value_float);
+    } else if (dst_type == SurfaceType::DepthStencil) {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
- u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
- * fb_color_texture.width * fb_color_texture.height;
+        if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+            RestoreDrawFramebuffer();
+            return false;
+        }
- u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format)
- * fb_depth_texture.width * fb_depth_texture.height;
+        // Low 24 bits carry the depth value, the top byte carries the stencil value
+        GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
+        GLint value_int = (config.value_32bit >> 24);
- // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL
- if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size))
- ReloadColorBuffer();
+        cur_state.depth.write_mask = true;
+        // Enable all 8 stencil bits. Assigning `true` would become the bitmask 0x01 if this field
+        // is an integer mask (as glStencilMask takes), restricting the clear to stencil bit 0.
+        cur_state.stencil.write_mask = 0xFF;
+        cur_state.Apply();
+        glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
+    } else {
+        // Unrecognised surface type: nothing was cleared, so do not report the fill as done
+        RestoreDrawFramebuffer();
+        return false;
+    }
- if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size))
- ReloadDepthBuffer();
+    cur_state.draw.draw_framebuffer = old_fb;
+    // TODO: Return scissor test to previous value when scissor test is implemented
+    cur_state.Apply();
- // Notify cache of flush in case the region touches a cached resource
- res_cache.InvalidateInRange(addr, size);
+    // The GL texture now holds the filled contents
+    dst_surface->dirty = true;
+    res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
+    // Looks up a cached surface covering the displayed framebuffer and, when one is found,
+    // writes its GL texture handle and texture coordinates into screen_info.
+    // Returns false (leaving screen_info untouched) for a null address or a failed lookup.
+    if (framebuffer_addr == 0) {
+        return false;
+    }
+
+    // Describe the linear (untiled) framebuffer we want the cache to resolve
+    CachedSurface lookup;
+    lookup.addr = framebuffer_addr;
+    lookup.width = config.width;
+    lookup.height = config.height;
+    lookup.stride = pixel_stride;
+    lookup.is_tiled = false;
+    lookup.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
+
+    MathUtil::Rectangle<int> cached_rect;
+    CachedSurface* const surface = res_cache.GetSurfaceRect(lookup, false, true, cached_rect);
+    if (surface == nullptr) {
+        return false;
+    }
+
+    const u32 surface_width = surface->GetScaledWidth();
+    const u32 surface_height = surface->GetScaledHeight();
+    const float width_f = static_cast<float>(surface_width);
+    const float height_f = static_cast<float>(surface_height);
+
+    // Express the returned sub-rectangle as fractions of the scaled surface size. The values are
+    // handed to the Rectangle constructor in top, left, bottom, right order.
+    const float frac_top = static_cast<float>(cached_rect.top) / height_f;
+    const float frac_left = static_cast<float>(cached_rect.left) / width_f;
+    const float frac_bottom = static_cast<float>(cached_rect.bottom) / height_f;
+    const float frac_right = static_cast<float>(cached_rect.right) / width_f;
+
+    screen_info.display_texcoords = MathUtil::Rectangle<float>(frac_top, frac_left, frac_bottom, frac_right);
+    screen_info.display_texture = surface->texture.handle;
+
+    return true;
}
void RasterizerOpenGL::SamplerInfo::Create() {
@@ -567,114 +827,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
if (border_color != config.border_color.raw) {
+ border_color = config.border_color.raw;
auto gl_color = PicaToGL::ColorRGBA8(border_color);
glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data());
}
}
}
-void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
- GLint internal_format;
-
- texture.format = format;
- texture.width = width;
- texture.height = height;
-
- switch (format) {
- case Pica::Regs::ColorFormat::RGBA8:
- internal_format = GL_RGBA;
- texture.gl_format = GL_RGBA;
- texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
- break;
-
- case Pica::Regs::ColorFormat::RGB8:
- // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
- // specific OpenGL type used in this function using native-endian (that is, little-endian
- // mostly everywhere) for words or half-words.
- // TODO: check how those behave on big-endian processors.
- internal_format = GL_RGB;
- texture.gl_format = GL_BGR;
- texture.gl_type = GL_UNSIGNED_BYTE;
- break;
-
- case Pica::Regs::ColorFormat::RGB5A1:
- internal_format = GL_RGBA;
- texture.gl_format = GL_RGBA;
- texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
- break;
-
- case Pica::Regs::ColorFormat::RGB565:
- internal_format = GL_RGB;
- texture.gl_format = GL_RGB;
- texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
- break;
-
- case Pica::Regs::ColorFormat::RGBA4:
- internal_format = GL_RGBA;
- texture.gl_format = GL_RGBA;
- texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
- break;
-
- default:
- LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
- UNIMPLEMENTED();
- break;
- }
-
- state.texture_units[0].texture_2d = texture.texture.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
- texture.gl_format, texture.gl_type, nullptr);
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-}
-
-void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
- GLint internal_format;
-
- texture.format = format;
- texture.width = width;
- texture.height = height;
-
- switch (format) {
- case Pica::Regs::DepthFormat::D16:
- internal_format = GL_DEPTH_COMPONENT16;
- texture.gl_format = GL_DEPTH_COMPONENT;
- texture.gl_type = GL_UNSIGNED_SHORT;
- break;
-
- case Pica::Regs::DepthFormat::D24:
- internal_format = GL_DEPTH_COMPONENT24;
- texture.gl_format = GL_DEPTH_COMPONENT;
- texture.gl_type = GL_UNSIGNED_INT;
- break;
-
- case Pica::Regs::DepthFormat::D24S8:
- internal_format = GL_DEPTH24_STENCIL8;
- texture.gl_format = GL_DEPTH_STENCIL;
- texture.gl_type = GL_UNSIGNED_INT_24_8;
- break;
-
- default:
- LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
- UNIMPLEMENTED();
- break;
- }
-
- state.texture_units[0].texture_2d = texture.texture.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
- texture.gl_format, texture.gl_type, nullptr);
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-}
-
void RasterizerOpenGL::SetShader() {
PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -722,6 +881,8 @@ void RasterizerOpenGL::SetShader() {
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
// Update uniforms
+ SyncDepthScale();
+ SyncDepthOffset();
SyncAlphaTest();
SyncCombinerColor();
auto& tev_stages = Pica::g_state.regs.GetTevStages();
@@ -730,6 +891,8 @@ void RasterizerOpenGL::SetShader() {
SyncGlobalAmbient();
for (int light_index = 0; light_index < 8; light_index++) {
+ SyncLightSpecular0(light_index);
+ SyncLightSpecular1(light_index);
SyncLightDiffuse(light_index);
SyncLightAmbient(light_index);
SyncLightPosition(light_index);
@@ -737,83 +900,6 @@ void RasterizerOpenGL::SetShader() {
}
}
-void RasterizerOpenGL::SyncFramebuffer() {
- const auto& regs = Pica::g_state.regs;
-
- PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
- Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
-
- PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
- Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
-
- bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
- fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
-
- bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
- fb_size_changed;
-
- bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
- fb_size_changed;
-
- bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
- color_fb_prop_changed;
-
- bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
- depth_fb_prop_changed;
-
- // Commit if framebuffer modified in any way
- if (color_fb_modified)
- CommitColorBuffer();
-
- if (depth_fb_modified)
- CommitDepthBuffer();
-
- // Reconfigure framebuffer textures if any property has changed
- if (color_fb_prop_changed) {
- ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
- regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
- }
-
- if (depth_fb_prop_changed) {
- ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
- regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
-
- // Only attach depth buffer as stencil if it supports stencil
- switch (new_fb_depth_format) {
- case Pica::Regs::DepthFormat::D16:
- case Pica::Regs::DepthFormat::D24:
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- break;
-
- case Pica::Regs::DepthFormat::D24S8:
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
- break;
-
- default:
- LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
- UNIMPLEMENTED();
- break;
- }
- }
-
- // Load buffer data again if fb modified in any way
- if (color_fb_modified) {
- cached_fb_color_addr = new_fb_color_addr;
-
- ReloadColorBuffer();
- }
-
- if (depth_fb_modified) {
- cached_fb_depth_addr = new_fb_depth_addr;
-
- ReloadDepthBuffer();
- }
-
- GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
- ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
- "OpenGL rasterizer framebuffer setup failed, status %X", status);
-}
-
void RasterizerOpenGL::SyncCullMode() {
const auto& regs = Pica::g_state.regs;
@@ -839,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() {
}
}
-void RasterizerOpenGL::SyncDepthModifiers() {
- float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
- float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
+void RasterizerOpenGL::SyncDepthScale() { // Copies the PICA viewport_depth_range register into the uniform block's depth_scale.
+ float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); // The register is read as a raw float24 and widened to a 32-bit float.
+ if (depth_scale != uniform_block_data.data.depth_scale) { // Only touch the uniform data (and the dirty flag) when the value actually changed.
+ uniform_block_data.data.depth_scale = depth_scale;
+ uniform_block_data.dirty = true; // NOTE(review): presumably makes a later draw re-upload the uniform block -- confirm against the draw path.
+ }
+}
- // TODO: Implement scale modifier
- uniform_block_data.data.depth_offset = depth_offset;
- uniform_block_data.dirty = true;
+void RasterizerOpenGL::SyncDepthOffset() { // Copies the PICA viewport_depth_near_plane register into the uniform block's depth_offset.
+ float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); // The register is read as a raw float24 and widened to a 32-bit float.
+ if (depth_offset != uniform_block_data.data.depth_offset) { // Only touch the uniform data (and the dirty flag) when the value actually changed.
+ uniform_block_data.data.depth_offset = depth_offset;
+ uniform_block_data.dirty = true; // NOTE(review): presumably makes a later draw re-upload the uniform block -- confirm against the draw path.
+ }
}
void RasterizerOpenGL::SyncBlendEnabled() {
@@ -854,6 +947,8 @@ void RasterizerOpenGL::SyncBlendEnabled() {
void RasterizerOpenGL::SyncBlendFuncs() {
const auto& regs = Pica::g_state.regs;
+ state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
+ state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
@@ -880,13 +975,39 @@ void RasterizerOpenGL::SyncLogicOp() {
state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op);
}
+void RasterizerOpenGL::SyncColorWriteMask() { // Recomputes the four per-channel color write flags held in state.color_mask.
+ const auto& regs = Pica::g_state.regs;
+
+ auto IsColorWriteEnabled = [&](u32 value) { // `value` is one channel's output_merger enable field.
+ return (regs.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE : GL_FALSE; // A channel is writable only when the framebuffer-wide allow field and the channel's own enable are both non-zero.
+ };
+
+ state.color_mask.red_enabled = IsColorWriteEnabled(regs.output_merger.red_enable);
+ state.color_mask.green_enabled = IsColorWriteEnabled(regs.output_merger.green_enable);
+ state.color_mask.blue_enabled = IsColorWriteEnabled(regs.output_merger.blue_enable);
+ state.color_mask.alpha_enabled = IsColorWriteEnabled(regs.output_merger.alpha_enable);
+}
+
+void RasterizerOpenGL::SyncStencilWriteMask() { // Derives state.stencil.write_mask from the PICA stencil write mask, gated by the framebuffer allow field.
+ const auto& regs = Pica::g_state.regs;
+ state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) // Depth/stencil writes allowed: use the mask programmed in the stencil test register.
+ ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask)
+ : 0; // Depth/stencil writes disallowed: a zero mask is stored.
+}
+
+void RasterizerOpenGL::SyncDepthWriteMask() { // Derives state.depth.write_mask from two PICA fields.
+ const auto& regs = Pica::g_state.regs;
+ state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) // Both the framebuffer allow field and the output-merger depth write enable must be set.
+ ? GL_TRUE
+ : GL_FALSE;
+}
+
void RasterizerOpenGL::SyncStencilTest() {
const auto& regs = Pica::g_state.regs;
state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func);
state.stencil.test_ref = regs.output_merger.stencil_test.reference_value;
state.stencil.test_mask = regs.output_merger.stencil_test.input_mask;
- state.stencil.write_mask = regs.output_merger.stencil_test.write_mask;
state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail);
state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail);
state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass);
@@ -898,11 +1019,6 @@ void RasterizerOpenGL::SyncDepthTest() {
regs.output_merger.depth_write_enable == 1;
state.depth.test_func = regs.output_merger.depth_test_enable == 1 ?
PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS;
- state.color_mask.red_enabled = regs.output_merger.red_enable;
- state.color_mask.green_enabled = regs.output_merger.green_enable;
- state.color_mask.blue_enabled = regs.output_merger.blue_enable;
- state.color_mask.alpha_enabled = regs.output_merger.alpha_enable;
- state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE;
}
void RasterizerOpenGL::SyncCombinerColor() {
@@ -989,229 +1105,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
uniform_block_data.dirty = true;
}
}
-
-void RasterizerOpenGL::SyncDrawState() {
- const auto& regs = Pica::g_state.regs;
-
- // Sync the viewport
- GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
- GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
-
- // OpenGL uses different y coordinates, so negate corner offset and flip origin
- // TODO: Ensure viewport_corner.x should not be negated or origin flipped
- // TODO: Use floating-point viewports for accuracy if supported
- glViewport((GLsizei)regs.viewport_corner.x,
- (GLsizei)regs.viewport_corner.y,
- viewport_width, viewport_height);
-
- // Sync bound texture(s), upload if not cached
- const auto pica_textures = regs.GetTextures();
- for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
- const auto& texture = pica_textures[texture_index];
-
- if (texture.enabled) {
- texture_samplers[texture_index].SyncWithConfig(texture.config);
- res_cache.LoadAndBindTexture(state, texture_index, texture);
- } else {
- state.texture_units[texture_index].texture_2d = 0;
- }
- }
-
- state.draw.uniform_buffer = uniform_buffer.handle;
- state.Apply();
-}
-
-MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
-
-void RasterizerOpenGL::ReloadColorBuffer() {
- u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
-
- if (color_buffer == nullptr)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
-
- u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
-
- std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
-
- // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
- for (int y = 0; y < fb_color_texture.height; ++y) {
- for (int x = 0; x < fb_color_texture.width; ++x) {
- const u32 coarse_y = y & ~7;
- u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
-
- u8* pixel = color_buffer + dst_offset;
- memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
- }
- }
-
- state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
- fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-}
-
-void RasterizerOpenGL::ReloadDepthBuffer() {
- if (cached_fb_depth_addr == 0)
- return;
-
- // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
- u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
-
- if (depth_buffer == nullptr)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
-
- u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
-
- // OpenGL needs 4 bpp alignment for D24
- u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
-
- std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
-
- u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
-
- if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
- for (int y = 0; y < fb_depth_texture.height; ++y) {
- for (int x = 0; x < fb_depth_texture.width; ++x) {
- const u32 coarse_y = y & ~7;
- u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
-
- u8* pixel = depth_buffer + dst_offset;
- u32 depth_stencil = *(u32*)pixel;
- ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
- }
- }
- } else {
- for (int y = 0; y < fb_depth_texture.height; ++y) {
- for (int x = 0; x < fb_depth_texture.width; ++x) {
- const u32 coarse_y = y & ~7;
- u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
-
- u8* pixel = depth_buffer + dst_offset;
- memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
- }
- }
- }
-
- state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
- // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
- // The bug has been reported to Intel (https://communities.intel.com/message/324464)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
- GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
- } else {
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
- fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
- }
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-}
-
-Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
-MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
-
-void RasterizerOpenGL::CommitColorBuffer() {
- if (cached_fb_color_addr != 0) {
- u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
-
- if (color_buffer != nullptr) {
- Common::Profiling::ScopeTimer timer(buffer_commit_category);
- MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
-
- u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
-
- std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
-
- state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-
- // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
- for (int y = 0; y < fb_color_texture.height; ++y) {
- for (int x = 0; x < fb_color_texture.width; ++x) {
- const u32 coarse_y = y & ~7;
- u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
- u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
-
- u8* pixel = color_buffer + dst_offset;
- memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
- }
- }
- }
- }
-}
-
-void RasterizerOpenGL::CommitDepthBuffer() {
- if (cached_fb_depth_addr != 0) {
- // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
- u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
-
- if (depth_buffer != nullptr) {
- Common::Profiling::ScopeTimer timer(buffer_commit_category);
- MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
-
- u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
-
- // OpenGL needs 4 bpp alignment for D24
- u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
-
- std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
-
- state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
-
- state.texture_units[0].texture_2d = 0;
- state.Apply();
-
- u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
-
- if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
- for (int y = 0; y < fb_depth_texture.height; ++y) {
- for (int x = 0; x < fb_depth_texture.width; ++x) {
- const u32 coarse_y = y & ~7;
- u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
-
- u8* pixel = depth_buffer + dst_offset;
- u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
- *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
- }
- }
- } else {
- for (int y = 0; y < fb_depth_texture.height; ++y) {
- for (int x = 0; x < fb_depth_texture.width; ++x) {
- const u32 coarse_y = y & ~7;
- u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
-
- u8* pixel = depth_buffer + dst_offset;
- memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
- }
- }
- }
- }
- }
-}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fc85aa3ff..d70369400 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -4,22 +4,33 @@
#pragma once
+#include <array>
#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>
#include <unordered_map>
+#include <glad/glad.h>
+
+#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/hash.h"
+#include "common/vector_math.h"
+
+#include "core/hw/gpu.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
+#include "video_core/pica_types.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
-#include "video_core/shader/shader_interpreter.h"
+#include "video_core/shader/shader.h"
+
+struct ScreenInfo;
/**
* This struct contains all state used to generate the GLSL shader program that emulates the current
@@ -28,158 +39,185 @@
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
+ *
+ * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X."
+ * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X."
+ * = Bytewise copy instead of memberwise copy.
+ * This is important because the padding bytes are included in the hash and comparison between objects.
*/
-struct PicaShaderConfig {
+union PicaShaderConfig {
+
/// Construct a PicaShaderConfig with the current Pica register configuration.
static PicaShaderConfig CurrentConfig() {
PicaShaderConfig res;
+
+ auto& state = res.state;
+ std::memset(&state, 0, sizeof(PicaShaderConfig::State)); // Zero every byte of State, padding included: operator== and std::hash both read the raw bytes.
+
const auto& regs = Pica::g_state.regs;
- res.alpha_test_func = regs.output_merger.alpha_test.enable ?
+ state.depthmap_enable = regs.depthmap_enable;
+
+ state.alpha_test_func = regs.output_merger.alpha_test.enable ? // A disabled alpha test is keyed as CompareFunc::Always.
regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
- // Copy relevant TevStageConfig fields only. We're doing this manually (instead of calling
- // the GetTevStages() function) because BitField explicitly disables copies.
-
- res.tev_stages[0].sources_raw = regs.tev_stage0.sources_raw;
- res.tev_stages[1].sources_raw = regs.tev_stage1.sources_raw;
- res.tev_stages[2].sources_raw = regs.tev_stage2.sources_raw;
- res.tev_stages[3].sources_raw = regs.tev_stage3.sources_raw;
- res.tev_stages[4].sources_raw = regs.tev_stage4.sources_raw;
- res.tev_stages[5].sources_raw = regs.tev_stage5.sources_raw;
-
- res.tev_stages[0].modifiers_raw = regs.tev_stage0.modifiers_raw;
- res.tev_stages[1].modifiers_raw = regs.tev_stage1.modifiers_raw;
- res.tev_stages[2].modifiers_raw = regs.tev_stage2.modifiers_raw;
- res.tev_stages[3].modifiers_raw = regs.tev_stage3.modifiers_raw;
- res.tev_stages[4].modifiers_raw = regs.tev_stage4.modifiers_raw;
- res.tev_stages[5].modifiers_raw = regs.tev_stage5.modifiers_raw;
-
- res.tev_stages[0].ops_raw = regs.tev_stage0.ops_raw;
- res.tev_stages[1].ops_raw = regs.tev_stage1.ops_raw;
- res.tev_stages[2].ops_raw = regs.tev_stage2.ops_raw;
- res.tev_stages[3].ops_raw = regs.tev_stage3.ops_raw;
- res.tev_stages[4].ops_raw = regs.tev_stage4.ops_raw;
- res.tev_stages[5].ops_raw = regs.tev_stage5.ops_raw;
-
- res.tev_stages[0].scales_raw = regs.tev_stage0.scales_raw;
- res.tev_stages[1].scales_raw = regs.tev_stage1.scales_raw;
- res.tev_stages[2].scales_raw = regs.tev_stage2.scales_raw;
- res.tev_stages[3].scales_raw = regs.tev_stage3.scales_raw;
- res.tev_stages[4].scales_raw = regs.tev_stage4.scales_raw;
- res.tev_stages[5].scales_raw = regs.tev_stage5.scales_raw;
-
- res.combiner_buffer_input =
+ state.texture0_type = regs.texture0.type;
+
+ // Copy relevant tev stages fields.
+ // We don't sync const_color here because of the high variance, it is a
+ // shader uniform instead.
+ const auto& tev_stages = regs.GetTevStages();
+ DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); // The key array and the register view must describe the same number of stages.
+ for (size_t i = 0; i < tev_stages.size(); i++) {
+ const auto& tev_stage = tev_stages[i];
+ state.tev_stages[i].sources_raw = tev_stage.sources_raw;
+ state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
+ state.tev_stages[i].ops_raw = tev_stage.ops_raw;
+ state.tev_stages[i].scales_raw = tev_stage.scales_raw;
+ }
+
+ state.combiner_buffer_input = // Packed value: RGB update mask OR'ed with the alpha update mask shifted left by 4.
regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
// Fragment lighting
- res.lighting.enable = !regs.lighting.disable;
- res.lighting.src_num = regs.lighting.num_lights + 1;
+ state.lighting.enable = !regs.lighting.disable;
+ state.lighting.src_num = regs.lighting.num_lights + 1; // NOTE(review): the register appears to hold (light count - 1) -- confirm against the PICA register docs.
- for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) {
+ for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { // Only the first src_num entries of light[] are filled; the rest stay zeroed by the memset.
unsigned num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
- res.lighting.light[light_index].num = num;
- res.lighting.light[light_index].directional = light.directional != 0;
- res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
- res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
- res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
- res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
+ state.lighting.light[light_index].num = num;
+ state.lighting.light[light_index].directional = light.directional != 0;
+ state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
+ state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
+ state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
+ state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
}
- res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
- res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
- res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
- res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
-
- res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
- res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
- res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
- res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
-
- res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
- res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
- res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
- res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
-
- res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
- res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
- res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
- res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
-
- res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
- res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
- res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
- res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
-
- res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
- res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
- res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
- res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
-
- res.lighting.config = regs.lighting.config;
- res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
- res.lighting.bump_mode = regs.lighting.bump_mode;
- res.lighting.bump_selector = regs.lighting.bump_selector;
- res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
- res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
+ state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; // The registers store "disable" bits; the key stores the inverted "enable" sense (same for every LUT below).
+ state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
+ state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
+ state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
+
+ state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
+ state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
+ state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
+ state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
+
+ state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
+ state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
+ state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
+ state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
+
+ state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
+ state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
+ state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
+ state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
+
+ state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
+ state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
+ state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
+ state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
+
+ state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
+ state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
+ state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
+ state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
+
+ state.lighting.config = regs.lighting.config;
+ state.lighting.fresnel_selector = regs.lighting.fresnel_selector;
+ state.lighting.bump_mode = regs.lighting.bump_mode;
+ state.lighting.bump_selector = regs.lighting.bump_selector;
+ state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
+ state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
return res;
}
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
- return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index));
+ return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); // Tests bit `stage_index` of the packed mask (RGB update bits live in the low bits); stages 4 and above always yield false.
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
- return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index));
+ return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); // Shifts out the RGB bits first, then tests bit `stage_index` of the alpha update mask; stages 4 and above always yield false.
}
bool operator ==(const PicaShaderConfig& o) const {
- return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0;
+ return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; // Raw byte comparison of State, padding included; CurrentConfig() zero-fills State so padding compares equal for keys it builds.
};
- Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
- std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
- u8 combiner_buffer_input = 0;
+ // NOTE: MSVC15 (Update 2) doesn't consider types with `delete`'d constructors and operators
+ // to be trivially copyable (TC). This makes BitField not TC when used in a union or struct,
+ // so we have to resort to this ugly hack and keep the raw u32 register words instead.
+ // Once that bug is fixed we can use Pica::Regs::TevStageConfig here.
+ // Doesn't include const_color because we don't sync it, see comment in CurrentConfig()
+ struct TevStageConfigRaw {
+ u32 sources_raw;
+ u32 modifiers_raw;
+ u32 ops_raw;
+ u32 scales_raw;
+ explicit operator Pica::Regs::TevStageConfig() const noexcept { // Rebuilds a TevStageConfig from the stored raw words; must be requested with an explicit cast.
+ Pica::Regs::TevStageConfig stage;
+ stage.sources_raw = sources_raw;
+ stage.modifiers_raw = modifiers_raw;
+ stage.ops_raw = ops_raw;
+ stage.const_color = 0; // const_color is not stored in this struct, so the rebuilt stage always carries zero here.
+ stage.scales_raw = scales_raw;
+ return stage;
+ }
+ };
- struct {
- struct {
- unsigned num = 0;
- bool directional = false;
- bool two_sided_diffuse = false;
- bool dist_atten_enable = false;
- GLfloat dist_atten_scale = 0.0f;
- GLfloat dist_atten_bias = 0.0f;
- } light[8];
-
- bool enable = false;
- unsigned src_num = 0;
- Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
- unsigned bump_selector = 0;
- bool bump_renorm = false;
- bool clamp_highlights = false;
-
- Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
- Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
+ struct State { // Plain-data shader cache key; members have no default initializers because CurrentConfig() zero-fills the whole struct with memset.
+
+ Pica::Regs::CompareFunc alpha_test_func;
+ Pica::Regs::TextureConfig::TextureType texture0_type;
+ std::array<TevStageConfigRaw, 6> tev_stages;
+ u8 combiner_buffer_input; // RGB update mask OR'ed with (alpha update mask << 4), as packed in CurrentConfig().
+
+ Pica::Regs::DepthBuffering depthmap_enable;
struct {
- bool enable = false;
- bool abs_input = false;
- Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
- float scale = 1.0f;
- } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
- } lighting;
+ struct {
+ unsigned num; // Index into regs.lighting.light[] that this slot was filled from.
+ bool directional;
+ bool two_sided_diffuse;
+ bool dist_atten_enable;
+ GLfloat dist_atten_scale;
+ GLfloat dist_atten_bias;
+ } light[8]; // CurrentConfig() fills only the first src_num entries.
+
+ bool enable;
+ unsigned src_num;
+ Pica::Regs::LightingBumpMode bump_mode;
+ unsigned bump_selector;
+ bool bump_renorm;
+ bool clamp_highlights;
+
+ Pica::Regs::LightingConfig config;
+ Pica::Regs::LightingFresnelSelector fresnel_selector;
+
+ struct {
+ bool enable;
+ bool abs_input;
+ Pica::Regs::LightingLutInput type;
+ float scale;
+ } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; // One settings record per lighting lookup table.
+ } lighting;
+
+ } state; // The union's only data member; hashing and comparison operate on its bytes.
};
+#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
+static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable");
+#endif
namespace std {
template <>
struct hash<PicaShaderConfig> {
size_t operator()(const PicaShaderConfig& k) const {
- return Common::ComputeHash64(&k, sizeof(PicaShaderConfig));
+ return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State)); // Hashes exactly the State bytes, the same range that PicaShaderConfig::operator== compares.
}
};
@@ -191,16 +229,17 @@ public:
RasterizerOpenGL();
~RasterizerOpenGL() override;
- void InitObjects() override;
- void Reset() override;
void AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
- void FlushFramebuffer() override;
void NotifyPicaRegisterChanged(u32 id) override;
+ void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override;
- void InvalidateRegion(PAddr addr, u32 size) override;
+ void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
+ bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
+ bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
+ bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
/// OpenGL shader generated for a given Pica register state
struct PicaShader {
@@ -210,26 +249,6 @@ public:
private:
- /// Structure used for storing information about color textures
- struct TextureInfo {
- OGLTexture texture;
- GLsizei width;
- GLsizei height;
- Pica::Regs::ColorFormat format;
- GLenum gl_format;
- GLenum gl_type;
- };
-
- /// Structure used for storing information about depth textures
- struct DepthTextureInfo {
- OGLTexture texture;
- GLsizei width;
- GLsizei height;
- Pica::Regs::DepthFormat format;
- GLenum gl_format;
- GLenum gl_type;
- };
-
struct SamplerInfo {
using TextureConfig = Pica::Regs::TextureConfig;
@@ -265,6 +284,7 @@ private:
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
+ tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
@@ -285,6 +305,7 @@ private:
GLfloat tex_coord0[2];
GLfloat tex_coord1[2];
GLfloat tex_coord2[2];
+ GLfloat tex_coord0_w;
GLfloat normquat[4];
GLfloat view[3];
};
@@ -303,6 +324,7 @@ private:
GLvec4 const_color[6];
GLvec4 tev_combiner_buffer_color;
GLint alphatest_ref;
+ GLfloat depth_scale;
GLfloat depth_offset;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
@@ -311,23 +333,17 @@ private:
static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
- /// Reconfigure the OpenGL color texture to use the given format and dimensions
- void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
-
- /// Reconfigure the OpenGL depth texture to use the given format and dimensions
- void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
-
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
- /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
- void SyncFramebuffer();
-
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
- /// Syncs the depth scale and offset to match the PICA registers
- void SyncDepthModifiers();
+ /// Syncs the depth scale to match the PICA register
+ void SyncDepthScale();
+
+ /// Syncs the depth offset to match the PICA register
+ void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
@@ -344,90 +360,70 @@ private:
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
+ /// Syncs the color write mask to match the PICA register state
+ void SyncColorWriteMask();
+
+ /// Syncs the stencil write mask to match the PICA register state
+ void SyncStencilWriteMask();
+
+ /// Syncs the depth write mask to match the PICA register state
+ void SyncDepthWriteMask();
+
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
- /// Syncs the TEV constant color to match the PICA register
- void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
-
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
+ /// Syncs the TEV constant color to match the PICA register
+ void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
+
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the lighting lookup tables
void SyncLightingLUT(unsigned index);
- /// Syncs the specified light's diffuse color to match the PICA register
- void SyncLightDiffuse(int light_index);
-
- /// Syncs the specified light's ambient color to match the PICA register
- void SyncLightAmbient(int light_index);
-
- /// Syncs the specified light's position to match the PICA register
- void SyncLightPosition(int light_index);
-
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
- /// Syncs the remaining OpenGL drawing state to match the current PICA state
- void SyncDrawState();
-
- /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
- void ReloadColorBuffer();
+ /// Syncs the specified light's diffuse color to match the PICA register
+ void SyncLightDiffuse(int light_index);
- /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture
- void ReloadDepthBuffer();
+ /// Syncs the specified light's ambient color to match the PICA register
+ void SyncLightAmbient(int light_index);
- /**
- * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory
- * Loads the OpenGL framebuffer textures into temporary buffers
- * Then copies into the 3DS framebuffer using proper Morton order
- */
- void CommitColorBuffer();
+ /// Syncs the specified light's position to match the PICA register
+ void SyncLightPosition(int light_index);
- /**
- * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
- * Loads the OpenGL framebuffer textures into temporary buffers
- * Then copies into the 3DS framebuffer using proper Morton order
- */
- void CommitDepthBuffer();
+ OpenGLState state;
RasterizerCacheOpenGL res_cache;
std::vector<HardwareVertex> vertex_batch;
- OpenGLState state;
-
- PAddr cached_fb_color_addr;
- PAddr cached_fb_depth_addr;
-
- // Hardware rasterizer
- std::array<SamplerInfo, 3> texture_samplers;
- TextureInfo fb_color_texture;
- DepthTextureInfo fb_depth_texture;
-
std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
const PicaShader* current_shader = nullptr;
+ bool shader_dirty;
struct {
UniformData data;
bool lut_dirty[6];
bool dirty;
- } uniform_block_data;
+ } uniform_block_data = {};
+ std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
OGLBuffer vertex_buffer;
OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer;
- std::array<OGLTexture, 6> lighting_lut;
- std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
+ std::array<OGLTexture, 6> lighting_luts;
+ std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..7efd0038a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,9 +2,19 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <memory>
+#include <algorithm>
+#include <atomic>
+#include <cstring>
+#include <iterator>
+#include <unordered_set>
+#include <utility>
+#include <vector>
-#include "common/hash.h"
+#include <glad/glad.h>
+
+#include "common/bit_field.h"
+#include "common/emu_window.h"
+#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
@@ -12,71 +22,693 @@
#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/pica_state.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
-#include "video_core/renderer_opengl/pica_to_gl.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/utils.h"
+#include "video_core/video_core.h"
+
+/// Triple of OpenGL enums describing how a surface format is allocated and transferred.
+struct FormatTuple {
+    GLint internal_format; ///< internalformat argument given to glTexImage2D
+    GLenum format;         ///< format argument given to glTexImage2D / glGetTexImage
+    GLenum type;           ///< type argument given to glTexImage2D / glGetTexImage
+};
+
+/// Tuples for the color framebuffer formats, indexed directly by the pixel format value.
+static const std::array<FormatTuple, 5> fb_format_tuples = {{
+    { GL_RGBA8,   GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 },   // RGBA8
+    { GL_RGB8,    GL_BGR,  GL_UNSIGNED_BYTE },          // RGB8
+    { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
+    { GL_RGB565,  GL_RGB,  GL_UNSIGNED_SHORT_5_6_5 },   // RGB565
+    { GL_RGBA4,   GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
+}};
+
+/// Tuples for the depth formats; users index this table with (pixel format value - 14).
+static const std::array<FormatTuple, 4> depth_format_tuples = {{
+    { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT },    // D16
+    {},                                                                 // NOTE(review): presumably an unused format slot - confirm
+    { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT },      // D24
+    { GL_DEPTH24_STENCIL8,  GL_DEPTH_STENCIL,   GL_UNSIGNED_INT_24_8 }, // D24S8
+}};
+
+/// Creates the two transfer framebuffer objects (index 0 and 1) that BlitTextures binds as read/draw targets.
+RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
+    for (size_t fb_index = 0; fb_index < 2; ++fb_index) {
+        transfer_framebuffers[fb_index].Create();
+    }
+}
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
- InvalidateAll();
+ FlushAll();
+}
+
+/**
+ * Copies every pixel of a width x height image between a Morton-ordered buffer and a linear
+ * buffer laid out for OpenGL. Rows are flipped vertically between the two layouts.
+ *
+ * @param pixel_format Format of the pixels; D24S8 additionally has its depth and stencil parts swapped
+ * @param width Image width in pixels
+ * @param height Image height in pixels
+ * @param bytes_per_pixel Size of one pixel in the Morton-ordered buffer
+ * @param gl_bytes_per_pixel Distance in bytes between consecutive pixels in the linear buffer
+ * @param morton_data Morton-ordered buffer
+ * @param gl_data Linear buffer
+ * @param morton_to_gl true to copy morton_data -> gl_data, false to copy gl_data -> morton_data
+ */
+static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
+    using PixelFormat = CachedSurface::PixelFormat;
+
+    const bool is_depth_stencil = (pixel_format == PixelFormat::D24S8);
+
+    // Rotation amounts used to swap the 24-bit and 8-bit parts of a D24S8 value;
+    // the direction of the rotation depends on the direction of the copy.
+    const u32 shift_left = morton_to_gl ? 8 : 24;
+    const u32 shift_right = morton_to_gl ? 24 : 8;
+
+    for (unsigned y = 0; y < height; ++y) {
+        // First row of the group of 8 rows that contains y
+        const u32 coarse_y = y & ~7;
+
+        for (unsigned x = 0; x < width; ++x) {
+            const u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
+            const u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
+
+            u8* const morton_pixel = morton_data + morton_offset;
+            u8* const gl_pixel = &gl_data[gl_pixel_index];
+
+            u8* const src_pixel = morton_to_gl ? morton_pixel : gl_pixel;
+            u8* const dst_pixel = morton_to_gl ? gl_pixel : morton_pixel;
+
+            if (is_depth_stencil) {
+                // Swap depth and stencil value ordering since 3DS does not match OpenGL
+                u32 depth_stencil;
+                memcpy(&depth_stencil, src_pixel, sizeof(u32));
+                depth_stencil = (depth_stencil << shift_left) | (depth_stencil >> shift_right);
+
+                memcpy(dst_pixel, &depth_stencil, sizeof(u32));
+            } else {
+                memcpy(dst_pixel, src_pixel, bytes_per_pixel);
+            }
+        }
+    }
+}
+
+/**
+ * Blits src_rect of src_tex onto dst_rect of dst_tex by attaching both textures to the
+ * transfer framebuffers and issuing glBlitFramebuffer.
+ *
+ * @param src_tex Texture to read from
+ * @param dst_tex Texture to write to
+ * @param type Surface type of both textures; selects the attachment points and the buffer mask
+ * @param src_rect Rectangle to read from src_tex
+ * @param dst_rect Rectangle to write in dst_tex
+ * @return true if both transfer framebuffers were complete and the blit was issued, false otherwise
+ *
+ * The read/draw framebuffer bindings that were current on entry are restored on every
+ * return path, including the failure path.
+ */
+bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
+    using SurfaceType = CachedSurface::SurfaceType;
+
+    OpenGLState cur_state = OpenGLState::GetCurState();
+
+    // Make sure the textures aren't bound to texture units, since they are about to be bound to framebuffer attachments
+    OpenGLState::ResetTexture(src_tex);
+    OpenGLState::ResetTexture(dst_tex);
+
+    // Keep track of previous framebuffer bindings
+    const GLuint old_read_fb = cur_state.draw.read_framebuffer;
+    const GLuint old_draw_fb = cur_state.draw.draw_framebuffer;
+    cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
+    cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
+    cur_state.Apply();
+
+    GLbitfield buffers = 0;
+
+    if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        buffers = GL_COLOR_BUFFER_BIT;
+    } else if (type == SurfaceType::Depth) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT;
+    } else if (type == SurfaceType::DepthStencil) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+    }
+
+    // Only blit when both framebuffers are usable; the read framebuffer is checked first
+    const bool framebuffers_complete =
+        OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE &&
+        OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE;
+
+    if (framebuffers_complete) {
+        // Color blits are filtered linearly; depth/stencil blits use nearest filtering
+        glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
+                          dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
+                          buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+    }
+
+    // Restore previous framebuffer bindings, whether or not the blit was issued, so a failed
+    // blit does not leave the transfer framebuffers bound for the caller
+    cur_state.draw.read_framebuffer = old_read_fb;
+    cur_state.draw.draw_framebuffer = old_draw_fb;
+    cur_state.Apply();
+
+    return framebuffers_complete;
+}
+
+/**
+ * Blits src_rect of src_surface onto dst_rect of dst_surface if their pixel formats allow it.
+ *
+ * @return false if the formats are not blittable or the texture blit fails, true otherwise
+ */
+bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
+    const bool formats_blittable = CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format);
+    if (!formats_blittable) {
+        return false;
+    }
+
+    // The attachment type used for the blit is derived from the source surface's format
+    const CachedSurface::SurfaceType blit_type = CachedSurface::GetFormatType(src_surface->pixel_format);
+    const GLuint src_tex = src_surface->texture.handle;
+    const GLuint dst_tex = dst_surface->texture.handle;
+
+    return BlitTextures(src_tex, dst_tex, blit_type, src_rect, dst_rect);
+}
+
+/**
+ * Allocates uninitialized storage of the given size for a surface texture and sets its
+ * sampling parameters (single mip level, linear minification, clamp-to-edge wrapping).
+ *
+ * @param texture Texture object to allocate storage for
+ * @param pixel_format Surface format; selects the OpenGL internal format/format/type
+ * @param width Texture width in texels
+ * @param height Texture height in texels
+ */
+static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
+    using SurfaceType = CachedSurface::SurfaceType;
+
+    OpenGLState cur_state = OpenGLState::GetCurState();
+
+    // Temporarily bind the texture to unit 0, remembering what was bound there before
+    const GLuint previous_texture = cur_state.texture_units[0].texture_2d;
+    cur_state.texture_units[0].texture_2d = texture;
+    cur_state.Apply();
+    glActiveTexture(GL_TEXTURE0);
+
+    FormatTuple tuple;
+    switch (CachedSurface::GetFormatType(pixel_format)) {
+    case SurfaceType::Color: {
+        ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size());
+        tuple = fb_format_tuples[static_cast<unsigned int>(pixel_format)];
+        break;
+    }
+    case SurfaceType::Depth:
+    case SurfaceType::DepthStencil: {
+        // The depth table is indexed relative to pixel format value 14
+        const size_t tuple_idx = static_cast<size_t>(pixel_format) - 14;
+        ASSERT(tuple_idx < depth_format_tuples.size());
+        tuple = depth_format_tuples[tuple_idx];
+        break;
+    }
+    default:
+        // Every other surface type is stored as RGBA8
+        tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
+        break;
+    }
+
+    // A null data pointer allocates the storage without uploading any contents
+    glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
+                 tuple.format, tuple.type, nullptr);
+
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+    // Put the previously bound texture back on unit 0
+    cur_state.texture_units[0].texture_2d = previous_texture;
+    cur_state.Apply();
}
-MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
+MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
+CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
+ using PixelFormat = CachedSurface::PixelFormat;
+ using SurfaceType = CachedSurface::SurfaceType;
+
+ if (params.addr == 0) {
+ return nullptr;
+ }
+
+ u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
+
+ // Check for an exact match in existing surfaces
+ CachedSurface* best_exact_surface = nullptr;
+ float exact_surface_goodness = -1.f;
+
+ auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
+ auto range = surface_cache.equal_range(surface_interval);
+ for (auto it = range.first; it != range.second; ++it) {
+ for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
+ CachedSurface* surface = it2->get();
+
+ // Check if the request matches the surface exactly
+ if (params.addr == surface->addr &&
+ params.width == surface->width && params.height == surface->height &&
+ params.pixel_format == surface->pixel_format)
+ {
+ // Make sure optional param-matching criteria are fulfilled
+ bool tiling_match = (params.is_tiled == surface->is_tiled);
+ bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
+ if (!match_res_scale || res_scale_match) {
+ // Prioritize same-tiling and highest resolution surfaces
+ float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
+ if (match_goodness > exact_surface_goodness || surface->dirty) {
+ exact_surface_goodness = match_goodness;
+ best_exact_surface = surface;
+ }
+ }
+ }
+ }
+ }
+
+ // Return the best exact surface if found
+ if (best_exact_surface != nullptr) {
+ return best_exact_surface;
+ }
+
+ // No matching surfaces found, so create a new one
+ u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
+ if (texture_src_data == nullptr) {
+ return nullptr;
+ }
+
+ MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
+
+ std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
-void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) {
- const auto cached_texture = texture_cache.find(info.physical_address);
+ new_surface->addr = params.addr;
+ new_surface->size = params_size;
- if (cached_texture != texture_cache.end()) {
- state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle;
- state.Apply();
+ new_surface->texture.Create();
+ new_surface->width = params.width;
+ new_surface->height = params.height;
+ new_surface->stride = params.stride;
+ new_surface->res_scale_width = params.res_scale_width;
+ new_surface->res_scale_height = params.res_scale_height;
+
+ new_surface->is_tiled = params.is_tiled;
+ new_surface->pixel_format = params.pixel_format;
+ new_surface->dirty = false;
+
+ if (!load_if_create) {
+ // Don't load any data; just allocate the surface's texture
+ AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
} else {
- MICROPROFILE_SCOPE(OpenGL_TextureUpload);
+ // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
+
+ Memory::RasterizerFlushRegion(params.addr, params_size);
+
+ // Load data from memory to the new surface
+ OpenGLState cur_state = OpenGLState::GetCurState();
+
+ GLuint old_tex = cur_state.texture_units[0].texture_2d;
+ cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
+ cur_state.Apply();
+ glActiveTexture(GL_TEXTURE0);
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
+ if (!new_surface->is_tiled) {
+ // TODO: Ensure this will always be a color format, not a depth or other format
+ ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
+ const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
+
+ glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
+ tuple.format, tuple.type, texture_src_data);
+ } else {
+ SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
+ if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
+ FormatTuple tuple;
+ if ((size_t)params.pixel_format < fb_format_tuples.size()) {
+ tuple = fb_format_tuples[(unsigned int)params.pixel_format];
+ } else {
+ // Texture
+ tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
+ }
+
+ std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
- std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>();
+ Pica::DebugUtils::TextureInfo tex_info;
+ tex_info.width = params.width;
+ tex_info.height = params.height;
+ tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
+ tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
+ tex_info.physical_address = params.addr;
- new_texture->texture.Create();
- state.texture_units[texture_unit].texture_2d = new_texture->texture.handle;
- state.Apply();
- glActiveTexture(GL_TEXTURE0 + texture_unit);
+ for (unsigned y = 0; y < params.height; ++y) {
+ for (unsigned x = 0; x < params.width; ++x) {
+ tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
+ }
+ }
- u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address);
+ glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
+ } else {
+ // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
+ size_t tuple_idx = (size_t)params.pixel_format - 14;
+ ASSERT(tuple_idx < depth_format_tuples.size());
+ const FormatTuple& tuple = depth_format_tuples[tuple_idx];
- new_texture->width = info.width;
- new_texture->height = info.height;
- new_texture->size = info.stride * info.height;
- new_texture->addr = info.physical_address;
- new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
+ u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
- std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]);
+ // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
+ bool use_4bpp = (params.pixel_format == PixelFormat::D24);
- for (int y = 0; y < info.height; ++y) {
- for (int x = 0; x < info.width; ++x) {
- temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info);
+ u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
+
+ std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
+
+ u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
+
+ MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
+
+ glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
+ tuple.format, tuple.type, temp_fb_depth_buffer.data());
}
}
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+
+ // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
+ if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
+ OGLTexture scaled_texture;
+ scaled_texture.Create();
+
+ AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
+ BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
+ MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
+ MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
+
+ new_surface->texture.Release();
+ new_surface->texture.handle = scaled_texture.handle;
+ scaled_texture.handle = 0;
+ cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
+ cur_state.Apply();
+ }
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get());
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- texture_cache.emplace(info.physical_address, std::move(new_texture));
+ cur_state.texture_units[0].texture_2d = old_tex;
+ cur_state.Apply();
}
+
+ Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
+ surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
+ return new_surface.get();
}
-void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) {
- // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound
- auto cache_upper_bound = texture_cache.upper_bound(addr + size);
+CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
+ if (params.addr == 0) {
+ return nullptr;
+ }
+
+ u32 total_pixels = params.width * params.height;
+ u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
- for (auto it = texture_cache.begin(); it != cache_upper_bound;) {
- const auto& info = *it->second;
+ // Attempt to find encompassing surfaces
+ CachedSurface* best_subrect_surface = nullptr;
+ float subrect_surface_goodness = -1.f;
- // Flush the texture only if the memory region intersects and a change is detected
- if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) &&
- (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) {
+ auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
+ auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
+ for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
+ for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
+ CachedSurface* surface = it2->get();
- it = texture_cache.erase(it);
+ // Check if the request is contained in the surface
+ if (params.addr >= surface->addr &&
+ params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
+ params.pixel_format == surface->pixel_format)
+ {
+ // Make sure optional param-matching criteria are fulfilled
+ bool tiling_match = (params.is_tiled == surface->is_tiled);
+ bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
+ if (!match_res_scale || res_scale_match) {
+ // Prioritize same-tiling and highest resolution surfaces
+ float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
+ if (match_goodness > subrect_surface_goodness || surface->dirty) {
+ subrect_surface_goodness = match_goodness;
+ best_subrect_surface = surface;
+ }
+ }
+ }
+ }
+ }
+
+ // Return the best subrect surface if found
+ if (best_subrect_surface != nullptr) {
+ unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
+
+ int x0, y0;
+
+ if (!params.is_tiled) {
+ u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
+ x0 = begin_pixel_index % best_subrect_surface->width;
+ y0 = begin_pixel_index / best_subrect_surface->width;
+
+ out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
+ } else {
+ u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
+ u32 tiles_per_row = best_subrect_surface->width / 8;
+
+ u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
+ x0 = begin_tile_index % tiles_per_row * 8;
+ y0 = begin_tile_index / tiles_per_row * 8;
+
+ // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
+ out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
+ }
+
+ out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
+ out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
+ out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
+ out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
+
+ return best_subrect_surface;
+ }
+
+ // No subrect found - create and return a new surface
+ if (!params.is_tiled) {
+ out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
+ } else {
+ out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
+ }
+
+ return GetSurface(params, match_res_scale, load_if_create);
+}
+
+/// Returns the cached surface for the given texture configuration, creating and loading it if needed.
+CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
+    const Pica::DebugUtils::TextureInfo tex_info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
+
+    // Textures are always requested as tiled surfaces
+    CachedSurface surface_params;
+    surface_params.is_tiled = true;
+    surface_params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(tex_info.format);
+    surface_params.addr = tex_info.physical_address;
+    surface_params.width = tex_info.width;
+    surface_params.height = tex_info.height;
+
+    // Resolution scale does not have to match; load from memory if a new surface is created
+    const bool match_res_scale = false;
+    const bool load_if_create = true;
+    return GetSurface(surface_params, match_res_scale, load_if_create);
+}
+
+std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
+ const auto& regs = Pica::g_state.regs;
+
+ // Make sure that framebuffers don't overlap if both color and depth are being used
+ u32 fb_area = config.GetWidth() * config.GetHeight();
+ bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
+ config.GetDepthBufferPhysicalAddress() != 0 &&
+ MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
+ config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
+ bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
+ bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
+
+ if (framebuffers_overlap && using_color_fb && using_depth_fb) {
+ LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
+ using_depth_fb = false;
+ }
+
+ // get color and depth surfaces
+ CachedSurface color_params;
+ CachedSurface depth_params;
+ color_params.width = depth_params.width = config.GetWidth();
+ color_params.height = depth_params.height = config.GetHeight();
+ color_params.is_tiled = depth_params.is_tiled = true;
+ if (VideoCore::g_scaled_resolution_enabled) {
+ auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
+
+ // Assume same scaling factor for top and bottom screens
+ color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
+ color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
+ }
+
+ color_params.addr = config.GetColorBufferPhysicalAddress();
+ color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
+
+ depth_params.addr = config.GetDepthBufferPhysicalAddress();
+ depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
+
+ MathUtil::Rectangle<int> color_rect;
+ CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
+
+ MathUtil::Rectangle<int> depth_rect;
+ CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
+
+ // Sanity check to make sure found surfaces aren't the same
+ if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
+ LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
+ using_depth_fb = false;
+ depth_surface = nullptr;
+ }
+
+ MathUtil::Rectangle<int> rect;
+
+ if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
+ // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
+ if (color_rect.left != 0 || color_rect.top != 0) {
+ color_surface = GetSurface(color_params, true, true);
+ }
+
+ if (depth_rect.left != 0 || depth_rect.top != 0) {
+ depth_surface = GetSurface(depth_params, true, true);
+ }
+
+ if (!color_surface->is_tiled) {
+ rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
} else {
- ++it;
+ rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
}
+ } else if (color_surface != nullptr) {
+ rect = color_rect;
+ } else if (depth_surface != nullptr) {
+ rect = depth_rect;
+ } else {
+ rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
}
+
+ return std::make_tuple(color_surface, depth_surface, rect);
}
-void RasterizerCacheOpenGL::InvalidateAll() {
- texture_cache.clear();
+/**
+ * Looks for a cached surface that exactly covers the region of a memory fill.
+ *
+ * A surface matches when it starts at the fill's start address, its format has the same
+ * number of bits per pixel as the fill value, and its size in bytes equals the fill length.
+ *
+ * @param config Memory fill configuration describing the region and fill value width
+ * @return The first matching cached surface, or nullptr if there is none
+ */
+CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
+    // These values depend only on the fill configuration, so compute them once up front
+    // instead of once per cached surface
+    const auto fill_start = config.GetStartAddress();
+    const auto fill_end = config.GetEndAddress();
+
+    int bits_per_value = 0;
+    if (config.fill_24bit) {
+        bits_per_value = 24;
+    } else if (config.fill_32bit) {
+        bits_per_value = 32;
+    } else {
+        bits_per_value = 16;
+    }
+
+    auto surface_interval = boost::icl::interval<PAddr>::right_open(fill_start, fill_end);
+    auto range = surface_cache.equal_range(surface_interval);
+    for (auto it = range.first; it != range.second; ++it) {
+        for (const auto& surface_ptr : it->second) {
+            CachedSurface* surface = surface_ptr.get();
+
+            if (surface->addr == fill_start &&
+                CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
+                (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (fill_end - fill_start))
+            {
+                return surface;
+            }
+        }
+    }
+
+    return nullptr;
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
+/// Downloads a dirty surface's texture from OpenGL and writes it to the memory returned by
+/// Memory::GetPhysicalPointer for the surface's address, then clears the dirty flag.
+/// Does nothing if the surface is not dirty or if no pointer is available for its address.
+void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
+    using PixelFormat = CachedSurface::PixelFormat;
+    using SurfaceType = CachedSurface::SurfaceType;
+
+    // Nothing to write back for a clean surface
+    if (!surface->dirty) {
+        return;
+    }
+
+    MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
+
+    u8* const guest_memory = Memory::GetPhysicalPointer(surface->addr);
+    if (guest_memory == nullptr) {
+        return;
+    }
+
+    // Remember what was bound to texture unit 0 so it can be restored at the end
+    OpenGLState state = OpenGLState::GetCurState();
+    const GLuint previous_texture = state.texture_units[0].texture_2d;
+
+    OGLTexture native_res_texture;
+    GLuint source_texture = surface->texture.handle;
+
+    // A surface that is not at 1x scale is first blitted into a temporary texture of the
+    // surface's unscaled size, and that temporary is what gets read back
+    const bool is_rescaled = surface->res_scale_width != 1.f || surface->res_scale_height != 1.f;
+    if (is_rescaled) {
+        native_res_texture.Create();
+
+        AllocateSurfaceTexture(native_res_texture.handle, surface->pixel_format, surface->width, surface->height);
+
+        const MathUtil::Rectangle<int> scaled_rect(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight());
+        const MathUtil::Rectangle<int> native_rect(0, 0, surface->width, surface->height);
+        BlitTextures(surface->texture.handle, native_res_texture.handle,
+                     CachedSurface::GetFormatType(surface->pixel_format), scaled_rect, native_rect);
+
+        source_texture = native_res_texture.handle;
+    }
+
+    state.texture_units[0].texture_2d = source_texture;
+    state.Apply();
+    glActiveTexture(GL_TEXTURE0);
+
+    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(surface->stride));
+    if (!surface->is_tiled) {
+        // Linear surface: read the texture straight into the destination buffer
+        // TODO: Ensure this will always be a color format, not a depth or other format
+        ASSERT(static_cast<size_t>(surface->pixel_format) < fb_format_tuples.size());
+        const FormatTuple& tuple = fb_format_tuples[static_cast<unsigned int>(surface->pixel_format)];
+
+        glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, guest_memory);
+    } else {
+        // Tiled surface: read into a staging buffer, then let MortonCopyPixels write the destination
+        const SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
+        const bool is_depth_format = (type == SurfaceType::Depth || type == SurfaceType::DepthStencil);
+
+        if (is_depth_format) {
+            // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
+            const size_t tuple_idx = static_cast<size_t>(surface->pixel_format) - 14;
+            ASSERT(tuple_idx < depth_format_tuples.size());
+            const FormatTuple& tuple = depth_format_tuples[tuple_idx];
+
+            const u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
+
+            // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
+            const bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
+            const u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
+
+            std::vector<u8> staging(surface->width * surface->height * gl_bytes_per_pixel);
+
+            glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, staging.data());
+
+            // For the padded D24 case the copy starts one byte into the staging buffer
+            // (presumably skipping the padding byte of each 4-byte texel - confirm)
+            u8* staging_ptr = staging.data();
+            if (use_4bpp) {
+                staging_ptr += 1;
+            }
+
+            MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, guest_memory, staging_ptr, false);
+        } else {
+            ASSERT(static_cast<size_t>(surface->pixel_format) < fb_format_tuples.size());
+            const FormatTuple& tuple = fb_format_tuples[static_cast<unsigned int>(surface->pixel_format)];
+
+            const u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
+
+            std::vector<u8> staging(surface->width * surface->height * bytes_per_pixel);
+
+            glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, staging.data());
+
+            // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
+            MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, guest_memory, staging.data(), false);
+        }
+    }
+    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+
+    surface->dirty = false;
+
+    // Restore the texture binding that was active on entry
+    state.texture_units[0].texture_2d = previous_texture;
+    state.Apply();
+}
+
+/// Flushes every cached surface that overlaps [addr, addr + size), except skip_surface.
+/// When invalidate is set, each flushed surface is also removed from the cache and
+/// Memory::RasterizerMarkRegionCached is called on its range with a delta of -1.
+/// A size of zero is a no-op.
+void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
+    if (size == 0) {
+        return;
+    }
+
+    // Gather up unique surfaces that touch the region. They are collected into a separate set
+    // first because invalidation below modifies surface_cache, which must not happen while
+    // iterating over it; the set also keeps each surface alive until this function returns.
+    std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
+
+    auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
+    auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
+    for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
+        // Take the candidate by const reference: the predicate only inspects the pointer,
+        // so copying the shared_ptr (an atomic refcount bump) is unnecessary
+        std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
+            [skip_surface](const std::shared_ptr<CachedSurface>& candidate) { return (candidate.get() != skip_surface); });
+    }
+
+    // Flush and invalidate surfaces
+    for (const auto& surface : touching_surfaces) {
+        FlushSurface(surface.get());
+        if (invalidate) {
+            Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
+            surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
+        }
+    }
+}
+
+/// Flushes every surface currently held in the cache (FlushSurface skips the ones that are not dirty).
+void RasterizerCacheOpenGL::FlushAll() {
+    for (auto interval_it = surface_cache.begin(); interval_it != surface_cache.end(); ++interval_it) {
+        // Each interval maps to the set of surfaces registered for that address range
+        for (const auto& cached : interval_it->second) {
+            FlushSurface(cached.get());
+        }
+    }
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..225596415 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -4,40 +4,219 @@
#pragma once
-#include <map>
+#include <array>
#include <memory>
+#include <set>
+#include <tuple>
+
+#include <boost/icl/interval_map.hpp>
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+#include "core/hw/gpu.h"
#include "video_core/pica.h"
-#include "video_core/debug_utils/debug_utils.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+
+namespace MathUtil {
+template <class T> struct Rectangle;
+}
+
+struct CachedSurface;
+
+using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
+
+/// Describes one surface tracked by the rasterizer cache: where it lives in emulated memory,
+/// its dimensions and format, and the OpenGL texture that backs it. Also provides static
+/// helpers for converting between the various hardware format enums and PixelFormat.
+struct CachedSurface {
+    /// Unified pixel format enum covering color buffer, texture and depth buffer formats.
+    /// The numeric values are relied upon by the conversion helpers below.
+    enum class PixelFormat {
+        // First 5 formats are shared between textures and color buffers
+        RGBA8 = 0,
+        RGB8 = 1,
+        RGB5A1 = 2,
+        RGB565 = 3,
+        RGBA4 = 4,
+
+        // Texture-only formats
+        IA8 = 5,
+        RG8 = 6,
+        I8 = 7,
+        A8 = 8,
+        IA4 = 9,
+        I4 = 10,
+        A4 = 11,
+        ETC1 = 12,
+        ETC1A4 = 13,
+
+        // Depth buffer-only formats
+        D16 = 14,
+        // gap
+        D24 = 16,
+        D24S8 = 17,
+
+        Invalid = 255,
+    };
+
+    /// Broad category of a pixel format, as returned by GetFormatType
+    enum class SurfaceType {
+        Color = 0,
+        Texture = 1,
+        Depth = 2,
+        DepthStencil = 3,
+        Invalid = 4,
+    };
+
+    /// Returns the number of bits per pixel of the given format.
+    /// Asserts that the format's value indexes into the table (so PixelFormat::Invalid asserts);
+    /// the unused slot 15 yields 0.
+    static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
+        static const std::array<unsigned int, 18> bpp_table = {
+            32, // RGBA8
+            24, // RGB8
+            16, // RGB5A1
+            16, // RGB565
+            16, // RGBA4
+            16, // IA8
+            16, // RG8
+            8,  // I8
+            8,  // A8
+            8,  // IA4
+            4,  // I4
+            4,  // A4
+            4,  // ETC1
+            8,  // ETC1A4
+            16, // D16
+            0,  // gap (no format has value 15)
+            24, // D24
+            32, // D24S8
+        };
+
+        ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
+        return bpp_table[(unsigned int)format];
+    }
+
+    /// Converts a texture format; values 0-13 map one-to-one, anything else is Invalid
+    static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
+        return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
+    }
+
+    /// Converts a color buffer format; values 0-4 map one-to-one, anything else is Invalid
+    static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
+        return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
+    }
+
+    /// Converts a depth buffer format by offsetting values 0-3 by 14; anything else is Invalid.
+    /// NOTE(review): a depth format value of 1 maps to 15, which is the unused gap in PixelFormat
+    /// rather than PixelFormat::Invalid - confirm whether that value can occur.
+    static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
+        return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
+    }
+
+    /// Converts a GPU (display/transfer) pixel format to the matching PixelFormat
+    static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
+        switch (format) {
+        // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
+        case GPU::Regs::PixelFormat::RGB565:
+            return PixelFormat::RGB565;
+        case GPU::Regs::PixelFormat::RGB5A1:
+            return PixelFormat::RGB5A1;
+        default:
+            return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
+        }
+    }
+
+    /// Returns true if the two formats belong to compatible categories: color and texture
+    /// formats are mutually compatible, depth only with depth, depth-stencil only with depth-stencil
+    static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
+        SurfaceType a_type = GetFormatType(pixel_format_a);
+        SurfaceType b_type = GetFormatType(pixel_format_b);
+
+        if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
+            return true;
+        }
+
+        if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
+            return true;
+        }
+
+        if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
+            return true;
+        }
+
+        return false;
+    }
+
+    /// Classifies a pixel format: values 0-4 are Color, 5-13 are Texture, D16/D24 are Depth,
+    /// D24S8 is DepthStencil, and everything else is Invalid
+    static SurfaceType GetFormatType(PixelFormat pixel_format) {
+        if ((unsigned int)pixel_format < 5) {
+            return SurfaceType::Color;
+        }
+
+        if ((unsigned int)pixel_format < 14) {
+            return SurfaceType::Texture;
+        }
+
+        if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
+            return SurfaceType::Depth;
+        }
+
+        if (pixel_format == PixelFormat::D24S8) {
+            return SurfaceType::DepthStencil;
+        }
+
+        return SurfaceType::Invalid;
+    }
+
+    /// Width multiplied by the horizontal resolution scale, truncated to an integer
+    u32 GetScaledWidth() const {
+        return (u32)(width * res_scale_width);
+    }
+
+    /// Height multiplied by the vertical resolution scale, truncated to an integer
+    u32 GetScaledHeight() const {
+        return (u32)(height * res_scale_height);
+    }
+
+    // All members carry default initializers so that a default-constructed surface never
+    // holds indeterminate values (e.g. FlushSurface reads `dirty` unconditionally).
+
+    /// Start address of the surface and its size; the cache keys it by [addr, addr + size)
+    PAddr addr = 0;
+    u32 size = 0;
+
+    // NOTE(review): not referenced in the visible code - presumably bounds of the valid data; confirm
+    PAddr min_valid = 0;
+    PAddr max_valid = 0;
+
+    /// OpenGL texture backing this surface
+    OGLTexture texture;
+    /// Unscaled dimensions in pixels
+    u32 width = 0;
+    u32 height = 0;
+    /// Passed as GL_PACK_ROW_LENGTH when the surface is read back
+    u32 stride = 0;
+    /// Resolution scale factors applied to width and height
+    float res_scale_width = 1.f;
+    float res_scale_height = 1.f;
+
+    /// Selects the MortonCopyPixels readback path instead of a direct read when flushing
+    bool is_tiled = false;
+    PixelFormat pixel_format = PixelFormat::Invalid;
+    /// Set when the texture must be written back to memory on the next flush
+    bool dirty = false;
+};
class RasterizerCacheOpenGL : NonCopyable {
public:
+ RasterizerCacheOpenGL();
~RasterizerCacheOpenGL();
+ /// Blits one texture to another
+ bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
+
+ /// Attempt to blit one surface's texture to another
+ bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
+
/// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
- void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info);
+ CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
- void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) {
- LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format));
- }
+ /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
+ CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
- /// Invalidate any cached resource intersecting the specified region.
- void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false);
+ /// Gets a surface based on the texture configuration
+ CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
- /// Invalidate all cached OpenGL resources tracked by this cache manager
- void InvalidateAll();
+ /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
+ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
-private:
- struct CachedTexture {
- OGLTexture texture;
- GLuint width;
- GLuint height;
- u32 size;
- u64 hash;
- PAddr addr;
- };
+ /// Attempt to get a surface that exactly matches the fill region and format
+ CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
+
+ /// Write the surface back to memory
+ void FlushSurface(CachedSurface* surface);
- std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache;
+ /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
+ void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
+
+ /// Flush all cached resources tracked by this cache manager
+ void FlushAll();
+
+private:
+ SurfaceCache surface_cache;
+ OGLFramebuffer transfer_framebuffers[2];
};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4b54ab9..71d60e69c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,9 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
+#include <cstddef>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/logging/log.h"
+
#include "video_core/pica.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
using Pica::Regs;
using TevStageConfig = Regs::TevStageConfig;
@@ -24,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
}
/// Writes the specified TEV stage source component(s)
-static void AppendSource(std::string& out, TevStageConfig::Source source,
+static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source,
const std::string& index_name) {
+ const auto& state = config.state;
using Source = TevStageConfig::Source;
switch (source) {
case Source::PrimaryColor:
@@ -38,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
out += "secondary_fragment_color";
break;
case Source::Texture0:
- out += "texture(tex[0], texcoord[0])";
+ // Only unit 0 respects the texturing type (according to 3DBrew)
+ switch(state.texture0_type) {
+ case Pica::Regs::TextureConfig::Texture2D:
+ out += "texture(tex[0], texcoord[0])";
+ break;
+ case Pica::Regs::TextureConfig::Projection2D:
+ out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
+ break;
+ default:
+ out += "texture(tex[0], texcoord[0])";
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type));
+ UNIMPLEMENTED();
+ break;
+ }
break;
case Source::Texture1:
out += "texture(tex[1], texcoord[1])";
@@ -63,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
}
/// Writes the color components to use for the specified TEV stage color modifier
-static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier,
+static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier,
TevStageConfig::Source source, const std::string& index_name) {
using ColorModifier = TevStageConfig::ColorModifier;
switch (modifier) {
case ColorModifier::SourceColor:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".rgb";
break;
case ColorModifier::OneMinusSourceColor:
out += "vec3(1.0) - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".rgb";
break;
case ColorModifier::SourceAlpha:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".aaa";
break;
case ColorModifier::OneMinusSourceAlpha:
out += "vec3(1.0) - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".aaa";
break;
case ColorModifier::SourceRed:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".rrr";
break;
case ColorModifier::OneMinusSourceRed:
out += "vec3(1.0) - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".rrr";
break;
case ColorModifier::SourceGreen:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".ggg";
break;
case ColorModifier::OneMinusSourceGreen:
out += "vec3(1.0) - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".ggg";
break;
case ColorModifier::SourceBlue:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".bbb";
break;
case ColorModifier::OneMinusSourceBlue:
out += "vec3(1.0) - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".bbb";
break;
default:
@@ -120,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier
}
/// Writes the alpha component to use for the specified TEV stage alpha modifier
-static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier,
+static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier,
TevStageConfig::Source source, const std::string& index_name) {
using AlphaModifier = TevStageConfig::AlphaModifier;
switch (modifier) {
case AlphaModifier::SourceAlpha:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".a";
break;
case AlphaModifier::OneMinusSourceAlpha:
out += "1.0 - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".a";
break;
case AlphaModifier::SourceRed:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".r";
break;
case AlphaModifier::OneMinusSourceRed:
out += "1.0 - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".r";
break;
case AlphaModifier::SourceGreen:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".g";
break;
case AlphaModifier::OneMinusSourceGreen:
out += "1.0 - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".g";
break;
case AlphaModifier::SourceBlue:
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".b";
break;
case AlphaModifier::OneMinusSourceBlue:
out += "1.0 - ";
- AppendSource(out, source, index_name);
+ AppendSource(out, config, source, index_name);
out += ".b";
break;
default:
@@ -198,6 +220,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
case Operation::AddThenMultiply:
out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]";
break;
+ case Operation::Dot3_RGB:
+ out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)";
+ break;
default:
out += "vec3(0.0)";
LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation);
@@ -276,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
/// Writes the code to emulate the specified TEV stage
static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
- auto& stage = config.tev_stages[index];
+ const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
if (!IsPassThroughTevStage(stage)) {
std::string index_name = std::to_string(index);
out += "vec3 color_results_" + index_name + "[3] = vec3[3](";
- AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name);
+ AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
out += ", ";
- AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name);
+ AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
out += ", ";
- AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name);
+ AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
out += ");\n";
out += "vec3 color_output_" + index_name + " = ";
@@ -293,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
out += ";\n";
out += "float alpha_results_" + index_name + "[3] = float[3](";
- AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name);
+ AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
out += ", ";
- AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name);
+ AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
out += ", ";
- AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name);
+ AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
out += ");\n";
out += "float alpha_output_" + index_name + " = ";
@@ -320,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
/// Writes the code to emulate fragment lighting
static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
+ const auto& lighting = config.state.lighting;
+
// Define lighting globals
out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
"vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@@ -327,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
"vec3 refl_value = vec3(0.0);\n";
// Compute fragment normals
- if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
+ if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
// Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
- std::string bump_selector = std::to_string(config.lighting.bump_selector);
+ std::string bump_selector = std::to_string(lighting.bump_selector);
out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
- if (config.lighting.bump_renorm) {
+ if (lighting.bump_renorm) {
std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
}
- } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
+ } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
// Bump mapping is enabled using a tangent map
LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
UNIMPLEMENTED();
@@ -350,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
// Gets the index into the specified lookup table for specular lighting
- auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) {
+ auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) {
const std::string half_angle = "normalize(normalize(view) + light_vector)";
std::string index;
switch (input) {
@@ -378,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
if (abs) {
// LUT index is in the range of (0.0, 1.0)
- index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
+ index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
} else {
// LUT index is in the range of (-1.0, 1.0)
@@ -396,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
};
// Write the code to emulate each enabled light
- for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) {
- const auto& light_config = config.lighting.light[light_index];
+ for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
+ const auto& light_config = lighting.light[light_index];
std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
// Compute light vector (directional or positional)
@@ -421,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
}
// If enabled, clamp specular component if lighting result is negative
- std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
+ std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
// Specular 0 component
std::string d0_lut_value = "1.0";
- if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
+ if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
// Lookup specular "distribution 0" LUT value
- std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
- d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
+ std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
+ d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
}
std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
// If enabled, lookup ReflectRed value, otherwise, 1.0 is used
- if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
- std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input);
- std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
+ if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
+ std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
+ std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
out += "refl_value.r = " + value + ";\n";
} else {
out += "refl_value.r = 1.0;\n";
}
// If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
- if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
- std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input);
- std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
+ if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
+ std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
+ std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
out += "refl_value.g = " + value + ";\n";
} else {
out += "refl_value.g = refl_value.r;\n";
}
// If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
- if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
- std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input);
- std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
+ if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
+ std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
+ std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
out += "refl_value.b = " + value + ";\n";
} else {
out += "refl_value.b = refl_value.r;\n";
@@ -461,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
// Specular 1 component
std::string d1_lut_value = "1.0";
- if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
+ if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
// Lookup specular "distribution 1" LUT value
- std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
- d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
+ std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
+ d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
}
std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
// Fresnel
- if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
+ if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
// Lookup fresnel LUT value
- std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
- std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
+ std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
+ std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
// Enabled for difffuse lighting alpha component
- if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
- config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
+ if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
+ lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
out += "diffuse_sum.a *= " + value + ";\n";
// Enabled for the specular lighting alpha component
- if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
- config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
+ if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
+ lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
out += "specular_sum.a *= " + value + ";\n";
}
@@ -499,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
}
std::string GenerateFragmentShader(const PicaShaderConfig& config) {
+ const auto& state = config.state;
+
std::string out = R"(
#version 330 core
#define NUM_TEV_STAGES 6
@@ -508,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
in vec4 primary_color;
in vec2 texcoord[3];
+in float texcoord0_w;
in vec4 normquat;
in vec3 view;
@@ -525,6 +555,7 @@ layout (std140) uniform shader_data {
vec4 const_color[NUM_TEV_STAGES];
vec4 tev_combiner_buffer_color;
int alphatest_ref;
+ float depth_scale;
float depth_offset;
vec3 lighting_global_ambient;
LightSrc light_src[NUM_LIGHTS];
@@ -544,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0);
)";
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
- if (config.alpha_test_func == Regs::CompareFunc::Never) {
+ if (state.alpha_test_func == Regs::CompareFunc::Never) {
out += "discard; }";
return out;
}
- if (config.lighting.enable)
+ if (state.lighting.enable)
WriteLighting(out, config);
out += "vec4 combiner_buffer = vec4(0.0);\n";
out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
out += "vec4 last_tex_env_out = vec4(0.0);\n";
- for (size_t index = 0; index < config.tev_stages.size(); ++index)
+ for (size_t index = 0; index < state.tev_stages.size(); ++index)
WriteTevStage(out, config, (unsigned)index);
- if (config.alpha_test_func != Regs::CompareFunc::Always) {
+ if (state.alpha_test_func != Regs::CompareFunc::Always) {
out += "if (";
- AppendAlphaTestCondition(out, config.alpha_test_func);
+ AppendAlphaTestCondition(out, state.alpha_test_func);
out += ") discard;\n";
}
out += "color = last_tex_env_out;\n";
- out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}";
+
+ out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
+ out += "float depth = z_over_w * depth_scale + depth_offset;\n";
+ if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
+ out += "depth /= gl_FragCoord.w;\n";
+ }
+ out += "gl_FragDepth = depth;\n";
+
+ out += "}";
return out;
}
@@ -574,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0);
std::string GenerateVertexShader() {
std::string out = "#version 330 core\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
- out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
out += R"(
out vec4 primary_color;
out vec2 texcoord[3];
+out float texcoord0_w;
out vec4 normquat;
out vec3 view;
@@ -593,6 +634,7 @@ void main() {
texcoord[0] = vert_texcoord0;
texcoord[1] = vert_texcoord1;
texcoord[2] = vert_texcoord2;
+ texcoord0_w = vert_texcoord0_w;
normquat = vert_normquat;
view = vert_view;
gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0ca9d2879..bef3249cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,7 +6,7 @@
#include <string>
-#include "video_core/renderer_opengl/gl_rasterizer.h"
+union PicaShaderConfig;
namespace GLShader {
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index e3f7a5868..dded3db46 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,9 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <algorithm>
#include <vector>
+#include <glad/glad.h>
+
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 097242f6f..f59912f79 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,7 @@ enum Attributes {
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
+ ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..fa141fc9a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,7 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "video_core/pica.h"
+#include <glad/glad.h>
+
+#include "common/common_funcs.h"
+#include "common/logging/log.h"
+
#include "video_core/renderer_opengl/gl_state.h"
OpenGLState OpenGLState::cur_state;
@@ -32,6 +36,8 @@ OpenGLState::OpenGLState() {
stencil.action_stencil_fail = GL_KEEP;
blend.enabled = false;
+ blend.rgb_equation = GL_FUNC_ADD;
+ blend.a_equation = GL_FUNC_ADD;
blend.src_rgb_func = GL_ONE;
blend.dst_rgb_func = GL_ZERO;
blend.src_a_func = GL_ONE;
@@ -48,17 +54,19 @@ OpenGLState::OpenGLState() {
texture_unit.sampler = 0;
}
- for (auto& lut : lighting_lut) {
+ for (auto& lut : lighting_luts) {
lut.texture_1d = 0;
}
- draw.framebuffer = 0;
+ draw.read_framebuffer = 0;
+ draw.draw_framebuffer = 0;
draw.vertex_array = 0;
draw.vertex_buffer = 0;
+ draw.uniform_buffer = 0;
draw.shader_program = 0;
}
-void OpenGLState::Apply() {
+void OpenGLState::Apply() const {
// Culling
if (cull.enabled != cur_state.cull.enabled) {
if (cull.enabled) {
@@ -159,6 +167,11 @@ void OpenGLState::Apply() {
blend.src_a_func, blend.dst_a_func);
}
+ if (blend.rgb_equation != cur_state.blend.rgb_equation ||
+ blend.a_equation != cur_state.blend.a_equation) {
+ glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
+ }
+
if (logic_op != cur_state.logic_op) {
glLogicOp(logic_op);
}
@@ -175,16 +188,19 @@ void OpenGLState::Apply() {
}
// Lighting LUTs
- for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) {
- if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) {
+ for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
+ if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
glActiveTexture(GL_TEXTURE3 + i);
- glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d);
+ glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
}
}
// Framebuffer
- if (draw.framebuffer != cur_state.draw.framebuffer) {
- glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer);
+ if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+ }
+ if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
}
// Vertex array
@@ -210,45 +226,58 @@ void OpenGLState::Apply() {
cur_state = *this;
}
-void OpenGLState::ResetTexture(GLuint id) {
+/// Queries the completeness status of the framebuffer for the given binding point and logs a
+/// critical error when it is anything other than GL_FRAMEBUFFER_COMPLETE.
+/// @param target Framebuffer binding point to check (e.g. GL_READ_FRAMEBUFFER or GL_DRAW_FRAMEBUFFER)
+/// @return The raw status value returned by glCheckFramebufferStatus
+GLenum OpenGLState::CheckFBStatus(GLenum target) {
+    const GLenum status = glCheckFramebufferStatus(target);
+    if (status == GL_FRAMEBUFFER_COMPLETE) {
+        return status;
+    }
+
+    // Pick a short label for the binding point so the log line says which framebuffer failed
+    const char* label = "UNK";
+    if (target == GL_READ_FRAMEBUFFER) {
+        label = "READ";
+    } else if (target == GL_DRAW_FRAMEBUFFER) {
+        label = "DRAW";
+    }
+    LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", label, status);
+
+    return status;
+}
+
+void OpenGLState::ResetTexture(GLuint handle) {
for (auto& unit : cur_state.texture_units) {
- if (unit.texture_2d == id) {
+ if (unit.texture_2d == handle) {
unit.texture_2d = 0;
}
}
}
-void OpenGLState::ResetSampler(GLuint id) {
+void OpenGLState::ResetSampler(GLuint handle) {
for (auto& unit : cur_state.texture_units) {
- if (unit.sampler == id) {
+ if (unit.sampler == handle) {
unit.sampler = 0;
}
}
}
-void OpenGLState::ResetProgram(GLuint id) {
- if (cur_state.draw.shader_program == id) {
+void OpenGLState::ResetProgram(GLuint handle) {
+ if (cur_state.draw.shader_program == handle) {
cur_state.draw.shader_program = 0;
}
}
-void OpenGLState::ResetBuffer(GLuint id) {
- if (cur_state.draw.vertex_buffer == id) {
+void OpenGLState::ResetBuffer(GLuint handle) {
+ if (cur_state.draw.vertex_buffer == handle) {
cur_state.draw.vertex_buffer = 0;
}
- if (cur_state.draw.uniform_buffer == id) {
+ if (cur_state.draw.uniform_buffer == handle) {
cur_state.draw.uniform_buffer = 0;
}
}
-void OpenGLState::ResetVertexArray(GLuint id) {
- if (cur_state.draw.vertex_array == id) {
+void OpenGLState::ResetVertexArray(GLuint handle) {
+ if (cur_state.draw.vertex_array == handle) {
cur_state.draw.vertex_array = 0;
}
}
-void OpenGLState::ResetFramebuffer(GLuint id) {
- if (cur_state.draw.framebuffer == id) {
- cur_state.draw.framebuffer = 0;
+void OpenGLState::ResetFramebuffer(GLuint handle) {
+ if (cur_state.draw.read_framebuffer == handle) {
+ cur_state.draw.read_framebuffer = 0;
+ }
+ if (cur_state.draw.draw_framebuffer == handle) {
+ cur_state.draw.draw_framebuffer = 0;
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..228727054 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -40,6 +40,8 @@ public:
struct {
bool enabled; // GL_BLEND
+ GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
+ GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func; // GL_BLEND_DST_RGB
GLenum src_a_func; // GL_BLEND_SRC_ALPHA
@@ -63,15 +65,15 @@ public:
struct {
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
- } lighting_lut[6];
+ } lighting_luts[6];
struct {
- GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
+ GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
+ GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
- bool shader_dirty;
} draw;
OpenGLState();
@@ -82,14 +84,18 @@ public:
}
/// Apply this state as the current OpenGL state
- void Apply();
-
- static void ResetTexture(GLuint id);
- static void ResetSampler(GLuint id);
- static void ResetProgram(GLuint id);
- static void ResetBuffer(GLuint id);
- static void ResetVertexArray(GLuint id);
- static void ResetFramebuffer(GLuint id);
+ void Apply() const;
+
+ /// Check the status of the current OpenGL read or draw framebuffer configuration
+ static GLenum CheckFBStatus(GLenum target);
+
+ /// Resets and unbinds any references to the given resource in the current OpenGL state
+ static void ResetTexture(GLuint handle);
+ static void ResetSampler(GLuint handle);
+ static void ResetProgram(GLuint handle);
+ static void ResetBuffer(GLuint handle);
+ static void ResetVertexArray(GLuint handle);
+ static void ResetFramebuffer(GLuint handle);
private:
static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index fd3617d77..6dc2758c5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -4,9 +4,16 @@
#pragma once
+#include <array>
+#include <cstddef>
+
#include <glad/glad.h>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/pica.h"
@@ -71,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
return gl_mode;
}
+/// Translates a PICA blend equation into the matching OpenGL blend equation enum.
+/// A value outside the lookup table is logged, hits UNREACHABLE(), and falls back to GL_FUNC_ADD.
+inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) {
+    // Lookup table indexed by the numeric value of the PICA blend equation
+    static const GLenum gl_equations[] = {
+        GL_FUNC_ADD,              // BlendEquation::Add
+        GL_FUNC_SUBTRACT,         // BlendEquation::Subtract
+        GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
+        GL_MIN,                   // BlendEquation::Min
+        GL_MAX,                   // BlendEquation::Max
+    };
+
+    const size_t index = static_cast<size_t>(equation);
+    if (index < ARRAY_SIZE(gl_equations)) {
+        return gl_equations[index];
+    }
+
+    // Out-of-range input: report it and return a safe default
+    LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation);
+    UNREACHABLE();
+
+    return GL_FUNC_ADD;
+}
+
inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
static const GLenum blend_func_table[] = {
GL_ZERO, // BlendFactor::Zero
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f424a435 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,23 +5,28 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
+#include <memory>
+
+#include <glad/glad.h>
#include "common/assert.h"
+#include "common/bit_field.h"
#include "common/emu_window.h"
#include "common/logging/log.h"
#include "common/profiler_reporting.h"
+#include "common/synchronized_wrapper.h"
-#include "core/memory.h"
-#include "core/settings.h"
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
+#include "core/memory.h"
+#include "core/settings.h"
+#include "core/tracer/recorder.h"
-#include "video_core/video_core.h"
#include "video_core/debug_utils/debug_utils.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/video_core.h"
static const char vertex_shader[] = R"(
#version 150 core
@@ -107,7 +112,7 @@ void RendererOpenGL::SwapBuffers() {
OpenGLState prev_state = OpenGLState::GetCurState();
state.Apply();
- for(int i : {0, 1}) {
+ for (int i : {0, 1}) {
const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
// Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +122,25 @@ void RendererOpenGL::SwapBuffers() {
LCD::Read(color_fill.raw, lcd_color_addr);
if (color_fill.is_enabled) {
- LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]);
+ LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
// Resize the texture in case the framebuffer size has changed
- textures[i].width = 1;
- textures[i].height = 1;
+ screen_infos[i].texture.width = 1;
+ screen_infos[i].texture.height = 1;
} else {
- if (textures[i].width != (GLsizei)framebuffer.width ||
- textures[i].height != (GLsizei)framebuffer.height ||
- textures[i].format != framebuffer.color_format) {
+ if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
+ screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
+ screen_infos[i].texture.format != framebuffer.color_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
- ConfigureFramebufferTexture(textures[i], framebuffer);
+ ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
}
- LoadFBToActiveGLTexture(framebuffer, textures[i]);
+ LoadFBToScreenInfo(framebuffer, screen_infos[i]);
// Resize the texture in case the framebuffer size has changed
- textures[i].width = framebuffer.width;
- textures[i].height = framebuffer.height;
+ screen_infos[i].texture.width = framebuffer.width;
+ screen_infos[i].texture.height = framebuffer.height;
}
}
@@ -166,8 +171,8 @@ void RendererOpenGL::SwapBuffers() {
/**
* Loads framebuffer from emulated memory into the display information structure.
*/
-void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer,
- const TextureInfo& texture) {
+void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
+ ScreenInfo& screen_info) {
const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +182,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
framebuffer_addr, (int)framebuffer.width,
(int)framebuffer.height, (int)framebuffer.format);
- const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
-
int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
size_t pixel_stride = framebuffer.stride / bpp;
@@ -189,24 +192,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
// only allows rows to have a memory alignment of 4.
ASSERT(pixel_stride % 4 == 0);
- state.texture_units[0].texture_2d = texture.handle;
- state.Apply();
+ if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) {
+ // Reset the screen info's display texture to its own permanent texture
+ screen_info.display_texture = screen_info.texture.resource.handle;
+ screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
- glActiveTexture(GL_TEXTURE0);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
+ Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
- // Update existing texture
- // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
- // differ from the LCD resolution.
- // TODO: Applications could theoretically crash Citra here by specifying too large
- // framebuffer sizes. We should make sure that this cannot happen.
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
- texture.gl_format, texture.gl_type, framebuffer_data);
+ const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+ state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.Apply();
- state.texture_units[0].texture_2d = 0;
- state.Apply();
+ glActiveTexture(GL_TEXTURE0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
+
+ // Update existing texture
+ // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
+ // differ from the LCD resolution.
+ // TODO: Applications could theoretically crash Citra here by specifying too large
+ // framebuffer sizes. We should make sure that this cannot happen.
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
+ screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+
+ state.texture_units[0].texture_2d = 0;
+ state.Apply();
+ }
}
/**
@@ -216,7 +229,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
*/
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
const TextureInfo& texture) {
- state.texture_units[0].texture_2d = texture.handle;
+ state.texture_units[0].texture_2d = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -224,6 +237,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
// Update existing texture
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
+
+ state.texture_units[0].texture_2d = 0;
+ state.Apply();
}
/**
@@ -233,20 +249,22 @@ void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
// Link shaders and get variable locations
- program_id = GLShader::LoadProgram(vertex_shader, fragment_shader);
- uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix");
- uniform_color_texture = glGetUniformLocation(program_id, "color_texture");
- attrib_position = glGetAttribLocation(program_id, "vert_position");
- attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord");
+ shader.Create(vertex_shader, fragment_shader);
+ state.draw.shader_program = shader.handle;
+ state.Apply();
+ uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
+ uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
+ attrib_position = glGetAttribLocation(shader.handle, "vert_position");
+ attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
// Generate VBO handle for drawing
- glGenBuffers(1, &vertex_buffer_handle);
+ vertex_buffer.Create();
// Generate VAO
- glGenVertexArrays(1, &vertex_array_handle);
+ vertex_array.Create();
- state.draw.vertex_array = vertex_array_handle;
- state.draw.vertex_buffer = vertex_buffer_handle;
+ state.draw.vertex_array = vertex_array.handle;
+ state.draw.vertex_buffer = vertex_buffer.handle;
state.draw.uniform_buffer = 0;
state.Apply();
@@ -258,13 +276,13 @@ void RendererOpenGL::InitOpenGLObjects() {
glEnableVertexAttribArray(attrib_tex_coord);
// Allocate textures for each screen
- for (auto& texture : textures) {
- glGenTextures(1, &texture.handle);
+ for (auto& screen_info : screen_infos) {
+ screen_info.texture.resource.Create();
// Allocation of storage is deferred until the first frame, when we
// know the framebuffer size.
- state.texture_units[0].texture_2d = texture.handle;
+ state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -273,6 +291,8 @@ void RendererOpenGL::InitOpenGLObjects() {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+ screen_info.display_texture = screen_info.texture.resource.handle;
}
state.texture_units[0].texture_2d = 0;
@@ -327,30 +347,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
UNIMPLEMENTED();
}
- state.texture_units[0].texture_2d = texture.handle;
+ state.texture_units[0].texture_2d = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
texture.gl_format, texture.gl_type, nullptr);
+
+ state.texture_units[0].texture_2d = 0;
+ state.Apply();
}
/**
* Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
*/
-void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) {
+void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
+ auto& texcoords = screen_info.display_texcoords;
+
std::array<ScreenRectVertex, 4> vertices = {{
- ScreenRectVertex(x, y, 1.f, 0.f),
- ScreenRectVertex(x+w, y, 1.f, 1.f),
- ScreenRectVertex(x, y+h, 0.f, 0.f),
- ScreenRectVertex(x+w, y+h, 0.f, 1.f),
+ ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
+ ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
+ ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
+ ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
}};
- state.texture_units[0].texture_2d = texture.handle;
+ state.texture_units[0].texture_2d = screen_info.display_texture;
state.Apply();
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ state.texture_units[0].texture_2d = 0;
+ state.Apply();
}
/**
@@ -362,9 +390,6 @@ void RendererOpenGL::DrawScreens() {
glViewport(0, 0, layout.width, layout.height);
glClear(GL_COLOR_BUFFER_BIT);
- state.draw.shader_program = program_id;
- state.Apply();
-
// Set projection matrix
std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
(float)layout.height);
@@ -374,9 +399,9 @@ void RendererOpenGL::DrawScreens() {
glActiveTexture(GL_TEXTURE0);
glUniform1i(uniform_color_texture, 0);
- DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
+ DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
(float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
- DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
+ DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
(float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
m_current_frame++;
@@ -448,12 +473,6 @@ static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
bool RendererOpenGL::Init() {
render_window->MakeCurrent();
- // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders
- if (!gladLoadGL()) {
- LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting...");
- exit(-1);
- }
-
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..00e1044ab 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -8,13 +8,34 @@
#include <glad/glad.h>
+#include "common/common_types.h"
+#include "common/math_util.h"
+
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
class EmuWindow;
+/// Structure used for storing information about the textures for each 3DS screen: the OpenGL texture object together with its tracked size and pixel formats
+struct TextureInfo {
+ OGLTexture resource; ///< OpenGL texture object; its handle is bound to texture unit 0 for uploads
+ GLsizei width; ///< Tracked texture width; compared against the framebuffer width to detect a resize
+ GLsizei height; ///< Tracked texture height; compared against the framebuffer height to detect a resize
+ GPU::Regs::PixelFormat format; ///< Emulated pixel format; compared against the framebuffer's color_format
+ GLenum gl_format; ///< Pixel data format argument passed to glTexImage2D / glTexSubImage2D
+ GLenum gl_type; ///< Pixel data type argument passed to glTexImage2D / glTexSubImage2D
+};
+
+/// Structure used for storing information about the display target for each 3DS screen: which texture is drawn and which region of it is sampled
+struct ScreenInfo {
+ GLuint display_texture; ///< Texture handle bound when this screen is drawn; initialized to texture.resource.handle
+ MathUtil::Rectangle<float> display_texcoords; ///< Texture coordinates used for the screen quad; reset to (0, 0, 1, 1) when falling back to the screen's own texture
+ TextureInfo texture; ///< The screen's own permanent texture and its format information
+};
+
class RendererOpenGL : public RendererBase {
public:
@@ -37,26 +58,16 @@ public:
void ShutDown() override;
private:
- /// Structure used for storing information about the textures for each 3DS screen
- struct TextureInfo {
- GLuint handle;
- GLsizei width;
- GLsizei height;
- GPU::Regs::PixelFormat format;
- GLenum gl_format;
- GLenum gl_type;
- };
-
void InitOpenGLObjects();
void ConfigureFramebufferTexture(TextureInfo& texture,
const GPU::Regs::FramebufferConfig& framebuffer);
void DrawScreens();
- void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h);
+ void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
void UpdateFramerate();
- // Loads framebuffer from emulated memory into the active OpenGL texture.
- void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer,
- const TextureInfo& texture);
+ // Loads framebuffer from emulated memory into the display information structure
+ void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
+ ScreenInfo& screen_info);
// Fills active OpenGL texture with the given RGB color.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
const TextureInfo& texture);
@@ -69,10 +80,10 @@ private:
OpenGLState state;
// OpenGL object IDs
- GLuint vertex_array_handle;
- GLuint vertex_buffer_handle;
- GLuint program_id;
- std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively
+ OGLVertexArray vertex_array;
+ OGLBuffer vertex_buffer;
+ OGLShader shader;
+ std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
// Shader uniform location indices
GLuint uniform_modelview_matrix;
GLuint uniform_color_texture;