summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp290
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h21
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp152
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp90
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h22
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp39
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp41
-rw-r--r--src/video_core/renderer_opengl/gl_state.h33
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp7
15 files changed, 585 insertions, 190 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0bb5c068c..c28ae795c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
#include <array>
+#include <bitset>
#include <memory>
#include <string>
#include <string_view>
@@ -19,6 +20,7 @@
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -105,6 +107,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
+ clear_framebuffer.Create();
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
@@ -124,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
- if (!gpu.dirty_flags.vertex_attrib_format) {
+ if (!gpu.dirty.vertex_attrib_format) {
return state.draw.vertex_array;
}
- gpu.dirty_flags.vertex_attrib_format = false;
+ gpu.dirty.vertex_attrib_format = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
@@ -181,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty_flags.vertex_array.set();
+ gpu.dirty.ResetVertexArrays();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -189,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- if (gpu.dirty_flags.vertex_array.none())
+ if (!gpu.dirty.vertex_array_buffers)
return;
+ gpu.dirty.vertex_array_buffers = false;
+
+ const auto& regs = gpu.regs;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty_flags.vertex_array[index])
+ if (!gpu.dirty.vertex_array[index])
continue;
+ gpu.dirty.vertex_array[index] = false;
+ gpu.dirty.vertex_instance[index] = false;
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled())
@@ -224,8 +230,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
glVertexArrayBindingDivisor(vao, index, 0);
}
}
+}
+
+void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+ auto& gpu = system.GPU().Maxwell3D();
+
+ if (!gpu.dirty.vertex_instances)
+ return;
+ gpu.dirty.vertex_instances = false;
+
+ const auto& regs = gpu.regs;
+ // Upload all guest vertex arrays sequentially to our buffer
+ for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ if (!gpu.dirty.vertex_instance[index])
+ continue;
+
+ gpu.dirty.vertex_instance[index] = false;
- gpu.dirty_flags.vertex_array.reset();
+ if (regs.instanced_arrays.IsInstancingEnabled(index) &&
+ regs.vertex_array[index].divisor != 0) {
+ // Enable vertex buffer instancing with the specified divisor.
+ glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
+ } else {
+ // Disable the vertex buffer instancing.
+ glVertexArrayBindingDivisor(vao, index, 0);
+ }
+ }
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
@@ -298,9 +328,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
Shader shader{shader_cache.GetStageProgram(program)};
- const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
+ const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupDrawConstBuffers(stage_enum, shader);
- SetupGlobalRegions(stage_enum, shader);
+ SetupDrawGlobalMemory(stage_enum, shader);
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
@@ -341,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SyncClipEnabled(clip_distances);
- gpu.dirty_flags.shaders = false;
+ gpu.dirty.shaders = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -424,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
- if (fb_config_state == current_framebuffer_config_state &&
- gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
+ if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
return current_depth_stencil_usage;
}
+ gpu.dirty.render_settings = false;
current_framebuffer_config_state = fb_config_state;
texture_cache.GuardRenderTargets(true);
@@ -519,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
}
+void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+ bool using_depth_fb, bool using_stencil_fb) {
+ auto& gpu = system.GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
+
+ texture_cache.GuardRenderTargets(true);
+ View color_surface{};
+ if (using_color_fb) {
+ color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
+ }
+ View depth_surface{};
+ if (using_depth_fb || using_stencil_fb) {
+ depth_surface = texture_cache.GetDepthBufferSurface(false);
+ }
+ texture_cache.GuardRenderTargets(false);
+
+ current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+ current_state.ApplyFramebufferState();
+
+ if (color_surface) {
+ color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
+ } else {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ }
+
+ if (depth_surface) {
+ const auto& params = depth_surface->GetSurfaceParams();
+ switch (params.type) {
+ case VideoCore::Surface::SurfaceType::Depth: {
+ depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ break;
+ }
+ case VideoCore::Surface::SurfaceType::DepthStencil: {
+ depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ break;
+ }
+ default: { UNIMPLEMENTED(); }
+ }
+ } else {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+ }
+}
+
void RasterizerOpenGL::Clear() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& maxwell3d = system.GPU().Maxwell3D();
+
+ if (!maxwell3d.ShouldExecute()) {
+ return;
+ }
+
+ const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
- OpenGLState clear_state;
+ OpenGLState prev_state{OpenGLState::GetCurState()};
+ SCOPE_EXIT({
+ prev_state.AllDirty();
+ prev_state.Apply();
+ });
+
+ OpenGLState clear_state{OpenGLState::GetCurState()};
+ clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
@@ -545,6 +633,7 @@ void RasterizerOpenGL::Clear() {
// true.
clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS;
+ clear_state.depth.write_mask = GL_TRUE;
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -581,8 +670,9 @@ void RasterizerOpenGL::Clear() {
return;
}
- const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
- clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+ ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+
+ SyncViewport(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -591,21 +681,18 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}
- clear_state.ApplyColorMask();
- clear_state.ApplyDepth();
- clear_state.ApplyStencilTest();
- clear_state.ApplyViewport();
- clear_state.ApplyFramebufferState();
+ clear_state.AllDirty();
+ clear_state.Apply();
if (use_color) {
- glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+ glClearBufferfv(GL_COLOR, 0, regs.clear_color);
}
- if (clear_depth && clear_stencil) {
+ if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
- } else if (clear_depth) {
+ } else if (use_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
- } else if (clear_stencil) {
+ } else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
@@ -616,6 +703,11 @@ void RasterizerOpenGL::DrawArrays() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
+
+ if (!gpu.ShouldExecute()) {
+ return;
+ }
+
const auto& regs = gpu.regs;
SyncColorMask();
@@ -661,6 +753,7 @@ void RasterizerOpenGL::DrawArrays() {
// Upload vertex and index data.
SetupVertexBuffer(vao);
+ SetupVertexInstances(vao);
const GLintptr index_buffer_offset = SetupIndexBuffer();
// Setup draw parameters. It will automatically choose what glDraw* method to use.
@@ -687,7 +780,7 @@ void RasterizerOpenGL::DrawArrays() {
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty_flags.vertex_array.set();
+ gpu.dirty.ResetVertexArrays();
}
shader_program_manager->ApplyTo(state);
@@ -700,6 +793,46 @@ void RasterizerOpenGL::DrawArrays() {
params.DispatchDraw();
accelerate_draw = AccelDraw::Disabled;
+ gpu.dirty.memory_general = false;
+}
+
+void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
+ if (!GLAD_GL_ARB_compute_variable_group_size) {
+ LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
+ "lack of GL_ARB_compute_variable_group_size");
+ return;
+ }
+
+ auto kernel = shader_cache.GetComputeKernel(code_addr);
+ const auto [program, next_bindings] = kernel->GetProgramHandle({});
+ state.draw.shader_program = program;
+ state.draw.program_pipeline = 0;
+
+ const std::size_t buffer_size =
+ Tegra::Engines::KeplerCompute::NumConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ buffer_cache.Map(buffer_size);
+
+ bind_ubo_pushbuffer.Setup(0);
+ bind_ssbo_pushbuffer.Setup(0);
+
+ SetupComputeConstBuffers(kernel);
+ SetupComputeGlobalMemory(kernel);
+
+ // TODO(Rodrigo): Bind images and samplers
+
+ buffer_cache.Unmap();
+
+ bind_ubo_pushbuffer.Bind();
+ bind_ssbo_pushbuffer.Bind();
+
+ state.ApplyShaderProgram();
+ state.ApplyProgramPipeline();
+
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
+ launch_desc.grid_dim_z, launch_desc.block_dim_x,
+ launch_desc.block_dim_y, launch_desc.block_dim_z);
}
void RasterizerOpenGL::FlushAll() {}
@@ -775,12 +908,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto stage_index = static_cast<std::size_t>(stage);
- const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
-
- // Upload only the enabled buffers from the 16 constbuffers of each shader stage
+ const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+ const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
- SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
+ const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
+ SetupConstBuffer(buffer, entry);
+ }
+}
+
+void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+ MICROPROFILE_SCOPE(OpenGL_UBO);
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
+ const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
+ const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
+ Tegra::Engines::ConstBufferInfo buffer;
+ buffer.address = config.Address();
+ buffer.size = config.size;
+ buffer.enabled = mask[entry.GetIndex()];
+ SetupConstBuffer(buffer, entry);
}
}
@@ -801,24 +947,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
bind_ubo_pushbuffer.Push(cbuf, offset, size);
}
-void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
- const auto alignment{device.GetShaderStorageBufferAlignment()};
-
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
- const auto actual_addr{memory_manager.Read<u64>(addr)};
+ const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
+ SetupGlobalMemory(entry, gpu_addr, size);
+ }
+}
- const auto [ssbo, buffer_offset] =
- buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten());
- bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
+void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+ auto& gpu{system.GPU()};
+ auto& memory_manager{gpu.MemoryManager()};
+ const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+ for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
+ const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
+ const auto gpu_addr{memory_manager.Read<u64>(addr)};
+ const auto size{memory_manager.Read<u32>(addr + 8)};
+ SetupGlobalMemory(entry, gpu_addr, size);
}
}
+void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
+ GPUVAddr gpu_addr, std::size_t size) {
+ const auto alignment{device.GetShaderStorageBufferAlignment()};
+ const auto [ssbo, buffer_offset] =
+ buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten());
+ bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
+}
+
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -907,10 +1068,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
- state.cull.enabled = regs.cull.enabled != 0;
+ const auto& regs = maxwell3d.regs;
+ state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -943,16 +1105,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
- if (!state.depth.test_enabled)
+ if (!state.depth.test_enabled) {
return;
+ }
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
void RasterizerOpenGL::SyncStencilTestState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- state.stencil.test_enabled = regs.stencil_enable != 0;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.stencil_test) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+ state.stencil.test_enabled = regs.stencil_enable != 0;
if (!regs.stencil_enable) {
return;
}
@@ -981,10 +1148,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.action_depth_fail = GL_KEEP;
state.stencil.back.action_depth_pass = GL_KEEP;
}
+ state.MarkDirtyStencilState();
+ maxwell3d.dirty.stencil_test = false;
}
void RasterizerOpenGL::SyncColorMask() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.color_mask) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+
const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) {
@@ -995,6 +1169,9 @@ void RasterizerOpenGL::SyncColorMask() {
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}
+
+ state.MarkDirtyColorMask();
+ maxwell3d.dirty.color_mask = false;
}
void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1009,7 +1186,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
}
void RasterizerOpenGL::SyncBlendState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.blend_state) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
@@ -1032,6 +1213,8 @@ void RasterizerOpenGL::SyncBlendState() {
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
+ maxwell3d.dirty.blend_state = false;
+ state.MarkDirtyBlendState();
return;
}
@@ -1048,6 +1231,9 @@ void RasterizerOpenGL::SyncBlendState() {
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
}
+
+ state.MarkDirtyBlendState();
+ maxwell3d.dirty.blend_state = false;
}
void RasterizerOpenGL::SyncLogicOpState() {
@@ -1099,13 +1285,21 @@ void RasterizerOpenGL::SyncPointState() {
}
void RasterizerOpenGL::SyncPolygonOffset() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.polygon_offset) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
state.polygon_offset.units = regs.polygon_offset_units;
state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp;
+
+ state.MarkDirtyPolygonOffset();
+ maxwell3d.dirty.polygon_offset = false;
}
void RasterizerOpenGL::SyncAlphaTest() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 40b571d58..8b123c48d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -58,6 +58,7 @@ public:
void DrawArrays() override;
void Clear() override;
+ void DispatchCompute(GPUVAddr code_addr) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -108,17 +109,30 @@ private:
OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
+ void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+ bool using_depth_fb, bool using_stencil_fb);
+
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);
+ /// Configures the current constbuffers to use for the kernel invocation.
+ void SetupComputeConstBuffers(const Shader& kernel);
+
/// Configures a constant buffer.
void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
- void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader);
+ void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader);
+
+ /// Configures the current global memory entries to use for the kernel invocation.
+ void SetupComputeGlobalMemory(const Shader& kernel);
+
+ /// Configures a constant buffer.
+ void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
+ std::size_t size);
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
@@ -216,6 +230,7 @@ private:
GLuint SetupVertexFormat();
void SetupVertexBuffer(GLuint vao);
+ void SetupVertexInstances(GLuint vao);
GLintptr SetupIndexBuffer();
@@ -226,6 +241,8 @@ private:
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
+ OGLFramebuffer clear_framebuffer;
+
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 32dd9eae7..1c90facc3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -23,13 +23,13 @@ namespace OpenGL {
using VideoCommon::Shader::ProgramCode;
-// One UBO is always reserved for emulation values
-constexpr u32 RESERVED_UBOS = 1;
+// One UBO is always reserved for emulation values on staged shaders
+constexpr u32 STAGE_RESERVED_UBOS = 1;
struct UnspecializedShader {
std::string code;
GLShader::ShaderEntries entries;
- Maxwell::ShaderProgram program_type;
+ ProgramType program_type;
};
namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
}
/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
+constexpr GLenum GetShaderType(ProgramType program_type) {
switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
+ case ProgramType::VertexA:
+ case ProgramType::VertexB:
return GL_VERTEX_SHADER;
- case Maxwell::ShaderProgram::Geometry:
+ case ProgramType::Geometry:
return GL_GEOMETRY_SHADER;
- case Maxwell::ShaderProgram::Fragment:
+ case ProgramType::Fragment:
return GL_FRAGMENT_SHADER;
+ case ProgramType::Compute:
+ return GL_COMPUTE_SHADER;
default:
return GL_NONE;
}
@@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
}
}
+ProgramType GetProgramType(Maxwell::ShaderProgram program) {
+ switch (program) {
+ case Maxwell::ShaderProgram::VertexA:
+ return ProgramType::VertexA;
+ case Maxwell::ShaderProgram::VertexB:
+ return ProgramType::VertexB;
+ case Maxwell::ShaderProgram::TesselationControl:
+ return ProgramType::TessellationControl;
+ case Maxwell::ShaderProgram::TesselationEval:
+ return ProgramType::TessellationEval;
+ case Maxwell::ShaderProgram::Geometry:
+ return ProgramType::Geometry;
+ case Maxwell::ShaderProgram::Fragment:
+ return ProgramType::Fragment;
+ }
+ UNREACHABLE();
+ return {};
+}
+
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
@@ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
}
/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
+u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
if (size_a == 0) {
size_a = CalculateProgramSize(code);
}
u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
- if (program_type != Maxwell::ShaderProgram::VertexA) {
+ if (program_type != ProgramType::VertexA) {
return unique_identifier;
}
// VertexA programs include two programs
@@ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
}
/// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
+GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
setup.program.size_a = CalculateProgramSize(program_code);
setup.program.size_b = 0;
- if (program_type == Maxwell::ShaderProgram::VertexA) {
+ if (program_type == ProgramType::VertexA) {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
@@ -168,22 +189,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
+ case ProgramType::VertexA:
+ case ProgramType::VertexB:
return GLShader::GenerateVertexShader(device, setup);
- case Maxwell::ShaderProgram::Geometry:
+ case ProgramType::Geometry:
return GLShader::GenerateGeometryShader(device, setup);
- case Maxwell::ShaderProgram::Fragment:
+ case ProgramType::Fragment:
return GLShader::GenerateFragmentShader(device, setup);
+ case ProgramType::Compute:
+ return GLShader::GenerateComputeShader(device, setup);
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
}
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
- Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
+ ProgramType program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
@@ -194,7 +216,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
- source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+ if (program_type == ProgramType::Compute) {
+ source += "#extension GL_ARB_compute_variable_group_size : require\n";
+ }
+ source += '\n';
+
+ if (program_type != ProgramType::Compute) {
+ source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+ }
for (const auto& cbuf : entries.const_buffers) {
source +=
@@ -221,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
}
- if (program_type == Maxwell::ShaderProgram::Geometry) {
+ if (program_type == ProgramType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(primitive_mode);
source += "layout (" + std::string(glsl_topology) + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
}
+ if (program_type == ProgramType::Compute) {
+ source += "layout (local_size_variable) in;\n";
+ }
source += code;
@@ -255,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result)
: RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, program_type{program_type},
@@ -268,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
ProgramCode&& program_code_b) {
const auto code_size{CalculateProgramSize(program_code)};
const auto code_size_b{CalculateProgramSize(program_code_b)};
- auto result{CreateProgram(params.device, program_type, program_code, program_code_b)};
+ auto result{
+ CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return {};
}
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
- params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)),
- static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code),
- std::move(program_code_b)));
+ params.unique_identifier, GetProgramType(program_type),
+ static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
+ std::move(program_code), std::move(program_code_b)));
- return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, GetProgramType(program_type), std::move(result)));
}
Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
GLShader::ProgramResult result) {
- return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, GetProgramType(program_type), std::move(result)));
+}
+
+Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
+ auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
+
+ const auto code_size{CalculateProgramSize(code)};
+ params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
+ static_cast<u32>(code_size / sizeof(u64)), 0,
+ std::move(code), {}));
+
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, ProgramType::Compute, std::move(result)));
+}
+
+Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
+ GLShader::ProgramResult result) {
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, ProgramType::Compute, std::move(result)));
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
GLuint handle{};
- if (program_type == Maxwell::ShaderProgram::Geometry) {
+ if (program_type == ProgramType::Geometry) {
handle = GetGeometryShader(variant);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
@@ -308,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
handle = program->handle;
}
- auto base_bindings{variant.base_bindings};
- base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
+ auto base_bindings = variant.base_bindings;
+ base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
+ if (program_type != ProgramType::Compute) {
+ base_bindings.cbuf += STAGE_RESERVED_UBOS;
+ }
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
@@ -572,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+ if (!system.GPU().Maxwell3D().dirty.shaders) {
return last_shaders[static_cast<std::size_t>(program)];
}
@@ -589,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// No shader found - create a new one
ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
ProgramCode program_code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
+ const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
+ if (is_program_a) {
const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
program_code_b = GetShaderCode(memory_manager, program_addr_b,
memory_manager.GetPointer(program_addr_b));
}
- const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+ const auto unique_identifier =
+ GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
host_ptr, unique_identifier};
@@ -612,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
+Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+ auto& memory_manager{system.GPU().MemoryManager()};
+ const auto host_ptr{memory_manager.GetPointer(code_addr)};
+ auto kernel = TryGet(host_ptr);
+ if (kernel) {
+ return kernel;
+ }
+
+ // No kernel found - create a new one
+ auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
+ const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
+ const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
+ const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+ host_ptr, unique_identifier};
+
+ const auto found = precompiled_shaders.find(unique_identifier);
+ if (found == precompiled_shaders.end()) {
+ kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+ } else {
+ kernel = CachedShader::CreateKernelFromCache(params, found->second);
+ }
+
+ Register(kernel);
+ return kernel;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index bbb53cdf4..a3106a0ff 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -61,6 +61,11 @@ public:
Maxwell::ShaderProgram program_type,
GLShader::ProgramResult result);
+ static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
+
+ static Shader CreateKernelFromCache(const ShaderParameters& params,
+ GLShader::ProgramResult result);
+
VAddr GetCpuAddr() const override {
return cpu_addr;
}
@@ -78,7 +83,7 @@ public:
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
- explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+ explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result);
// Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -104,7 +109,7 @@ private:
u8* host_ptr{};
VAddr cpu_addr{};
u64 unique_identifier{};
- Maxwell::ShaderProgram program_type{};
+ ProgramType program_type{};
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;
@@ -132,6 +137,9 @@ public:
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
+ /// Gets a compute kernel in the passed address
+ Shader GetComputeKernel(GPUVAddr code_addr);
+
protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const Shader& object) override {}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 119073776..ffe26b241 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,7 +37,6 @@ using namespace std::string_literals;
using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
+constexpr bool IsVertexShader(ProgramType stage) {
+ return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
+}
+
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
+ explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
@@ -248,25 +251,21 @@ public:
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_viewport_layer_array =
- stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex());
+ IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
entries.shader_length = ir.GetLength();
return entries;
}
private:
- using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
- using OperationDecompilersArray =
- std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
-
void DeclareVertex() {
- if (stage != ShaderStage::Vertex)
+ if (!IsVertexShader(stage))
return;
DeclareVertexRedeclarations();
}
void DeclareGeometry() {
- if (stage != ShaderStage::Geometry) {
+ if (stage != ProgramType::Geometry) {
return;
}
@@ -297,14 +296,14 @@ private:
break;
}
}
- if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) {
+ if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
if (ir.UsesViewportIndex()) {
code.AddLine("int gl_ViewportIndex;");
}
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex &&
+ } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
!device.HasVertexViewportLayer()) {
LOG_ERROR(
Render_OpenGL,
@@ -341,11 +340,16 @@ private:
}
void DeclareLocalMemory() {
- if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
- const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
- code.AddNewLine();
+ // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
+ // specialization time.
+ const u64 local_memory_size =
+ stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
+ if (local_memory_size == 0) {
+ return;
}
+ const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
+ code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
+ code.AddNewLine();
}
void DeclareInternalFlags() {
@@ -399,12 +403,12 @@ private:
const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
- if (stage == ShaderStage::Geometry) {
+ if (stage == ProgramType::Geometry) {
name = "gs_" + name + "[]";
}
std::string suffix;
- if (stage == ShaderStage::Fragment) {
+ if (stage == ProgramType::Fragment) {
const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
return;
@@ -416,7 +420,7 @@ private:
}
void DeclareOutputAttributes() {
- if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) {
+ if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
DeclareOutputAttribute(ToGenericAttribute(i));
}
@@ -538,7 +542,7 @@ private:
constexpr u32 element_stride{4};
const u32 address{generic_base + index * generic_stride + element * element_stride};
- const bool declared{stage != ShaderStage::Fragment ||
+ const bool declared{stage != ProgramType::Fragment ||
header.ps.GetAttributeUse(index) != AttributeUse::Unused};
const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
code.AddLine("case 0x{:x}: return {};", address, value);
@@ -642,7 +646,7 @@ private:
}
if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
+ UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
return fmt::format("readPhysicalAttribute(ftou({}))",
@@ -697,6 +701,9 @@ private:
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
+ if (stage == ProgramType::Compute) {
+ LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
+ }
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
}
@@ -726,7 +733,7 @@ private:
std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
- if (stage == ShaderStage::Geometry && buffer) {
+ if (stage == ProgramType::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
@@ -738,10 +745,10 @@ private:
switch (attribute) {
case Attribute::Index::Position:
switch (stage) {
- case ShaderStage::Geometry:
+ case ProgramType::Geometry:
return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
GetSwizzle(element));
- case ShaderStage::Fragment:
+ case ProgramType::Fragment:
return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
default:
UNREACHABLE();
@@ -762,7 +769,7 @@ private:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
- ASSERT(stage == ShaderStage::Vertex);
+ ASSERT(IsVertexShader(stage));
switch (element) {
case 2:
// Config pack's first value is instance_id.
@@ -774,7 +781,7 @@ private:
return "0";
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderStage::Fragment);
+ ASSERT(stage == ProgramType::Fragment);
switch (element) {
case 3:
return "itof(gl_FrontFacing ? -1 : 0)";
@@ -796,7 +803,7 @@ private:
return value;
}
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
- const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
+ const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
const std::string temporary = code.GenerateTemporary();
code.AddLine("{}float {} = {};", precise, temporary, value);
@@ -831,12 +838,12 @@ private:
UNIMPLEMENTED();
return {};
case 1:
- if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) {
+ if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
return std::make_pair("gl_Layer", true);
case 2:
- if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) {
+ if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
return std::make_pair("gl_ViewportIndex", true);
@@ -1073,6 +1080,9 @@ private:
target = result->first;
is_integer = result->second;
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
+ if (stage == ProgramType::Compute) {
+ LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
+ }
target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress());
@@ -1400,14 +1410,10 @@ private:
return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
}
- std::string LogicalAll2(Operation operation) {
+ std::string LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}
- std::string LogicalAny2(Operation operation) {
- return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
- }
-
template <bool with_nan>
std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1630,7 +1636,7 @@ private:
}
std::string Exit(Operation operation) {
- if (stage != ShaderStage::Fragment) {
+ if (stage != ProgramType::Fragment) {
code.AddLine("return;");
return {};
}
@@ -1681,7 +1687,7 @@ private:
}
std::string EmitVertex(Operation operation) {
- ASSERT_MSG(stage == ShaderStage::Geometry,
+ ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
// If a geometry shader is attached, it will always flip (it's the last stage before
@@ -1692,7 +1698,7 @@ private:
}
std::string EndPrimitive(Operation operation) {
- ASSERT_MSG(stage == ShaderStage::Geometry,
+ ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
code.AddLine("EndPrimitive();");
@@ -1714,7 +1720,7 @@ private:
return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
}
- static constexpr OperationDecompilersArray operation_decompilers = {
+ static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
&GLSLDecompiler::Select,
@@ -1798,8 +1804,7 @@ private:
&GLSLDecompiler::LogicalXor,
&GLSLDecompiler::LogicalNegate,
&GLSLDecompiler::LogicalPick2,
- &GLSLDecompiler::LogicalAll2,
- &GLSLDecompiler::LogicalAny2,
+ &GLSLDecompiler::LogicalAnd2,
&GLSLDecompiler::LogicalLessThan<Type::Float>,
&GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1863,6 +1868,7 @@ private:
&GLSLDecompiler::WorkGroupId<1>,
&GLSLDecompiler::WorkGroupId<2>,
};
+ static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
return GetDeclarationWithSuffix(index, "gpr");
@@ -1927,7 +1933,7 @@ private:
}
u32 GetNumPhysicalInputAttributes() const {
- return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
+ return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
}
u32 GetNumPhysicalAttributes() const {
@@ -1940,7 +1946,7 @@ private:
const Device& device;
const ShaderIR& ir;
- const ShaderStage stage;
+ const ProgramType stage;
const std::string suffix;
const Header header;
@@ -1971,7 +1977,7 @@ std::string GetCommonDeclarations() {
MAX_CONSTBUFFER_ELEMENTS);
}
-ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
+ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 02586736d..2ea02f5bf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,14 +12,26 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"
-namespace OpenGL {
-class Device;
-}
-
namespace VideoCommon::Shader {
class ShaderIR;
}
+namespace OpenGL {
+
+class Device;
+
+enum class ProgramType : u32 {
+ VertexA = 0,
+ VertexB = 1,
+ TessellationControl = 2,
+ TessellationEval = 3,
+ Geometry = 4,
+ Fragment = 5,
+ Compute = 6
+};
+
+} // namespace OpenGL
+
namespace OpenGL::GLShader {
struct ShaderEntries;
@@ -85,6 +97,6 @@ struct ShaderEntries {
std::string GetCommonDeclarations();
ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Maxwell::ShaderStage stage, const std::string& suffix);
+ ProgramType stage, const std::string& suffix);
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 7893d1e26..969fe9ced 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
} // namespace
-ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b)
: unique_identifier{unique_identifier}, program_type{program_type},
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 4f296dda6..cc8bbd61e 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -18,7 +18,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
@@ -34,14 +33,11 @@ namespace OpenGL {
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;
-using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
-
using ProgramCode = std::vector<u64>;
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
+using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
using TextureBufferUsage = std::bitset<64>;
-/// Allocated bindings used by an OpenGL shader program.
+/// Allocated bindings used by an OpenGL shader program
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
@@ -126,7 +122,7 @@ namespace OpenGL {
/// Describes a shader how it's used by the guest GPU
class ShaderDiskCacheRaw {
public:
- explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+ explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b);
ShaderDiskCacheRaw();
@@ -141,30 +137,13 @@ public:
}
bool HasProgramA() const {
- return program_type == Maxwell::ShaderProgram::VertexA;
+ return program_type == ProgramType::VertexA;
}
- Maxwell::ShaderProgram GetProgramType() const {
+ ProgramType GetProgramType() const {
return program_type;
}
- Maxwell::ShaderStage GetProgramStage() const {
- switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- return Maxwell::ShaderStage::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return Maxwell::ShaderStage::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return Maxwell::ShaderStage::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return Maxwell::ShaderStage::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return Maxwell::ShaderStage::Fragment;
- }
- UNREACHABLE();
- }
-
const ProgramCode& GetProgramCode() const {
return program_code;
}
@@ -175,7 +154,7 @@ public:
private:
u64 unique_identifier{};
- Maxwell::ShaderProgram program_type{};
+ ProgramType program_type{};
u32 program_code_size{};
u32 program_code_size_b{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index f9ee8429e..3a8d9e1da 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
-static constexpr u32 PROGRAM_OFFSET{10};
+static constexpr u32 PROGRAM_OFFSET = 10;
+static constexpr u32 COMPUTE_OFFSET = 0;
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
};
)";
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
- ProgramResult program =
- Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
+ const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
+ ProgramResult program = Decompile(device, program_ir, stage, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
- ProgramResult program_b =
- Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
-
+ ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
out += program_b.first;
}
@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
};
)";
+
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
- ProgramResult program =
- Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
+ ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first;
out += R"(
@@ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
- ProgramResult program =
- Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
-
+ ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
out += program.first;
out += R"(
@@ -130,4 +127,22 @@ void main() {
return {std::move(out), std::move(program.second)};
}
+ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
+ const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+
+ std::string out = "// Shader Unique Id: CS" + id + "\n\n";
+ out += GetCommonDeclarations();
+
+ const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
+ ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
+ out += program.first;
+
+ out += R"(
+void main() {
+ execute_compute();
+}
+)";
+ return {std::move(out), std::move(program.second)};
+}
+
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 7cbc590f8..3833e88ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
/// Generates the GLSL fragment shader program source code for the given FS program
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
+/// Generates the GLSL compute shader program source code for the given CS program
+ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
+
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5f3fe067e..9e74eda0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,21 +10,25 @@
namespace OpenGL::GLShader {
-GLuint LoadShader(const char* source, GLenum type) {
- const char* debug_type;
+namespace {
+const char* GetStageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
- debug_type = "vertex";
- break;
+ return "vertex";
case GL_GEOMETRY_SHADER:
- debug_type = "geometry";
- break;
+ return "geometry";
case GL_FRAGMENT_SHADER:
- debug_type = "fragment";
- break;
- default:
- UNREACHABLE();
+ return "fragment";
+ case GL_COMPUTE_SHADER:
+ return "compute";
}
+ UNIMPLEMENTED();
+ return "unknown";
+}
+} // Anonymous namespace
+
+GLuint LoadShader(const char* source, GLenum type) {
+ const char* debug_type = GetStageDebugName(type);
const GLuint shader_id = glCreateShader(type);
glShaderSource(shader_id, 1, &source, nullptr);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 0eae98afe..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -165,6 +165,25 @@ OpenGLState::OpenGLState() {
alpha_test.ref = 0.0f;
}
+void OpenGLState::SetDefaultViewports() {
+ for (auto& item : viewports) {
+ item.x = 0;
+ item.y = 0;
+ item.width = 0;
+ item.height = 0;
+ item.depth_range_near = 0.0f;
+ item.depth_range_far = 1.0f;
+ item.scissor.enabled = false;
+ item.scissor.x = 0;
+ item.scissor.y = 0;
+ item.scissor.width = 0;
+ item.scissor.height = 0;
+ }
+
+ depth_clamp.far_plane = false;
+ depth_clamp.near_plane = false;
+}
+
void OpenGLState::ApplyDefaultState() {
glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
@@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const {
}
}
-void OpenGLState::Apply() const {
+void OpenGLState::Apply() {
MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState();
ApplyVertexArrayState();
@@ -536,19 +555,31 @@ void OpenGLState::Apply() const {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
+ if (dirty.color_mask) {
+ ApplyColorMask();
+ dirty.color_mask = false;
+ }
ApplyDepthClamp();
- ApplyColorMask();
ApplyViewport();
- ApplyStencilTest();
+ if (dirty.stencil_state) {
+ ApplyStencilTest();
+ dirty.stencil_state = false;
+ }
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
- ApplyBlending();
+ if (dirty.blend_state) {
+ ApplyBlending();
+ dirty.blend_state = false;
+ }
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
- ApplyPolygonOffset();
+ if (dirty.polygon_offset) {
+ ApplyPolygonOffset();
+ dirty.polygon_offset = false;
+ }
ApplyAlphaTest();
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,8 +195,9 @@ public:
s_rgb_used = false;
}
+ void SetDefaultViewports();
/// Apply this state as the current OpenGL state
- void Apply() const;
+ void Apply();
void ApplyFramebufferState() const;
void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
/// Viewport does not affects glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor();
+ void MarkDirtyBlendState() {
+ dirty.blend_state = true;
+ }
+
+ void MarkDirtyStencilState() {
+ dirty.stencil_state = true;
+ }
+
+ void MarkDirtyPolygonOffset() {
+ dirty.polygon_offset = true;
+ }
+
+ void MarkDirtyColorMask() {
+ dirty.color_mask = true;
+ }
+
+ void AllDirty() {
+ dirty.blend_state = true;
+ dirty.stencil_state = true;
+ dirty.polygon_offset = true;
+ dirty.color_mask = true;
+ }
+
private:
static OpenGLState cur_state;
// Workaround for sRGB problems caused by QT not supporting srgb output
static bool s_rgb_used;
+ struct {
+ bool blend_state;
+ bool stencil_state;
+ bool viewport_state;
+ bool polygon_offset;
+ bool color_mask;
+ } dirty{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 6ecb02c45..408332f90 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -484,11 +484,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const auto& dst_params{dst_view->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({ prev_state.Apply(); });
+ SCOPE_EXIT({
+ prev_state.AllDirty();
+ prev_state.Apply();
+ });
OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
+ state.AllDirty();
state.Apply();
u32 buffers{};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9ecdddb0d..a05cef3b9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
+ state.AllDirty();
state.Apply();
if (framebuffer) {
@@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
system.GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
+ prev_state.AllDirty();
prev_state.Apply();
}
@@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Link shaders and get variable locations
shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
state.draw.shader_program = shader.handle;
+ state.AllDirty();
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
// Workaround brigthness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
+ state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
state.texture_units[0].texture = 0;
+ state.AllDirty();
state.Apply();
// Clear sRGB state for the next frame
OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint old_read_fb = state.draw.read_framebuffer;
GLuint old_draw_fb = state.draw.draw_framebuffer;
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
+ state.AllDirty();
state.Apply();
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
screenshot_framebuffer.Release();
state.draw.read_framebuffer = old_read_fb;
state.draw.draw_framebuffer = old_draw_fb;
+ state.AllDirty();
state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);