summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl/gl_rasterizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 916
1 files changed, 573 insertions, 343 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ca410287a..6a17bed72 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
#include <array>
+#include <bitset>
#include <memory>
#include <string>
#include <string_view>
@@ -19,7 +20,9 @@
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -29,8 +32,10 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using PixelFormat = VideoCore::Surface::PixelFormat;
-using SurfaceType = VideoCore::Surface::SurfaceType;
+
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceTarget;
+using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -44,70 +49,31 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
-struct DrawParameters {
- GLenum primitive_mode;
- GLsizei count;
- GLint current_instance;
- bool use_indexed;
-
- GLint vertex_first;
-
- GLenum index_format;
- GLint base_vertex;
- GLintptr index_buffer_offset;
-
- void DispatchDraw() const {
- if (use_indexed) {
- const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
- if (current_instance > 0) {
- glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
- index_buffer_ptr, 1, base_vertex,
- current_instance);
- } else {
- glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
- base_vertex);
- }
- } else {
- if (current_instance > 0) {
- glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
- current_instance);
- } else {
- glDrawArrays(primitive_mode, vertex_first, count);
- }
- }
+static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, // Chooses the size to use for a const buffer from the guest buffer info and the shader's entry.
+ const GLShader::ConstBufferEntry& entry) {
+ if (!entry.IsIndirect()) { // Non-indirect entries report their own size.
+ return entry.GetSize();
}
-};
-
-struct FramebufferCacheKey {
- bool is_single_buffer = false;
- bool stencil_enable = false;
-
- std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{};
- u32 colors_count = 0;
-
- GLuint zeta = 0;
- auto Tie() const {
- return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
- zeta);
+ if (buffer.size > Maxwell::MaxConstBufferSize) { // Indirect and oversized: warn and clamp to the maximum.
+ LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
+ Maxwell::MaxConstBufferSize);
+ return Maxwell::MaxConstBufferSize;
}
- bool operator<(const FramebufferCacheKey& rhs) const {
- return Tie() < rhs.Tie();
- }
-};
+ return buffer.size; // Indirect and within limits: use the full guest buffer size.
+}
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system, emu_window, device},
- global_cache{*this}, system{system}, screen_info{info},
- buffer_cache(*this, STREAM_BUFFER_SIZE) {
+ : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
+ system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
+ clear_framebuffer.Create();
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
@@ -121,21 +87,16 @@ void RasterizerOpenGL::CheckExtensions() {
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
- if (!GLAD_GL_ARB_buffer_storage) {
- LOG_WARNING(
- Render_OpenGL,
- "Buffer storage control is not supported! This can cause performance degradation.");
- }
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
- if (!gpu.dirty_flags.vertex_attrib_format) {
+ if (!gpu.dirty.vertex_attrib_format) {
return state.draw.vertex_array;
}
- gpu.dirty_flags.vertex_attrib_format = false;
+ gpu.dirty.vertex_attrib_format = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
@@ -152,8 +113,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
state.draw.vertex_array = vao;
state.ApplyVertexArrayState();
- glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
-
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
// Enables the first 16 vertex attributes always, as we don't know which ones are actually
// used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -191,7 +150,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty_flags.vertex_array.set();
+ gpu.dirty.ResetVertexArrays();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -199,17 +158,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- if (gpu.dirty_flags.vertex_array.none())
+ if (!gpu.dirty.vertex_array_buffers)
return;
+ gpu.dirty.vertex_array_buffers = false;
+
+ const auto& regs = gpu.regs;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty_flags.vertex_array[index])
+ if (!gpu.dirty.vertex_array[index])
continue;
+ gpu.dirty.vertex_array[index] = false;
+ gpu.dirty.vertex_instance[index] = false;
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled())
@@ -220,11 +182,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
ASSERT(end > start);
const u64 size = end - start + 1;
- const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
+ const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
- glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset,
- vertex_array.stride);
+ vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
+ vertex_array.stride);
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
// Enable vertex buffer instancing with the specified divisor.
@@ -234,33 +196,44 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
glVertexArrayBindingDivisor(vao, index, 0);
}
}
-
- gpu.dirty_flags.vertex_array.reset();
}
-DrawParameters RasterizerOpenGL::SetupDraw() {
- const auto& gpu = system.GPU().Maxwell3D();
+void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { // Refreshes the binding divisors of `vao` for vertex arrays whose instancing state is dirty.
+ auto& gpu = system.GPU().Maxwell3D();
+
+ if (!gpu.dirty.vertex_instances) // Global flag clear: no per-array work needed.
+ return;
+ gpu.dirty.vertex_instances = false;
+
const auto& regs = gpu.regs;
- const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
+ // Update the binding divisor of every vertex array whose instancing state is flagged dirty
+ for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ if (!gpu.dirty.vertex_instance[index])
+ continue;
- DrawParameters params{};
- params.current_instance = gpu.state.current_instance;
+ gpu.dirty.vertex_instance[index] = false;
- params.use_indexed = is_indexed;
- params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
+ if (regs.instanced_arrays.IsInstancingEnabled(index) &&
+ regs.vertex_array[index].divisor != 0) {
+ // Enable vertex buffer instancing with the specified divisor.
+ glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
+ } else {
+ // Disable the vertex buffer instancing.
+ glVertexArrayBindingDivisor(vao, index, 0);
+ }
+ }
+}
- if (is_indexed) {
- MICROPROFILE_SCOPE(OpenGL_Index);
- params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- params.count = regs.index_array.count;
- params.index_buffer_offset =
- buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
- params.base_vertex = static_cast<GLint>(regs.vb_element_base);
- } else {
- params.count = regs.vertex_buffer.count;
- params.vertex_first = regs.vertex_buffer.first;
+GLintptr RasterizerOpenGL::SetupIndexBuffer() { // Uploads the index data for indexed draws and returns the offset reported by the buffer cache.
+ if (accelerate_draw != AccelDraw::Indexed) { // Non-indexed draws have no index buffer; report offset 0.
+ return 0;
}
- return params;
+ MICROPROFILE_SCOPE(OpenGL_Index);
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ const std::size_t size = CalculateIndexBufferSize();
+ const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
+ vertex_array_pushbuffer.SetIndexBuffer(buffer); // Recorded in the pushbuffer; DrawPrelude calls vertex_array_pushbuffer.Bind() later.
+ return offset;
}
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@@ -270,10 +243,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
- // Prepare packed bindings
- bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
- bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
-
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -294,16 +263,21 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu, stage);
- const GLintptr offset =
+ const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
// Bind the emulation info buffer
- bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
+ bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
- const auto [program_handle, next_bindings] =
- shader->GetProgramHandle(primitive_mode, base_bindings);
+
+ const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
+ SetupDrawConstBuffers(stage_enum, shader);
+ SetupDrawGlobalMemory(stage_enum, shader);
+ const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
+
+ const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
+ const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -321,11 +295,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
- const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
- SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
- SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
- SetupTextures(stage_enum, shader, program_handle, base_bindings);
-
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -343,50 +312,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
base_bindings = next_bindings;
}
- bind_ubo_pushbuffer.Bind();
- bind_ssbo_pushbuffer.Bind();
-
SyncClipEnabled(clip_distances);
- gpu.dirty_flags.shaders = false;
-}
-
-void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
- OpenGLState& current_state) {
- const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
- auto& framebuffer = entry->second;
-
- if (is_cache_miss)
- framebuffer.Create();
-
- current_state.draw.draw_framebuffer = framebuffer.handle;
- current_state.ApplyFramebufferState();
-
- if (!is_cache_miss)
- return;
-
- if (fbkey.is_single_buffer) {
- if (fbkey.color_attachments[0] != GL_NONE) {
- glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
- 0);
- }
- glDrawBuffer(fbkey.color_attachments[0]);
- } else {
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (fbkey.colors[index]) {
- glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
- fbkey.colors[index], 0);
- }
- }
- glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
- }
-
- if (fbkey.zeta) {
- GLenum zeta_attachment =
- fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
- glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
- }
+ gpu.dirty.shaders = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -414,12 +342,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
}
-bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
- accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
- DrawArrays();
- return true;
-}
-
template <typename Map, typename Interval>
static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
@@ -460,107 +382,117 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
shader_cache.LoadDiskCache(stop_loading, callback);
}
-std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
- OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
- std::optional<std::size_t> single_color_target) {
+void RasterizerOpenGL::ConfigureFramebuffers() { // Builds a framebuffer key from the guest render targets and selects the matching draw framebuffer.
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
- single_color_target};
- if (fb_config_state == current_framebuffer_config_state &&
- gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
- // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
- // single color targets). This is done because the guest registers may not change but the
- // host framebuffer may contain different attachments
- return current_depth_stencil_usage;
+ if (!gpu.dirty.render_settings) { // Skip all work unless the render settings were flagged dirty.
+ return;
}
- current_framebuffer_config_state = fb_config_state;
+ gpu.dirty.render_settings = false;
- Surface depth_surface;
- if (using_depth_fb) {
- depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
- }
+ texture_cache.GuardRenderTargets(true); // NOTE(review): guard semantics are defined by the texture cache - confirm there.
+
+ View depth_surface = texture_cache.GetDepthBufferSurface(true);
+ const auto& regs = gpu.regs;
+ state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
- current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
-
FramebufferCacheKey fbkey;
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { // Every render target slot is queried and stored in the key.
+ View color_surface{texture_cache.GetColorBufferSurface(index, true)};
- if (using_color_fb) {
- if (single_color_target) {
- // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
- Surface color_surface =
- res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
-
- if (color_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even if
- // the shader doesn't actually write to it.
- color_surface->MarkAsModified(true, res_cache);
- // Workaround for and issue in nvidia drivers
- // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
- state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
- }
-
- fbkey.is_single_buffer = true;
- fbkey.color_attachments[0] =
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
- fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0;
- } else {
- // Multiple color attachments are enabled
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
-
- if (color_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even
- // if the shader doesn't actually write to it.
- color_surface->MarkAsModified(true, res_cache);
- // Enable sRGB only for supported formats
- // Workaround for and issue in nvidia drivers
- // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
- state.framebuffer_srgb.enabled |=
- color_surface->GetSurfaceParams().srgb_conversion;
- }
-
- fbkey.color_attachments[index] =
- GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
- fbkey.colors[index] =
- color_surface != nullptr ? color_surface->Texture().handle : 0;
- }
- fbkey.is_single_buffer = false;
- fbkey.colors_count = regs.rt_control.count;
+ if (color_surface) {
+ // Assume that a surface will be written to if it is used as a framebuffer, even
+ // if the shader doesn't actually write to it.
+ texture_cache.MarkColorBufferInUse(index);
}
- } else {
- // No color attachments are enabled - leave them as zero
- fbkey.is_single_buffer = true;
+
+ fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); // Attachment point follows the guest render target mapping.
+ fbkey.colors[index] = std::move(color_surface); // The key takes the view itself; color_surface is not used after this.
}
+ fbkey.colors_count = regs.rt_control.count;
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
- depth_surface->MarkAsModified(true, res_cache);
+ texture_cache.MarkDepthBufferInUse();
- fbkey.zeta = depth_surface->Texture().handle;
- fbkey.stencil_enable = regs.stencil_enable &&
- depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
+ fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; // Read the surface type before the view is moved into the key.
+ fbkey.zeta = std::move(depth_surface);
}
- SetupCachedFramebuffer(fbkey, current_state);
- SyncViewport(current_state);
+ texture_cache.GuardRenderTargets(false);
- return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
+ state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); // Use the framebuffer that framebuffer_cache returns for this key.
+ SyncViewport(state);
+}
+
+void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, // Binds clear_framebuffer as the draw framebuffer and attaches the surfaces a clear needs.
+ bool using_depth_fb, bool using_stencil_fb) {
+ auto& gpu = system.GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
+
+ texture_cache.GuardRenderTargets(true); // NOTE(review): guard semantics are defined by the texture cache - confirm there.
+ View color_surface{};
+ if (using_color_fb) {
+ color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); // Only the render target selected by clear_buffers.RT is fetched.
+ }
+ View depth_surface{};
+ if (using_depth_fb || using_stencil_fb) { // Depth and stencil both come from the depth buffer surface.
+ depth_surface = texture_cache.GetDepthBufferSurface(false);
+ }
+ texture_cache.GuardRenderTargets(false);
+
+ current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+ current_state.ApplyFramebufferState(); // Applied before the attachment calls below, which target GL_DRAW_FRAMEBUFFER.
+
+ if (color_surface) {
+ color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); // Always attachment 0, whichever guest render target was selected.
+ } else {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); // Texture 0 detaches the color attachment.
+ }
+
+ if (depth_surface) {
+ const auto& params = depth_surface->GetSurfaceParams();
+ switch (params.type) {
+ case VideoCore::Surface::SurfaceType::Depth:
+ depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); // Depth-only surface: detach the stencil attachment.
+ break;
+ case VideoCore::Surface::SurfaceType::DepthStencil:
+ depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ } else {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, // No depth surface: detach depth and stencil.
+ 0);
+ }
}
void RasterizerOpenGL::Clear() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& maxwell3d = system.GPU().Maxwell3D();
+
+ if (!maxwell3d.ShouldExecute()) {
+ return;
+ }
+
+ const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
- OpenGLState clear_state;
+ OpenGLState prev_state{OpenGLState::GetCurState()};
+ SCOPE_EXIT({
+ prev_state.AllDirty();
+ prev_state.Apply();
+ });
+
+ OpenGLState clear_state{OpenGLState::GetCurState()};
+ clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
@@ -580,11 +512,13 @@ void RasterizerOpenGL::Clear() {
// true.
clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS;
+ clear_state.depth.write_mask = GL_TRUE;
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
use_stencil = true;
clear_state.stencil.test_enabled = true;
+
if (regs.clear_flags.stencil) {
// Stencil affects the clear so fill it with the used masks
clear_state.stencil.front.test_func = GL_ALWAYS;
@@ -616,8 +550,9 @@ void RasterizerOpenGL::Clear() {
return;
}
- const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
- clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+ ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+
+ SyncViewport(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -626,33 +561,25 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}
- clear_state.ApplyColorMask();
- clear_state.ApplyDepth();
- clear_state.ApplyStencilTest();
- clear_state.ApplyViewport();
+ clear_state.AllDirty();
+ clear_state.Apply();
if (use_color) {
- glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+ glClearBufferfv(GL_COLOR, 0, regs.clear_color);
}
- if (clear_depth && clear_stencil) {
+ if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
- } else if (clear_depth) {
+ } else if (use_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
- } else if (clear_stencil) {
+ } else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
-void RasterizerOpenGL::DrawArrays() {
- if (accelerate_draw == AccelDraw::Disabled)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_Drawing);
+void RasterizerOpenGL::DrawPrelude() {
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
- ConfigureFramebuffers(state);
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -684,31 +611,196 @@ void RasterizerOpenGL::DrawArrays() {
Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
- buffer_size +=
- Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
+ buffer_size += Maxwell::MaxConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
- const bool invalidate = buffer_cache.Map(buffer_size);
- if (invalidate) {
- // As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty_flags.vertex_array.set();
- }
+ // Prepare the vertex array.
+ buffer_cache.Map(buffer_size);
+ // Prepare vertex array format.
const GLuint vao = SetupVertexFormat();
+ vertex_array_pushbuffer.Setup(vao);
+
+ // Upload vertex and index data.
SetupVertexBuffer(vao);
+ SetupVertexInstances(vao);
+ index_buffer_offset = SetupIndexBuffer();
- DrawParameters params = SetupDraw();
- SetupShaders(params.primitive_mode);
+ // Prepare packed bindings.
+ bind_ubo_pushbuffer.Setup(0);
+ bind_ssbo_pushbuffer.Setup(0);
- buffer_cache.Unmap();
+ // Setup shaders and their used resources.
+ texture_cache.GuardSamplers(true);
+ const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
+ SetupShaders(primitive_mode);
+ texture_cache.GuardSamplers(false);
+
+ ConfigureFramebuffers();
+
+ // Signal the buffer cache that we are not going to upload more things.
+ const bool invalidate = buffer_cache.Unmap();
+
+ // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
+ vertex_array_pushbuffer.Bind();
+ bind_ubo_pushbuffer.Bind();
+ bind_ssbo_pushbuffer.Bind();
+
+ if (invalidate) {
+ // As all cached buffers are invalidated, we need to recheck their state.
+ gpu.dirty.ResetVertexArrays();
+ }
shader_program_manager->ApplyTo(state);
state.Apply();
- res_cache.SignalPreDrawCall();
- params.DispatchDraw();
- res_cache.SignalPostDrawCall();
+ if (texture_cache.TextureBarrier()) {
+ glTextureBarrier();
+ }
+}
+
+struct DrawParams { // Parameters of one draw call, plus the dispatch that picks the matching GL entry point.
+ bool is_indexed{};
+ bool is_instanced{};
+ GLenum primitive_mode{};
+ GLint count{};
+ GLint base_vertex{}; // Base vertex for indexed draws; first vertex for array draws (see DispatchDraw).
+ // Indexed settings
+ GLenum index_format{};
+ GLintptr index_buffer_offset{}; // Reinterpreted as the GL index pointer in DispatchDraw.
+
+ // Instanced setting
+ GLint num_instances{};
+ GLint base_instance{};
+
+ void DispatchDraw() { // Issues the draw; is_indexed and is_instanced select one of four GL calls.
+ if (is_indexed) {
+ const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
+ if (is_instanced) {
+ glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
+ index_buffer_ptr, num_instances,
+ base_vertex, base_instance);
+ } else {
+ glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
+ base_vertex);
+ }
+ } else {
+ if (is_instanced) {
+ glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances,
+ base_instance);
+ } else {
+ glDrawArrays(primitive_mode, base_vertex, count);
+ }
+ }
+ }
+};
+
+bool RasterizerOpenGL::DrawBatch(bool is_indexed) { // Runs the draw prelude, then issues one draw from the current Maxwell3D registers; always returns true.
+ accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; // Set before DrawPrelude(); SetupIndexBuffer() reads this mode.
+
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+
+ DrawPrelude();
+
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ const auto& regs = maxwell3d.regs;
+ const auto current_instance = maxwell3d.state.current_instance;
+ DrawParams draw_call{};
+ draw_call.is_indexed = is_indexed;
+ draw_call.num_instances = static_cast<GLint>(1); // One instance per call; the instance index is passed as base_instance.
+ draw_call.base_instance = static_cast<GLint>(current_instance);
+ draw_call.is_instanced = current_instance > 0; // Instance 0 goes through the non-instanced GL calls.
+ draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
+ if (draw_call.is_indexed) {
+ draw_call.count = static_cast<GLint>(regs.index_array.count);
+ draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
+ draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+ draw_call.index_buffer_offset = index_buffer_offset; // Member assigned from SetupIndexBuffer() in DrawPrelude().
+ } else {
+ draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
+ draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
+ }
+ draw_call.DispatchDraw();
+
+ maxwell3d.dirty.memory_general = false;
accelerate_draw = AccelDraw::Disabled;
+ return true;
+}
+
+bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { // Like DrawBatch, but takes the instance count from maxwell3d.mme_draw; always returns true.
+ accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; // Set before DrawPrelude(); SetupIndexBuffer() reads this mode.
+
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+
+ DrawPrelude();
+
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ const auto& regs = maxwell3d.regs;
+ const auto& draw_setup = maxwell3d.mme_draw;
+ DrawParams draw_call{};
+ draw_call.is_indexed = is_indexed;
+ draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count);
+ draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance);
+ draw_call.is_instanced = draw_setup.instance_count > 1; // A single instance goes through the non-instanced GL calls.
+ draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
+ if (draw_call.is_indexed) {
+ draw_call.count = static_cast<GLint>(regs.index_array.count);
+ draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
+ draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+ draw_call.index_buffer_offset = index_buffer_offset; // Member assigned from SetupIndexBuffer() in DrawPrelude().
+ } else {
+ draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
+ draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
+ }
+ draw_call.DispatchDraw();
+
+ maxwell3d.dirty.memory_general = false;
+ accelerate_draw = AccelDraw::Disabled;
+ return true;
+}
+
+void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { // Sets up and dispatches the compute kernel located at code_addr.
+ if (!GLAD_GL_ARB_compute_variable_group_size) { // glDispatchComputeGroupSizeARB below comes from this extension.
+ LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
+ "lack of GL_ARB_compute_variable_group_size");
+ return;
+ }
+
+ auto kernel = shader_cache.GetComputeKernel(code_addr);
+ ProgramVariant variant;
+ variant.texture_buffer_usage = SetupComputeTextures(kernel); // The texture setup result selects the program variant.
+ SetupComputeImages(kernel);
+
+ const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
+ state.draw.shader_program = program;
+ state.draw.program_pipeline = 0; // The kernel is bound as a plain program, with no pipeline object.
+
+ const std::size_t buffer_size = // Room for every compute const buffer at maximum size plus alignment.
+ Tegra::Engines::KeplerCompute::NumConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ buffer_cache.Map(buffer_size);
+
+ bind_ubo_pushbuffer.Setup(0);
+ bind_ssbo_pushbuffer.Setup(0);
+
+ SetupComputeConstBuffers(kernel);
+ SetupComputeGlobalMemory(kernel);
+
+ buffer_cache.Unmap();
+
+ bind_ubo_pushbuffer.Bind(); // Bindings are issued only after the buffer cache is unmapped.
+ bind_ssbo_pushbuffer.Bind();
+
+ state.ApplyTextures();
+ state.ApplyImages();
+ state.ApplyShaderProgram();
+ state.ApplyProgramPipeline();
+
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, // Grid dims are the group counts; block dims are the per-group size.
+ launch_desc.grid_dim_z, launch_desc.block_dim_x,
+ launch_desc.block_dim_y, launch_desc.block_dim_z);
}
void RasterizerOpenGL::FlushAll() {}
@@ -718,8 +810,8 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
if (!addr || !size) {
return;
}
- res_cache.FlushRegion(addr, size);
- global_cache.FlushRegion(addr, size);
+ texture_cache.FlushRegion(addr, size);
+ buffer_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -727,23 +819,31 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
if (!addr || !size) {
return;
}
- res_cache.InvalidateRegion(addr, size);
+ texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
- global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
- FlushRegion(addr, size);
+ if (Settings::values.use_accurate_gpu_emulation) { // Flush only when accurate GPU emulation is enabled; invalidation below always runs.
+ FlushRegion(addr, size);
+ }
InvalidateRegion(addr, size);
}
+void RasterizerOpenGL::FlushCommands() { // Issues glFlush() on the current GL context.
+ glFlush();
+}
+
+void RasterizerOpenGL::TickFrame() { // Forwards the per-frame tick to the buffer cache.
+ buffer_cache.TickFrame();
+}
+
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
- const Common::Rectangle<u32>& src_rect,
- const Common::Rectangle<u32>& dst_rect) {
+ const Tegra::Engines::Fermi2D::Config& copy_config) { // copy_config replaces the separate source and destination rectangles.
MICROPROFILE_SCOPE(OpenGL_Blits);
- res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
+ texture_cache.DoFermiCopy(src, dst, copy_config); // The texture cache performs the copy; this function always returns true.
return true;
}
@@ -755,7 +855,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
+ const auto surface{
+ texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
if (!surface) {
return {};
}
@@ -771,109 +872,204 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
}
- screen_info.display_texture = surface->Texture().handle;
+ screen_info.display_texture = surface->GetTexture();
+ screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
return true;
}
-void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLuint program_handle,
- BaseBindings base_bindings) {
+// Sets up the const buffers used by `shader` for a draw. For every const buffer entry reported
+// by the shader, the matching const buffer of the Maxwell3D shader stage `stage` is looked up by
+// the entry's index and both are passed to SetupConstBuffer().
+void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& gpu = system.GPU();
- const auto& maxwell3d = gpu.Maxwell3D();
- const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
- const auto& entries = shader->GetShaderEntries().const_buffers;
-
- // Upload only the enabled buffers from the 16 constbuffers of each shader stage
- for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& used_buffer = entries[bindpoint];
- const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
-
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- bind_ubo_pushbuffer.Push(0, 0, 0);
- continue;
- }
-
- std::size_t size = 0;
+ const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+ const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
+ // Only the buffers the shader actually declares are visited; each is handled individually.
+ for (const auto& entry : shader->GetShaderEntries().const_buffers) {
+ const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
+ SetupConstBuffer(buffer, entry);
+ }
+}
- if (used_buffer.IsIndirect()) {
- // Buffer is accessed indirectly, so upload the entire thing
- size = buffer.size;
+// Sets up the const buffers used by the compute shader `kernel`. For every const buffer entry
+// reported by the kernel, a ConstBufferInfo is assembled from the KeplerCompute launch
+// description (address, size and the matching bit of the enable mask) and passed to
+// SetupConstBuffer().
+void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+ MICROPROFILE_SCOPE(OpenGL_UBO);
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ // The enable mask does not depend on the entry, so decode it once instead of per iteration.
+ const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
+ for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
+ const auto index = entry.GetIndex();
+ const auto& config = launch_desc.const_buffer_config[index];
+ Tegra::Engines::ConstBufferInfo buffer;
+ buffer.address = config.Address();
+ buffer.size = config.size;
+ buffer.enabled = mask[index];
+ SetupConstBuffer(buffer, entry);
+ }
+}
- if (size > MaxConstbufferSize) {
- LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
- MaxConstbufferSize);
- size = MaxConstbufferSize;
- }
- } else {
- // Buffer is accessed directly, upload just what we use
- size = used_buffer.GetSize();
- }
+// Queues one uniform buffer binding on bind_ubo_pushbuffer for the shader const buffer `entry`.
+// A disabled `buffer` gets the buffer cache's empty buffer; an enabled one has its contents
+// uploaded through the buffer cache and the returned buffer/offset pair is queued.
+void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+ const GLShader::ConstBufferEntry& entry) {
+ if (!buffer.enabled) {
+ // Disabled buffer: queue the buffer cache's empty buffer (offset 0, sizeof(float) bytes)
+ // in its place instead of uploading anything.
+ bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
+ return;
+ }
- // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
- // UBO alignment requirements.
- size = Common::AlignUp(size, sizeof(GLvec4));
- ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
+ // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
+ // UBO alignment requirements.
+ const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
- const GLintptr const_buffer_offset =
- buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
+ // Upload with the device's uniform buffer alignment and bind the resulting buffer range.
+ const auto alignment = device.GetUniformBufferAlignment();
+ const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
+ bind_ubo_pushbuffer.Push(cbuf, offset, size);
+}
- bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
+// Sets up the global memory regions used by `shader` for a draw. For every global memory entry,
+// a descriptor is read from GPU memory at the const buffer address plus the entry's offset:
+// a 64-bit GPU address followed, 8 bytes later, by a 32-bit size. Both are passed to
+// SetupGlobalMemory().
+void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader) {
+ auto& gpu{system.GPU()};
+ auto& memory_manager{gpu.MemoryManager()};
+ // Bind by reference: the stage state is only read here, so copying it per call is wasted work.
+ const auto& cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)];
+ for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
+ const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
+ const auto gpu_addr{memory_manager.Read<u64>(addr)};
+ const auto size{memory_manager.Read<u32>(addr + 8)};
+ SetupGlobalMemory(entry, gpu_addr, size);
}
}
-void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLenum primitive_mode,
- BaseBindings base_bindings) {
- const auto& entries = shader->GetShaderEntries().global_memory_entries;
- for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& entry{entries[bindpoint]};
- const auto& region{global_cache.GetGlobalRegion(entry, stage)};
- if (entry.IsWritten()) {
- region->MarkAsModified(true, global_cache);
- }
- bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
- static_cast<GLsizeiptr>(region->GetSizeInBytes()));
+// Sets up the global memory regions used by the compute shader `kernel`. For every global memory
+// entry, a descriptor is read from GPU memory at the launch description's const buffer address
+// plus the entry's offset: a 64-bit GPU address followed, 8 bytes later, by a 32-bit size. Both
+// are passed to SetupGlobalMemory().
+void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+ auto& gpu{system.GPU()};
+ auto& memory_manager{gpu.MemoryManager()};
+ // Bind by reference: the const buffer configuration is only read, so no copy is needed.
+ const auto& cbufs = gpu.KeplerCompute().launch_description.const_buffer_config;
+ for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
+ const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
+ const auto gpu_addr{memory_manager.Read<u64>(addr)};
+ const auto size{memory_manager.Read<u32>(addr + 8)};
+ SetupGlobalMemory(entry, gpu_addr, size);
}
}
-void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings) {
+// Uploads `size` bytes starting at `gpu_addr` through the buffer cache, using the device's
+// shader storage buffer alignment and the entry's written flag, then queues the resulting
+// buffer range on bind_ssbo_pushbuffer.
+void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
+ GPUVAddr gpu_addr, std::size_t size) {
+ const auto ssbo_alignment = device.GetShaderStorageBufferAlignment();
+ const auto is_written = entry.IsWritten();
+ const auto [handle, offset] =
+ buffer_cache.UploadMemory(gpu_addr, size, ssbo_alignment, is_written);
+ const auto byte_count = static_cast<GLsizeiptr>(size);
+ bind_ssbo_pushbuffer.Push(handle, offset, byte_count);
+}
+
+// Sets up the textures sampled by `shader` for the draw stage `stage`. Each sampler entry is
+// bound at base_bindings.sampler + its index through SetupTexture(). The returned
+// TextureBufferUsage has a bit set for every bindpoint where SetupTexture() returned true.
+TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage,
+ const Shader& shader,
+ BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
- ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
+ ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
"Exceeded the number of active textures.");
+ TextureBufferUsage texture_buffer_usage{0};
+
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- Tegra::Texture::FullTextureInfo texture;
- if (entry.IsBindless()) {
+ // Non-bindless entries are looked up by offset in the stage; bindless entries first read
+ // their texture handle from the const buffer location given by GetBindlessCBuf().
+ const auto texture = [&]() {
+ if (!entry.IsBindless()) {
+ return maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ }
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
- texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
- } else {
- texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+ }();
+
+ // The usage bit is indexed by the shader-local bindpoint, not the absolute binding.
+ if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
+ texture_buffer_usage.set(bindpoint);
}
- const u32 current_bindpoint = base_bindings.sampler + bindpoint;
+ }
- state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
+ return texture_buffer_usage;
+}
- if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
- state.texture_units[current_bindpoint].texture =
- surface->Texture(entry.IsArray()).handle;
- surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
- texture.tic.w_source);
- } else {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- state.texture_units[current_bindpoint].texture = 0;
+// Sets up the textures sampled by the compute shader `kernel`. Each sampler entry is bound at
+// its own index through SetupTexture(). The returned TextureBufferUsage has a bit set for every
+// bindpoint where SetupTexture() returned true.
+TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+ MICROPROFILE_SCOPE(OpenGL_Texture);
+ const auto& compute = system.GPU().KeplerCompute();
+ const auto& entries = kernel->GetShaderEntries().samplers;
+
+ ASSERT_MSG(entries.size() <= std::size(state.textures),
+ "Exceeded the number of active textures.");
+
+ TextureBufferUsage texture_buffer_usage{0};
+
+ for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ // Non-bindless entries are looked up by offset; bindless entries first read their
+ // texture handle from the const buffer location given by GetBindlessCBuf().
+ const auto texture = [&]() {
+ if (!entry.IsBindless()) {
+ return compute.GetTexture(entry.GetOffset());
+ }
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
+ return compute.GetTextureInfo(tex_handle, entry.GetOffset());
+ }();
+
+ if (SetupTexture(bindpoint, texture, entry)) {
+ texture_buffer_usage.set(bindpoint);
+ }
}
+
+ return texture_buffer_usage;
+}
+
+// Fills in the sampler and texture state for the texture unit `binding`.
+// Returns true only when a surface view was found and its surface parameters report a buffer;
+// returns false when no view exists or when the view is a regular (swizzled) texture.
+bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+ const GLShader::SamplerEntry& entry) {
+ state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
+
+ const auto surface_view = texture_cache.GetTextureSurface(texture.tic, entry);
+ if (surface_view) {
+ state.textures[binding] = surface_view->GetTexture();
+ } else {
+ // Can occur when texture addr is null or its memory is unmapped/invalid
+ state.textures[binding] = 0;
+ return false;
+ }
+
+ const bool is_texture_buffer = surface_view->GetSurfaceParams().IsBuffer();
+ if (!is_texture_buffer) {
+ // Swizzle is applied only to textures that are not buffers.
+ const auto& tic = texture.tic;
+ surface_view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ }
+ return is_texture_buffer;
+}
+
+// Sets up the images used by the compute shader `shader`. For every image entry the TIC is
+// resolved (by offset for non-bindless entries, through a handle read from a const buffer for
+// bindless ones) and bound at the entry's index through SetupImage().
+void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+ const auto& compute = system.GPU().KeplerCompute();
+ const auto& entries = shader->GetShaderEntries().images;
+
+ // SetupImage() indexes state.images with the bindpoint, so guard the range the same way the
+ // texture setup paths guard state.textures.
+ ASSERT_MSG(entries.size() <= std::size(state.images), "Exceeded the number of active images.");
+
+ for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ const auto tic = [&]() {
+ if (!entry.IsBindless()) {
+ return compute.GetTexture(entry.GetOffset()).tic;
+ }
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
+ return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
+ }();
+ SetupImage(bindpoint, tic, entry);
+ }
+}
+
+// Fills in state.images[binding] for one shader image. When the texture cache returns no view
+// the slot is set to 0; otherwise the slot receives the view's texture.
+void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
+ const GLShader::ImageEntry& entry) {
+ const auto view = texture_cache.GetImageSurface(tic, entry);
+ if (!view) {
+ state.images[binding] = 0;
+ return;
+ }
+ // Swizzle is applied only when the TIC does not describe a buffer.
+ if (!tic.IsBuffer()) {
+ view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ }
+ // Images the shader writes to are marked as modified with the texture cache's current tick.
+ if (entry.IsWritten()) {
+ view->MarkAsModified(texture_cache.Tick());
+ }
+ state.images[binding] = view->GetTexture();
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
@@ -917,10 +1113,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
- state.cull.enabled = regs.cull.enabled != 0;
+ const auto& regs = maxwell3d.regs;
+ state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -953,15 +1150,23 @@ void RasterizerOpenGL::SyncDepthTestState() {
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
- if (!state.depth.test_enabled)
+ if (!state.depth.test_enabled) {
return;
+ }
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
void RasterizerOpenGL::SyncStencilTestState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.stencil_test) {
+ return;
+ }
+ maxwell3d.dirty.stencil_test = false;
+
+ const auto& regs = maxwell3d.regs;
state.stencil.test_enabled = regs.stencil_enable != 0;
+ state.MarkDirtyStencilState();
if (!regs.stencil_enable) {
return;
@@ -994,7 +1199,12 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncColorMask() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.color_mask) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+
const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) {
@@ -1005,6 +1215,9 @@ void RasterizerOpenGL::SyncColorMask() {
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}
+
+ state.MarkDirtyColorMask();
+ maxwell3d.dirty.color_mask = false;
}
void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1019,7 +1232,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
}
void RasterizerOpenGL::SyncBlendState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.blend_state) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
@@ -1042,6 +1259,8 @@ void RasterizerOpenGL::SyncBlendState() {
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
+ maxwell3d.dirty.blend_state = false;
+ state.MarkDirtyBlendState();
return;
}
@@ -1058,6 +1277,9 @@ void RasterizerOpenGL::SyncBlendState() {
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
}
+
+ state.MarkDirtyBlendState();
+ maxwell3d.dirty.blend_state = false;
}
void RasterizerOpenGL::SyncLogicOpState() {
@@ -1109,13 +1331,21 @@ void RasterizerOpenGL::SyncPointState() {
}
+// Copies the polygon offset registers into the OpenGL state. Does nothing unless Maxwell3D has
+// flagged polygon_offset as dirty; after copying, the OpenGL state is marked dirty and the
+// Maxwell3D flag is cleared.
void RasterizerOpenGL::SyncPolygonOffset() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ // Early out: nothing to sync when the registers have not been flagged as changed.
+ if (!maxwell3d.dirty.polygon_offset) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
state.polygon_offset.units = regs.polygon_offset_units;
state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp;
+
+ state.MarkDirtyPolygonOffset();
+ maxwell3d.dirty.polygon_offset = false;
}
void RasterizerOpenGL::SyncAlphaTest() {