summary refs log tree commit diff stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_global_cache.cpp 43
-rw-r--r-- src/video_core/renderer_opengl/gl_global_cache.h 16
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 59
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 43
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 39
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h 15
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 13
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp 34
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 61
-rw-r--r-- src/video_core/renderer_opengl/utils.cpp 28
-rw-r--r-- src/video_core/renderer_opengl/utils.h 20
15 files changed, 264 insertions, 125 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 7989ec11b..25652e794 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "core/core.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 5842d6213..ea4a593af 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -6,6 +6,7 @@
#include "common/logging/log.h"
#include "core/core.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -13,28 +14,28 @@
namespace OpenGL {
-CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
+ max_size{max_size} {
buffer.Create();
- // Bind and unbind the buffer so it gets allocated by the driver
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
}
-void CachedGlobalRegion::Reload(u32 size_) {
- constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+CachedGlobalRegion::~CachedGlobalRegion() = default;
+void CachedGlobalRegion::Reload(u32 size_) {
size = size_;
if (size > max_size) {
size = max_size;
- LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+ LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
max_size);
}
+ glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
+}
- // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
- glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
+void CachedGlobalRegion::Flush() { // Reads `size` bytes of the GL buffer back into host_ptr
+ LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:016X}", size, cpu_addr);
+ glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
}
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
@@ -45,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
return search->second;
}
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
- u8* host_ptr) {
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
+ u32 size) {
GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
- region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
+ const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
+ ASSERT(cpu_addr);
+
+ region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
ReserveGlobalRegion(region);
}
region->Reload(size);
@@ -64,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
- : RasterizerCache{rasterizer} {}
+ : RasterizerCache{rasterizer} {
+ GLint max_ssbo_size_{}; // Stays 0 if the query below fails and leaves it untouched
+ glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
+ max_ssbo_size = static_cast<u32>(max_ssbo_size_);
+}
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const GLShader::GlobalMemoryEntry& global_region,
@@ -72,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
auto& gpu{Core::System::GetInstance().GPU()};
auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+ const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
global_region.GetCbufOffset()};
const auto actual_addr{memory_manager.Read<u64>(addr)};
@@ -84,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
if (!region) {
// No global region found - create a new one
- region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
+ region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
Register(region);
}
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 5a21ab66f..196e6e278 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -19,7 +19,7 @@ namespace OpenGL {
namespace GLShader {
class GlobalMemoryEntry;
-} // namespace GLShader
+}
class RasterizerOpenGL;
class CachedGlobalRegion;
@@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
- explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
+ explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
+ ~CachedGlobalRegion();
VAddr GetCpuAddr() const override {
return cpu_addr;
@@ -45,14 +46,14 @@ public:
/// Reloads the global region from guest memory
void Reload(u32 size_);
- // TODO(Rodrigo): When global memory is written (STG), implement flushing
- void Flush() override {
- UNIMPLEMENTED();
- }
+ void Flush() override;
private:
VAddr cpu_addr{};
+ u8* host_ptr{};
u32 size{};
+ u32 max_size{};
+
OGLBuffer buffer;
};
@@ -66,10 +67,11 @@ public:
private:
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
- GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
+ GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
void ReserveGlobalRegion(GlobalRegion region);
std::unordered_map<CacheAddr, GlobalRegion> reserve;
+ u32 max_ssbo_size{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6f3bcccec..86a2e117d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -299,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+ // Prepare packed bindings
+ bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+ bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -321,8 +325,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the emulation info buffer
- glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
+ bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
+ static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
@@ -366,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
base_bindings = next_bindings;
}
+ bind_ubo_pushbuffer.Bind();
+ bind_ssbo_pushbuffer.Bind();
+
SyncClipEnabled(clip_distances);
gpu.dirty_flags.shaders = false;
@@ -575,9 +582,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
}
void RasterizerOpenGL::Clear() {
- const auto prev_state{state};
- SCOPE_EXIT({ prev_state.Apply(); });
-
const auto& regs = system.GPU().Maxwell3D().regs;
bool use_color{};
bool use_depth{};
@@ -649,7 +653,10 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}
- clear_state.Apply();
+ clear_state.ApplyColorMask();
+ clear_state.ApplyDepth();
+ clear_state.ApplyStencilTest();
+ clear_state.ApplyViewport();
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -749,6 +756,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
return;
}
res_cache.FlushRegion(addr, size);
+ global_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -900,23 +908,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffers;
- constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
- std::array<GLuint, max_binds> bind_buffers;
- std::array<GLintptr, max_binds> bind_offsets;
- std::array<GLsizeiptr, max_binds> bind_sizes;
-
- ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
-
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& used_buffer = entries[bindpoint];
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
if (!buffer.enabled) {
- // With disabled buffers set values as zero to unbind them
- bind_buffers[bindpoint] = 0;
- bind_offsets[bindpoint] = 0;
- bind_sizes[bindpoint] = 0;
+ // Set values to zero to unbind buffers
+ bind_ubo_pushbuffer.Push(0, 0, 0);
continue;
}
@@ -944,30 +943,22 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
- // Prepare values for multibind
- bind_buffers[bindpoint] = buffer_cache.GetHandle();
- bind_offsets[bindpoint] = const_buffer_offset;
- bind_sizes[bindpoint] = size;
+ bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
}
-
- // The first binding is reserved for emulation values
- const GLuint ubo_base_binding = base_bindings.cbuf + 1;
- glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
- bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
- // TODO(Rodrigo): Use ARB_multi_bind here
const auto& entries = shader->GetShaderEntries().global_memory_entries;
-
- for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
- const auto& entry = entries[bindpoint];
- const u32 current_bindpoint = base_bindings.gmem + bindpoint;
- const auto& region = global_cache.GetGlobalRegion(entry, stage);
-
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+ for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry{entries[bindpoint]};
+ const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+ if (entry.IsWritten()) {
+ region->MarkAsModified(true, global_cache);
+ }
+ bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
+ static_cast<GLsizeiptr>(region->GetSizeInBytes()));
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 54fbf48aa..d4c2cf80e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,6 +28,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/utils.h"
namespace Core {
class System;
@@ -70,10 +71,6 @@ public:
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
- static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
- static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
- "The maximum size of a global memory must be a multiple of the size of float");
-
private:
class SamplerInfo {
public:
@@ -229,6 +226,9 @@ private:
PrimitiveAssembler primitive_assembler{buffer_cache};
GLint uniform_buffer_alignment;
+ BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+ BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 7a3280620..7a68b8738 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
@@ -111,11 +112,26 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion);
- if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) {
+ if (config.tsc.depth_compare_enabled) {
// Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
// then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
// causes GetFormatType to properly return 'Depth' below).
- params.pixel_format = PixelFormat::Z16;
+ if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) {
+ switch (params.pixel_format) {
+ case PixelFormat::R16S:
+ case PixelFormat::R16U:
+ case PixelFormat::R16F:
+ params.pixel_format = PixelFormat::Z16;
+ break;
+ case PixelFormat::R32F:
+ params.pixel_format = PixelFormat::Z32F;
+ break;
+ default:
+ LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}",
+ static_cast<u32>(params.pixel_format));
+ break;
+ }
+ }
}
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
@@ -265,6 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
+ params.pitch = config.pitch;
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
@@ -661,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+ glPixelStorei(GL_PACK_ALIGNMENT, align);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -707,8 +724,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+ glPixelStorei(GL_UNPACK_ALIGNMENT, align);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
@@ -1174,10 +1191,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
return new_surface;
}
+ const bool old_compressed =
+ GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
+ const bool new_compressed =
+ GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
+ const bool compatible_formats =
+ GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
+ !(old_compressed || new_compressed);
// For compatible surfaces, we can just do fast glCopyImageSubData based copy
- if (old_params.target == new_params.target && old_params.type == new_params.type &&
- old_params.depth == new_params.depth && old_params.depth == 1 &&
- GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
+ if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
+ old_params.depth == 1 && compatible_formats) {
FastCopySurface(old_surface, new_surface);
return new_surface;
}
@@ -1192,7 +1215,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
- if (old_params.pixel_format == new_params.pixel_format)
+ if (compatible_formats)
FastLayeredCopySurface(old_surface, new_surface);
else {
AccurateCopySurface(old_surface, new_surface);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index ad4fd3ad2..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -11,6 +11,7 @@
#include <vector>
#include "common/alignment.h"
+#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
@@ -205,6 +206,13 @@ struct SurfaceParams {
return bd;
}
+ /// Returns the largest power of two that divides the row width in bytes at mip_level
+ u32 RowAlign(u32 mip_level) const {
+ const u32 row_bytes = MipWidth(mip_level) * GetBytesPerPixel(pixel_format);
+ // A zero width has no set bit to count up to, so fall back to byte alignment
+ return row_bytes == 0 ? 1U : 1U << Common::CountTrailingZeroes32(row_bytes);
+ }
+
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index ab381932c..99f67494c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -7,6 +7,7 @@
#include "common/hash.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3ea08ef7b..445048daf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
-constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
- static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
class ShaderWriter {
public:
@@ -208,8 +206,10 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& gmem : ir.GetGlobalMemoryBases()) {
- entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+ for (const auto& gmem_pair : ir.GetGlobalMemory()) {
+ const auto& [base, usage] = gmem_pair;
+ entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
+ usage.is_read, usage.is_written);
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -380,12 +380,22 @@ private:
}
void DeclareGlobalMemory() {
- for (const auto& entry : ir.GetGlobalMemoryBases()) {
+ for (const auto& gmem : ir.GetGlobalMemory()) {
+ const auto& [base, usage] = gmem;
+
+ // Since we don't know how the shader will use the buffer, hint the driver to disable as
+ // many optimizations as possible
+ std::string qualifier = "coherent volatile";
+ if (usage.is_read && !usage.is_written)
+ qualifier += " readonly";
+ else if (usage.is_written && !usage.is_read)
+ qualifier += " writeonly";
+
const std::string binding =
- fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
- code.AddLine("layout (std430, binding = " + binding + ") buffer " +
- GetGlobalMemoryBlock(entry) + " {");
- code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+ fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
+ code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
+ GetGlobalMemoryBlock(base) + " {");
+ code.AddLine(" float " + GetGlobalMemory(base) + "[];");
code.AddLine("};");
code.AddNewLine();
}
@@ -552,8 +562,7 @@ private:
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
- std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
+ code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
final_offset, final_offset);
@@ -869,6 +878,12 @@ private:
} else if (const auto lmem = std::get_if<LmemNode>(dest)) {
target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
+ } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
+ const std::string real = Visit(gmem->GetRealAddress());
+ const std::string base = Visit(gmem->GetBaseAddress());
+ const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
@@ -1622,9 +1637,7 @@ private:
std::string GetCommonDeclarations() {
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
- const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
- "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 9f7b7272e..74032d237 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -39,8 +39,9 @@ private:
class GlobalMemoryEntry {
public:
- explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
- : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+ explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written)
+ : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
+ is_written} {}
u32 GetCbufIndex() const {
return cbuf_index;
@@ -50,9 +51,19 @@ public:
return cbuf_offset;
}
+ bool IsRead() const {
+ return is_read;
+ }
+
+ bool IsWritten() const {
+ return is_written;
+ }
+
private:
u32 cbuf_index{};
u32 cbuf_offset{};
+ bool is_read{};
+ bool is_written{};
};
struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 08603b7a5..53752b38d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -340,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
+ u8 is_read{};
+ u8 is_written{};
if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+ file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
+ file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
+ file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+ entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
+ is_written != 0);
}
for (auto& clip_distance : entry.entries.clip_distances) {
@@ -401,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
return false;
for (const auto& gmem : entries.global_memory_entries) {
if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
- file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+ file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
+ file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
+ file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
return false;
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index eaf3e03a0..05ab01dcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,12 +2,44 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
+ProgramManager::ProgramManager() { // Creates the GL pipeline object the shader stages attach to
+ pipeline.Create();
+}
+
+ProgramManager::~ProgramManager() = default; // Defaulted: no hand-written cleanup here
+
+void ProgramManager::ApplyTo(OpenGLState& state) { // Points `state` at this manager's pipeline
+ UpdatePipeline(); // Bring the pipeline's attached stages up to date first
+ state.draw.shader_program = 0; // NOTE(review): presumably so the pipeline below is used - confirm
+ state.draw.program_pipeline = pipeline.handle;
+}
+
+void ProgramManager::UpdatePipeline() { // Re-attaches all three stages when the selection changed
+ // Avoid updating the pipeline when the values have not changed
+ if (old_state == current_state) {
+ return;
+ }
+
+ // Workaround for AMD bug: detach every used stage before attaching the current programs
+ constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+ GL_FRAGMENT_SHADER_BIT};
+ glUseProgramStages(pipeline.handle, all_used_stages, 0);
+
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+ glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+
+ old_state = current_state;
+}
+
void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
const auto& regs = maxwell.regs;
const auto& state = maxwell.state;
@@ -16,7 +48,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
- u32 func = static_cast<u32>(regs.alpha_test_func);
+ auto func{static_cast<u32>(regs.alpha_test_func)};
// Normalize the gl variants of opCompare to be the same as the normal variants
const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
if (func >= op_gl_variant_base) {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 37dcfefdb..cec18a832 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,6 +4,8 @@
#pragma once
+#include <cstddef>
+
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -38,55 +40,48 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
class ProgramManager {
public:
- ProgramManager() {
- pipeline.Create();
- }
+ explicit ProgramManager();
+ ~ProgramManager();
+
+ void ApplyTo(OpenGLState& state);
void UseProgrammableVertexShader(GLuint program) {
- vs = program;
+ current_state.vertex_shader = program;
}
void UseProgrammableGeometryShader(GLuint program) {
- gs = program;
+ current_state.geometry_shader = program;
}
void UseProgrammableFragmentShader(GLuint program) {
- fs = program;
+ current_state.fragment_shader = program;
}
void UseTrivialGeometryShader() {
- gs = 0;
- }
-
- void ApplyTo(OpenGLState& state) {
- UpdatePipeline();
- state.draw.shader_program = 0;
- state.draw.program_pipeline = pipeline.handle;
+ current_state.geometry_shader = 0;
}
private:
- void UpdatePipeline() {
- // Avoid updating the pipeline when values have no changed
- if (old_vs == vs && old_fs == fs && old_gs == gs)
- return;
- // Workaround for AMD bug
- glUseProgramStages(pipeline.handle,
- GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
- 0);
-
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
- glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
-
- // Update the old values
- old_vs = vs;
- old_fs = fs;
- old_gs = gs;
- }
+ struct PipelineState { // One program handle per pipeline stage, each defaulting to 0
+ bool operator==(const PipelineState& rhs) const {
+ return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
+ geometry_shader == rhs.geometry_shader;
+ }
+
+ bool operator!=(const PipelineState& rhs) const {
+ return !operator==(rhs);
+ }
+
+ GLuint vertex_shader{};
+ GLuint fragment_shader{};
+ GLuint geometry_shader{};
+ };
+
+ void UpdatePipeline();
OGLPipeline pipeline;
- GLuint vs{}, fs{}, gs{};
- GLuint old_vs{}, old_fs{}, old_gs{};
+ PipelineState current_state;
+ PipelineState old_state;
};
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index d84634cb3..84a987371 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,39 @@
#include <string>
#include <fmt/format.h>
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
+BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} // target is later passed to glBindBuffersRange
+
+BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
+
+void BindBuffersRangePushBuffer::Setup(GLuint first_) { // Begins a new batch at binding first_
+ first = first_;
+ buffers.clear(); // Drop everything queued for the previous batch
+ offsets.clear();
+ sizes.clear();
+}
+
+void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
+ buffers.push_back(buffer); // The three vectors grow together: entry i describes one range
+ offsets.push_back(offset);
+ sizes.push_back(size);
+}
+
+void BindBuffersRangePushBuffer::Bind() const { // Binds every queued range with one GL call
+ const std::size_t count{buffers.size()};
+ DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
+ if (count == 0) { // Nothing is queued, so no GL call is made
+ return;
+ }
+ glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
+ sizes.data());
+}
+
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
if (!GLAD_GL_KHR_debug) {
return; // We don't need to throw an error as this is just for debugging
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 1fcb6fc11..aef45c9dc 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
#pragma once
#include <string>
+#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
namespace OpenGL {
+class BindBuffersRangePushBuffer {
+public:
+ explicit BindBuffersRangePushBuffer(GLenum target);
+ ~BindBuffersRangePushBuffer();
+ /// Starts a new batch whose bindings begin at index first_, dropping any queued entries
+ void Setup(GLuint first_);
+ /// Queues one buffer, offset and size triple for the next Bind
+ void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
+ /// Issues a single glBindBuffersRange with the queued entries; does nothing when empty
+ void Bind() const;
+
+private:
+ GLenum target{};
+ GLuint first{};
+ std::vector<GLuint> buffers;
+ std::vector<GLintptr> offsets;
+ std::vector<GLsizeiptr> sizes;
+};
+
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
} // namespace OpenGL \ No newline at end of file