summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp74
-rw-r--r--src/video_core/renderer_opengl/gl_device.h12
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp107
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp123
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h6
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
9 files changed, 236 insertions, 108 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 466a911db..b772c37d9 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
#include <cstring>
+#include <limits>
#include <optional>
#include <vector>
@@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
-constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
- GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
- GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
+constexpr std::array LimitUBOs = {
+ GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
+ GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
+ GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
constexpr std::array LimitSSBOs = {
- GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
+ GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
+ GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
-constexpr std::array LimitSamplers = {
- GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS};
+constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
+ GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
-constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
- GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
- GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
- GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
+constexpr std::array LimitImages = {
+ GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
+ GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
+ GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
template <typename T>
T GetInteger(GLenum pname) {
@@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
return std::exchange(base, base + amount);
}
+std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
+ std::array<u32, Tegra::Engines::MaxShaderTypes> max;
+ std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
+ [](GLenum pname) { return GetInteger<u32>(pname); });
+ return max;
+}
+
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
@@ -133,6 +144,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
}
bool IsASTCSupported() {
+ static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
static constexpr std::array formats = {
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
@@ -149,25 +161,35 @@ bool IsASTCSupported() {
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
- return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
- GLint supported;
- glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
- &supported);
- return supported == GL_TRUE;
- }) == formats.end();
+ static constexpr std::array required_support = {
+ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
+ GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
+ };
+
+ for (const GLenum target : targets) {
+ for (const GLenum format : formats) {
+ for (const GLenum support : required_support) {
+ GLint value;
+ glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value);
+ if (value != GL_FULL_SUPPORT) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
}
} // Anonymous namespace
-Device::Device() : base_bindings{BuildBaseBindings()} {
+Device::Device()
+ : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_intel = vendor == "Intel";
- const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -182,7 +204,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
- has_broken_compute = is_intel_proprietary;
has_fast_buffer_sub_data = is_nvidia;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5;
@@ -197,7 +218,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
}
Device::Device(std::nullptr_t) {
- uniform_buffer_alignment = 0;
+ max_uniform_buffers.fill(std::numeric_limits<u32>::max());
+ uniform_buffer_alignment = 4;
+ shader_storage_alignment = 4;
max_vertex_attributes = 16;
max_varyings = 15;
has_warp_intrinsics = true;
@@ -205,9 +228,6 @@ Device::Device(std::nullptr_t) {
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
has_variable_aoffi = true;
- has_component_indexing_bug = false;
- has_broken_compute = false;
- has_precise_bug = false;
}
bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e915dbd86..98cca0254 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -24,6 +24,10 @@ public:
explicit Device();
explicit Device(std::nullptr_t);
+ u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
+ return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
+ }
+
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
return base_bindings[stage_index];
}
@@ -80,10 +84,6 @@ public:
return has_precise_bug;
}
- bool HasBrokenCompute() const {
- return has_broken_compute;
- }
-
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
@@ -96,7 +96,8 @@ private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
- std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
+ std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
+ std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
@@ -109,7 +110,6 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
- bool has_broken_compute{};
bool has_fast_buffer_sub_data{};
bool use_assembly_shaders{};
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 716d43e65..55e79aaf6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,6 +54,12 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
+constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
+constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
+ NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
+constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
+ NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
+
constexpr std::size_t NumSupportedVertexAttributes = 16;
template <typename Engine, typename Entry>
@@ -104,6 +110,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
CheckExtensions();
+ unified_uniform_buffer.Create();
+ glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
+
if (device.UseAssemblyShaders()) {
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
for (const GLuint cbuf : staging_cbufs) {
@@ -655,10 +664,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- if (device.HasBrokenCompute()) {
- return;
- }
-
buffer_cache.Acquire();
current_cbuf = 0;
@@ -846,34 +851,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
const auto& shader_stage = stages[stage_index];
+ const auto& entries = shader->GetEntries();
+ const bool use_unified = entries.use_unified_uniforms;
+ const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
- u32 binding =
- device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
- for (const auto& entry : shader->GetEntries().const_buffers) {
- const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
- SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
+ const auto base_bindings = device.GetBaseBindings(stage_index);
+ u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
+ for (const auto& entry : entries.const_buffers) {
+ const u32 index = entry.GetIndex();
+ const auto& buffer = shader_stage.const_buffers[index];
+ SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
+ base_unified_offset + index * Maxwell::MaxConstBufferSize);
+ ++binding;
+ }
+ if (use_unified) {
+ const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
+ entries.global_memory_entries.size());
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
+ base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& entries = kernel->GetEntries();
+ const bool use_unified = entries.use_unified_uniforms;
u32 binding = 0;
- for (const auto& entry : kernel->GetEntries().const_buffers) {
+ for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
+ SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
+ use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
+ ++binding;
+ }
+ if (use_unified) {
+ const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
+ NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry) {
+ const ConstBufferEntry& entry, bool use_unified,
+ std::size_t unified_offset) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
if (device.UseAssemblyShaders()) {
@@ -889,20 +916,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
// UBO alignment requirements.
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
- const auto alignment = device.GetUniformBufferAlignment();
- auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
- device.HasFastBufferSubData());
- if (!device.UseAssemblyShaders()) {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
+ const bool fast_upload = !use_unified && device.HasFastBufferSubData();
+
+ const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
+ const GPUVAddr gpu_addr = buffer.address;
+ auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
+
+ if (device.UseAssemblyShaders()) {
+ UNIMPLEMENTED_IF(use_unified);
+ if (offset != 0) {
+ const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
+ glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
+ cbuf = staging_cbuf;
+ offset = 0;
+ }
+ glBindBufferRangeNV(stage, binding, cbuf, offset, size);
return;
}
- if (offset != 0) {
- const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
- glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
- cbuf = staging_cbuf;
- offset = 0;
+
+ if (use_unified) {
+ glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
}
- glBindBufferRangeNV(stage, binding, cbuf, offset, size);
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -1024,6 +1060,26 @@ void RasterizerOpenGL::SyncViewport() {
const auto& regs = gpu.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
+ const bool dirty_clip_control = flags[Dirty::ClipControl];
+
+ if (dirty_clip_control || flags[Dirty::FrontFace]) {
+ flags[Dirty::FrontFace] = false;
+
+ GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0 &&
+ regs.viewport_transform[0].scale_y < 0.0f) {
+ switch (mode) {
+ case GL_CW:
+ mode = GL_CCW;
+ break;
+ case GL_CCW:
+ mode = GL_CW;
+ break;
+ }
+ }
+ glFrontFace(mode);
+ }
+
if (dirty_viewport || flags[Dirty::ClipControl]) {
flags[Dirty::ClipControl] = false;
@@ -1121,11 +1177,6 @@ void RasterizerOpenGL::SyncCullMode() {
glDisable(GL_CULL_FACE);
}
}
-
- if (flags[Dirty::FrontFace]) {
- flags[Dirty::FrontFace] = false;
- glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
- }
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 87f7fe159..f5dc56a0e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -107,7 +107,8 @@ private:
/// Configures a constant buffer.
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry);
+ const ConstBufferEntry& entry, bool use_unified,
+ std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -253,6 +254,7 @@ private:
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
std::size_t current_cbuf = 0;
+ OGLBuffer unified_uniform_buffer;
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4cd0f36cf..a991ca64a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -241,8 +241,9 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(new CachedShader(
- params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
+ MakeEntries(params.device, ir, shader_type), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -265,8 +266,9 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(new CachedShader(
- params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
+ MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
@@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
PrecompiledShader shader;
shader.program = std::move(program);
shader.registry = std::move(registry);
- shader.entries = MakeEntries(ir);
+ shader.entries = MakeEntries(device, ir, entry.type);
std::scoped_lock lock{mutex};
if (callback) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 253484968..d6e30b321 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -61,8 +61,8 @@ struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
- static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
+constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
@@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
+bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
+ const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
+ // We waste one UBO for emulation
+ const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
+ return num_ubos > num_available_ubos;
+}
+
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@@ -412,8 +419,9 @@ class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier, std::string_view suffix)
- : device{device}, ir{ir}, registry{registry}, stage{stage},
- identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+ : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
+ suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
+ UseUnifiedUniforms(device, ir, stage)} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -618,7 +626,9 @@ private:
break;
}
}
- if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
+
+ if (stage != ShaderType::Geometry &&
+ (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
@@ -647,6 +657,16 @@ private:
--code.scope;
code.AddLine("}};");
code.AddNewLine();
+
+ if (stage == ShaderType::Geometry) {
+ if (ir.UsesLayer()) {
+ code.AddLine("out int gl_Layer;");
+ }
+ if (ir.UsesViewportIndex()) {
+ code.AddLine("out int gl_ViewportIndex;");
+ }
+ }
+ code.AddNewLine();
}
void DeclareRegisters() {
@@ -834,12 +854,24 @@ private:
}
void DeclareConstantBuffers() {
+ if (use_unified_uniforms) {
+ const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
+ static_cast<u32>(ir.GetGlobalMemory().size());
+ code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
+ binding);
+ code.AddLine(" uint cbufs[];");
+ code.AddLine("}};");
+ code.AddNewLine();
+ return;
+ }
+
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto& buffers : ir.GetConstantBuffers()) {
- const auto index = buffers.first;
+ for (const auto [index, info] : ir.GetConstantBuffers()) {
+ const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
+ const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
GetConstBufferBlock(index));
- code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
+ code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
code.AddLine("}};");
code.AddNewLine();
}
@@ -1038,42 +1070,51 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
+ const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
+
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
+ if (use_unified_uniforms) {
+ return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
+ Type::Uint};
+ } else {
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
+ Type::Uint};
+ }
+ }
+
+ // Indirect access
+ if (use_unified_uniforms) {
+ return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
+ Visit(offset).AsUint()),
Type::Uint};
}
- if (std::holds_alternative<OperationNode>(*offset)) {
- // Indirect access
- const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
+ const std::string final_offset = code.GenerateTemporary();
+ code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
- if (!device.HasComponentIndexingBug()) {
- return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset),
- Type::Uint};
- }
-
- // AMD's proprietary GLSL compiler emits ill code for variable component access.
- // To bypass this driver bug generate 4 ifs, one per each component.
- const std::string pack = code.GenerateTemporary();
- code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
- final_offset);
-
- const std::string result = code.GenerateTemporary();
- code.AddLine("uint {};", result);
- for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
- pack, GetSwizzle(swizzle));
- }
- return {result, Type::Uint};
+ if (!device.HasComponentIndexingBug()) {
+ return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset),
+ Type::Uint};
}
- UNREACHABLE_MSG("Unmanaged offset node type");
+ // AMD's proprietary GLSL compiler emits ill code for variable component access.
+ // To bypass this driver bug generate 4 ifs, one per each component.
+ const std::string pack = code.GenerateTemporary();
+ code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ final_offset);
+
+ const std::string result = code.GenerateTemporary();
+ code.AddLine("uint {};", result);
+ for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+ code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
+ GetSwizzle(swizzle));
+ }
+ return {result, Type::Uint};
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
@@ -2344,7 +2385,12 @@ private:
return {};
}
- Expression MemoryBarrierGL(Operation) {
+ Expression MemoryBarrierGroup(Operation) {
+ code.AddLine("groupMemoryBarrier();");
+ return {};
+ }
+
+ Expression MemoryBarrierGlobal(Operation) {
code.AddLine("memoryBarrier();");
return {};
}
@@ -2591,7 +2637,8 @@ private:
&GLSLDecompiler::ShuffleIndexed,
&GLSLDecompiler::Barrier,
- &GLSLDecompiler::MemoryBarrierGL,
+ &GLSLDecompiler::MemoryBarrierGroup,
+ &GLSLDecompiler::MemoryBarrierGlobal,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2704,6 +2751,7 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
+ const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@@ -2899,7 +2947,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
-ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
+ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2920,6 +2968,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
entries.shader_length = ir.GetLength();
+ entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e8a178764..451c9689a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -53,11 +53,13 @@ struct ShaderEntries {
std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
- u32 clip_distances{};
std::size_t shader_length{};
+ u32 clip_distances{};
+ bool use_unified_uniforms{};
};
-ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
+ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ Tegra::Engines::ShaderType stage);
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 4faa8b90c..57db5a08b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -404,8 +404,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
bool is_proxy)
- : VideoCommon::ViewBase(params), surface{surface},
- format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
+ : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
if (!is_proxy) {
main_view = CreateTextureView();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6b489e6db..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
bool RendererOpenGL::Init() {
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
+ if (Settings::values.renderer_debug) {
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+ }
glDebugMessageCallback(DebugHandler, nullptr);
}