summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp55
-rw-r--r--src/video_core/renderer_opengl/gl_device.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp28
6 files changed, 165 insertions, 83 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 38497678a..65a88b06c 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -2,11 +2,14 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cstddef>
#include <glad/glad.h>
#include "common/logging/log.h"
+#include "common/scope_exit.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
@@ -24,6 +27,7 @@ Device::Device() {
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
+ has_component_indexing_bug = TestComponentIndexingBug();
}
Device::Device(std::nullptr_t) {
@@ -31,10 +35,12 @@ Device::Device(std::nullptr_t) {
max_vertex_attributes = 16;
max_varyings = 15;
has_variable_aoffi = true;
+ has_component_indexing_bug = false;
}
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
+// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
void main() {
@@ -51,4 +57,53 @@ void main() {
return supported;
}
+/// Detects whether the driver miscompiles dynamically indexed vector components.
+///
+/// A vertex shader copies `input_value[idx >> 2][idx & 3]` from a uniform buffer into a shader
+/// storage buffer. It is drawn once for each idx in [4, 8) and the stored word is read back.
+///
+/// @returns true as soon as one read-back value differs from the uploaded one, false when all
+///          four probed components match. The verdict is also logged.
+bool Device::TestComponentIndexingBug() {
+    constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
+    const GLchar* COMPONENT_TEST = R"(#version 430 core
+layout (std430, binding = 0) buffer OutputBuffer {
+ uint output_value;
+};
+layout (std140, binding = 0) uniform InputBuffer {
+ uvec4 input_value[4096];
+};
+layout (location = 0) uniform uint idx;
+void main() {
+ output_value = input_value[idx >> 2][idx & 3];
+})";
+    const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
+    SCOPE_EXIT({ glDeleteProgram(shader); });
+    glUseProgram(shader);
+
+    // A vertex array is bound for the draw call even though the shader reads no attributes.
+    OGLVertexArray vao;
+    vao.Create();
+    glBindVertexArray(vao.handle);
+
+    // Entries 0-3 (the first uvec4) are zero; the probed indices 4-7 live in the second uvec4.
+    constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
+    OGLBuffer ubo;
+    ubo.Create();
+    glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
+
+    OGLBuffer ssbo;
+    ssbo.Create();
+    glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
+
+    for (GLuint index = 4; index < 8; ++index) {
+        glInvalidateBufferData(ssbo.handle);
+        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
+
+        glProgramUniform1ui(shader, 0, index);
+        glDrawArrays(GL_POINTS, 0, 1);
+
+        // Shader stores to a storage buffer are not automatically visible to buffer readback
+        // commands; this barrier orders the draw's write before glGetNamedBufferSubData.
+        glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+
+        GLuint result{};
+        glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
+        if (result != values.at(index)) {
+            LOG_INFO(Render_OpenGL, log_message, true);
+            return true;
+        }
+    }
+    LOG_INFO(Render_OpenGL, log_message, false);
+    return false;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de8490682..8c8c93760 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,13 +30,19 @@ public:
return has_variable_aoffi;
}
/// Returns the flag stored at construction: the result of TestComponentIndexingBug() for a
/// real device, or false for the Device(std::nullptr_t) constructor.
+ bool HasComponentIndexingBug() const {
+ return has_component_indexing_bug;
+ }
+
private:
static bool TestVariableAoffi();
+ static bool TestComponentIndexingBug();
std::size_t uniform_buffer_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_variable_aoffi{};
+ bool has_component_indexing_bug{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7ee1c99c0..ac8a9e6b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -35,8 +35,8 @@ struct UnspecializedShader {
namespace {
/// Gets the address for the specified shader stage program
/// The new version reads the GPU state from the `system` passed in by the caller; the removed
/// version fetched it through Core::System::GetInstance().
-GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
- const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+ const auto& gpu{system.GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
// Address = code base address register + this program's offset from its shader_config entry.
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
@@ -170,7 +170,8 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
- std::string source = "#version 430 core\n";
+ std::string source = "#version 430 core\n"
+ "#extension GL_ARB_separate_shader_objects : enable\n\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
for (const auto& cbuf : entries.const_buffers) {
@@ -349,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device)
- : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
+ : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
+ disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -545,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}
/// Returns the shader for the given stage, creating and registering it on a cache miss.
///
/// When the Maxwell3D `dirty_flags.shaders` flag is clear, the entry of `last_shaders` for this
/// stage is returned without any lookup. Otherwise the shader is looked up by the host pointer
/// of its code. On a miss it is built from the precompiled entry matching its unique identifier
/// if one exists, or from the guest program code otherwise, and then passed to Register().
/// Every path past the dirty-flag check also stores the result in `last_shaders`.
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
- return last_shaders[static_cast<u32>(program)];
+ if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+ return last_shaders[static_cast<std::size_t>(program)];
}
- auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(program)};
+ auto& memory_manager{system.GPU().MemoryManager()};
+ const GPUVAddr program_addr{GetShaderAddress(system, program)};
// Look up shader in the cache based on address
- const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+ const auto host_ptr{memory_manager.GetPointer(program_addr)};
Shader shader{TryGet(host_ptr)};
+ if (shader) {
+ return last_shaders[static_cast<std::size_t>(program)] = shader;
+ }
- if (!shader) {
- // No shader found - create a new one
- ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
- ProgramCode program_code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
- program_code_b = GetShaderCode(memory_manager, program_addr_b,
- memory_manager.GetPointer(program_addr_b));
- }
- const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
- const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
- const auto found = precompiled_shaders.find(unique_identifier);
- if (found != precompiled_shaders.end()) {
- shader =
- std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
- precompiled_programs, found->second, host_ptr);
- } else {
- shader = std::make_shared<CachedShader>(
- device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
- std::move(program_code), std::move(program_code_b), host_ptr);
- }
- Register(shader);
+ // No shader found - create a new one
+ ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
+ ProgramCode program_code_b;
// A VertexA program is paired with the VertexB program's code, fetched from its own address.
+ if (program == Maxwell::ShaderProgram::VertexA) {
+ const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
+ program_code_b = GetShaderCode(memory_manager, program_addr_b,
+ memory_manager.GetPointer(program_addr_b));
+ }
+
// NOTE(review): the result of GpuToCpuAddress is dereferenced without a check; presumably it
// is optional-like and never empty for a mapped shader address - confirm.
+ const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+ const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
+ const auto found = precompiled_shaders.find(unique_identifier);
+ if (found != precompiled_shaders.end()) {
+ // Create a shader from the cache
+ shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+ precompiled_programs, found->second, host_ptr);
+ } else {
+ // Create a shader from guest memory
+ shader = std::make_shared<CachedShader>(
+ device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+ std::move(program_code), std::move(program_code_b), host_ptr);
}
+ Register(shader);
- return last_shaders[static_cast<u32>(program)] = shader;
+ return last_shaders[static_cast<std::size_t>(program)] = shader;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 64e5a5594..09bd0761d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -137,6 +137,7 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
+ Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6d4658c8b..29de5c9db 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -31,6 +31,8 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
+
+using namespace std::string_literals;
using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -43,7 +45,6 @@ struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
-enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
@@ -93,11 +94,9 @@ private:
};
/// Generates code to use for a swizzle operation.
-std::string GetSwizzle(u32 elem) {
- ASSERT(elem <= 3);
- std::string swizzle = ".";
- swizzle += "xyzw"[elem];
- return swizzle;
// Maps a component index 0-3 to the GLSL swizzle suffix ".x", ".y", ".z" or ".w".
+constexpr const char* GetSwizzle(u32 element) {
+ constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"};
// std::array::at is bounds-checked: an index above 3 throws std::out_of_range.
+ return swizzle.at(element);
+}
/// Translate topology
@@ -247,6 +246,12 @@ private:
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
code.AddNewLine();
+ code.AddLine("in gl_PerVertex {{");
+ ++code.scope;
+ code.AddLine("vec4 gl_Position;");
+ --code.scope;
+ code.AddLine("}} gl_in[];");
+
DeclareVertexRedeclarations();
}
@@ -349,7 +354,7 @@ private:
}
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 generic_index{GetGenericAttributeIndex(index)};
+ const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
if (stage == ShaderStage::Geometry) {
@@ -358,19 +363,13 @@ private:
std::string suffix;
if (stage == ShaderStage::Fragment) {
- const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+ const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
return;
}
suffix = GetInputFlags(input_mode);
}
- u32 location = generic_index;
- if (stage != ShaderStage::Vertex) {
- // If inputs are varyings, add an offset
- location += GENERIC_VARYING_START_LOCATION;
- }
-
code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
}
@@ -395,7 +394,7 @@ private:
}
/// Emits a `layout (location = N) out vec4 <name>;` line for the given attribute, where N is
/// derived from GetGenericAttributeIndex(index) and <name> comes from GetOutputAttribute(index).
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+ const u32 location{GetGenericAttributeIndex(index)};
code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
}
@@ -577,9 +576,26 @@ private:
if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
- return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset);
+ code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+
+ if (!device.HasComponentIndexingBug()) {
+ return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset);
+ }
+
+ // AMD's proprietary GLSL compiler generates incorrect code for variable component access.
+ // To work around this driver bug, emit four ifs, one per component.
+ const std::string pack = code.GenerateTemporary();
+ code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ final_offset);
+
+ const std::string result = code.GenerateTemporary();
+ code.AddLine("float {};", result);
+ for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+ code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
+ pack, GetSwizzle(swizzle));
+ }
+ return result;
}
UNREACHABLE_MSG("Unmanaged offset node type");
@@ -633,10 +649,14 @@ private:
switch (attribute) {
case Attribute::Index::Position:
- if (stage != ShaderStage::Fragment) {
- return GeometryPass("position") + GetSwizzle(element);
- } else {
- return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
+ switch (stage) {
+ case ShaderStage::Geometry:
+ return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
+ GetSwizzle(element));
+ case ShaderStage::Fragment:
+ return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+ default:
+ UNREACHABLE();
}
case Attribute::Index::PointCoord:
switch (element) {
@@ -921,7 +941,7 @@ private:
target = [&]() -> std::string {
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
case Attribute::Index::Position:
- return "position" + GetSwizzle(abuf->GetElement());
+ return "gl_Position"s + GetSwizzle(abuf->GetElement());
case Attribute::Index::PointSize:
return "gl_PointSize";
case Attribute::Index::ClipDistances0123:
@@ -1506,9 +1526,7 @@ private:
// If a geometry shader is attached, it will always flip (it's the last stage before
// fragment). For more info about flipping, refer to gl_shader_gen.cpp.
- code.AddLine("position.xy *= viewport_flip.xy;");
- code.AddLine("gl_Position = position;");
- code.AddLine("position.w = 1.0;");
+ code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
@@ -1526,6 +1544,16 @@ private:
return "uintBitsToFloat(config_pack[2])";
}
/// Builds the GLSL expression "utof(gl_LocalInvocationID<swizzle>)", where <swizzle> is the
/// suffix GetSwizzle returns for the template argument `element`. The Operation argument is
/// not used.
+ template <u32 element>
+ std::string LocalInvocationId(Operation) {
+ return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')';
+ }
+
/// Builds the GLSL expression "utof(gl_WorkGroupID<swizzle>)", where <swizzle> is the suffix
/// GetSwizzle returns for the template argument `element`. The Operation argument is not used.
+ template <u32 element>
+ std::string WorkGroupId(Operation) {
+ return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
+ }
+
static constexpr OperationDecompilersArray operation_decompilers = {
&GLSLDecompiler::Assign,
@@ -1665,6 +1693,12 @@ private:
&GLSLDecompiler::EndPrimitive,
&GLSLDecompiler::YNegate,
+ &GLSLDecompiler::LocalInvocationId<0>,
+ &GLSLDecompiler::LocalInvocationId<1>,
+ &GLSLDecompiler::LocalInvocationId<2>,
+ &GLSLDecompiler::WorkGroupId<0>,
+ &GLSLDecompiler::WorkGroupId<1>,
+ &GLSLDecompiler::WorkGroupId<2>,
};
std::string GetRegister(u32 index) const {
@@ -1730,8 +1764,7 @@ private:
}
/// Returns the varying count derived from device.GetMaxVaryings(), capped at
/// Maxwell::NumVaryings.
u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
- Maxwell::NumVaryings);
+ return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7ab0b4553..c845b29aa 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -19,13 +19,10 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
- out += "// Shader Unique Id: VS" + id + "\n\n";
+ std::string out = "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
-layout (location = 0) out vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
@@ -49,7 +46,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
out += R"(
void main() {
- position = vec4(0.0, 0.0, 0.0, 0.0);
execute_vertex();
)";
@@ -60,19 +56,12 @@ void main() {
out += R"(
// Set Position Y direction
- position.y *= utof(config_pack[2]);
+ gl_Position.y *= utof(config_pack[2]);
// Check if the flip stage is VertexB
// Config pack's second value is flip_stage
if (config_pack[1] == 1) {
// Viewport can be flipped, which is unsupported by glViewport
- position.xy *= viewport_flip.xy;
- }
- gl_Position = position;
-
- // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
- // For now, this is here to bring order in lieu of proper emulation
- if (config_pack[1] == 1) {
- position.w = 1.0;
+ gl_Position.xy *= viewport_flip.xy;
}
})";
@@ -82,14 +71,10 @@ void main() {
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
- out += "// Shader Unique Id: GS" + id + "\n\n";
+ std::string out = "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
-layout (location = 0) in vec4 gs_position[];
-layout (location = 0) out vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
@@ -113,8 +98,7 @@ void main() {
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
- out += "// Shader Unique Id: FS" + id + "\n\n";
+ std::string out = "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
@@ -127,8 +111,6 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
-layout (location = 0) in noperspective vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding