summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 45
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 30
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 25
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 4
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 7
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 67
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 144
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 29
11 files changed, 250 insertions, 132 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
new file mode 100644
index 000000000..b6d9e0ddb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -0,0 +1,45 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstddef>
+#include <glad/glad.h>
+
+#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_device.h"
+
+namespace OpenGL {
+
+namespace {
+/// Queries a single integer state value from OpenGL and converts it to T.
+/// @param pname State enumerator forwarded to glGetIntegerv.
+/// @returns The queried value cast to T, or zero cast to T if the query did not write a result.
+template <typename T>
+T GetInteger(GLenum pname) {
+    // glGetIntegerv leaves its output untouched when it raises an error (e.g. an unknown
+    // pname), so start from zero instead of returning an indeterminate value.
+    GLint temporary{};
+    glGetIntegerv(pname, &temporary);
+    return static_cast<T>(temporary);
+}
+} // Anonymous namespace
+
+/// Captures the uniform buffer offset alignment and the variable AOFFI probe result.
+/// Members are initialized in declaration order: alignment first, then the probe.
+Device::Device()
+    : uniform_buffer_alignment{GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT)},
+      has_variable_aoffi{TestVariableAoffi()} {}
+
+/// Probes whether the driver links a shader that passes a non-constant offset to textureOffset.
+/// A throwaway separable vertex program is created from the probe source, its link status is
+/// read back, and the program is deleted again before returning.
+/// @returns True when the probe program reports GL_LINK_STATUS == GL_TRUE.
+bool Device::TestVariableAoffi() {
+    // The offset comes from a uniform, so it is not a constant expression in GLSL.
+    const GLchar* probe_source = R"(#version 430 core
+uniform sampler2D tex;
+uniform ivec2 variable_offset;
+void main() {
+ gl_Position = textureOffset(tex, vec2(0), variable_offset);
+}
+)";
+    const GLuint program = glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &probe_source);
+
+    GLint linked = GL_FALSE;
+    glGetProgramiv(program, GL_LINK_STATUS, &linked);
+    glDeleteProgram(program);
+
+    const bool supported = (linked == GL_TRUE);
+    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported);
+    return supported;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
new file mode 100644
index 000000000..78ff5ee58
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -0,0 +1,30 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+namespace OpenGL {
+
+/// Holds OpenGL limits and capabilities that are queried once when the object is constructed.
+class Device {
+public:
+ /// Queries GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT and runs the variable AOFFI probe,
+ /// storing both results in the members below.
+ Device();
+
+ /// Returns the GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT value stored by the constructor.
+ std::size_t GetUniformBufferAlignment() const {
+ return uniform_buffer_alignment;
+ }
+
+ /// Returns the result of TestVariableAoffi() stored by the constructor.
+ bool HasVariableAoffi() const {
+ return has_variable_aoffi;
+ }
+
+private:
+ /// Returns true when a test shader using textureOffset with a uniform (non-constant)
+ /// offset links successfully.
+ static bool TestVariableAoffi();
+
+ // Both members are value-initialized here and overwritten by the constructor.
+ std::size_t uniform_buffer_alignment{};
+ bool has_variable_aoffi{};
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6034dc489..9a088a503 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -99,7 +99,7 @@ struct FramebufferCacheKey {
};
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
+ : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
OpenGLState::ApplyDefaultState();
@@ -107,8 +107,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
state.draw.shader_program = 0;
state.Apply();
- glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
-
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
}
@@ -315,8 +313,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu, stage);
- const GLintptr offset = buffer_cache.UploadHostMemory(
- &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
+ const GLintptr offset =
+ buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
// Bind the emulation info buffer
bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
@@ -700,23 +698,24 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for index buffer (keeping in mind non-core primitives)
switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads:
- buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
+ buffer_size = Common::AlignUp(buffer_size, 4) +
primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
break;
default:
if (is_indexed) {
- buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize();
+ buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
break;
}
// Uniform space for the 5 shader stages
- buffer_size =
- Common::AlignUp<std::size_t>(buffer_size, 4) +
- (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
+ buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
+ (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
+ Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
- buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
+ buffer_size +=
+ Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
const bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
@@ -848,8 +847,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
- const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
- buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
+ const GLintptr const_buffer_offset =
+ buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a0e056142..71b9c5ead 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -21,6 +21,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
@@ -172,6 +173,7 @@ private:
/// but are needed for correct emulation
void CheckExtensions();
+ const Device device;
OpenGLState state;
RasterizerCacheOpenGL res_cache;
@@ -180,7 +182,6 @@ private:
SamplerCacheOpenGL sampler_cache;
Core::System& system;
-
ScreenInfo& screen_info;
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
@@ -196,7 +197,6 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
PrimitiveAssembler primitive_assembler{buffer_cache};
- GLint uniform_buffer_alignment;
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 7a68b8738..5a25f5b37 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -640,13 +640,16 @@ void CachedSurface::LoadGLBuffer() {
SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
} else {
const u32 bpp = params.GetFormatBpp() / 8;
- const u32 copy_size = params.width * bpp;
+ const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) /
+ GetDefaultBlockWidth(params.pixel_format);
if (params.pitch == copy_size) {
std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
} else {
+ const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) /
+ GetDefaultBlockHeight(params.pixel_format);
const u8* start{params.host_ptr};
u8* write_to = gl_buffer[0].data();
- for (u32 h = params.height; h > 0; h--) {
+ for (u32 h = height; h > 0; h--) {
std::memcpy(write_to, start, copy_size);
start += params.pitch;
write_to += copy_size;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 99f67494c..2a81b1169 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -38,13 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
}
/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(const u8* host_ptr) {
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+ const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
- std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
+ memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+ program_code.size() * sizeof(u64));
return program_code;
}
@@ -134,8 +136,8 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
}
/// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
- ProgramCode program_code_b) {
+GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
+ ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
if (program_type == Maxwell::ShaderProgram::VertexA) {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
@@ -149,11 +151,11 @@ GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, Progr
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
- return GLShader::GenerateVertexShader(setup);
+ return GLShader::GenerateVertexShader(device, setup);
case Maxwell::ShaderProgram::Geometry:
- return GLShader::GenerateGeometryShader(setup);
+ return GLShader::GenerateGeometryShader(device, setup);
case Maxwell::ShaderProgram::Fragment:
- return GLShader::GenerateFragmentShader(setup);
+ return GLShader::GenerateFragmentShader(device, setup);
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
UNREACHABLE();
@@ -212,22 +214,20 @@ std::set<GLenum> GetSupportedFormats() {
return supported_formats;
}
-} // namespace
+} // Anonymous namespace
-CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
: RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
precompiled_programs{precompiled_programs} {
-
- const std::size_t code_size = CalculateProgramSize(program_code);
- const std::size_t code_size_b =
- program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
-
- GLShader::ProgramResult program_result =
- CreateProgram(program_type, program_code, program_code_b);
+ const std::size_t code_size{CalculateProgramSize(program_code)};
+ const std::size_t code_size_b{program_code_b.empty() ? 0
+ : CalculateProgramSize(program_code_b)};
+ GLShader::ProgramResult program_result{
+ CreateProgram(device, program_type, program_code, program_code_b)};
if (program_result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return;
@@ -251,7 +251,6 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
precompiled_programs} {
-
code = std::move(result.first);
entries = result.second;
shader_length = entries.shader_length;
@@ -344,8 +343,9 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
return {unique_identifier, base_bindings, primitive_mode};
}
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
- : RasterizerCache{rasterizer}, disk_cache{system} {}
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
+ const Device& device)
+ : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -439,17 +439,18 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
std::unordered_map<u64, UnspecializedShader> unspecialized;
- if (callback)
+ if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
+ }
for (std::size_t i = 0; i < raws.size(); ++i) {
- if (stop_loading)
+ if (stop_loading) {
return {};
-
+ }
const auto& raw{raws[i]};
- const u64 unique_identifier = raw.GetUniqueIdentifier();
- const u64 calculated_hash =
- GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+ const u64 unique_identifier{raw.GetUniqueIdentifier()};
+ const u64 calculated_hash{
+ GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
if (unique_identifier != calculated_hash) {
LOG_ERROR(
Render_OpenGL,
@@ -466,8 +467,8 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
result = {stored_decompiled.code, stored_decompiled.entries};
} else {
// Otherwise decompile the shader at boot and save the result to the decompiled file
- result =
- CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+ result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
+ raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
}
@@ -477,8 +478,9 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
{raw.GetUniqueIdentifier(),
{std::move(result.first), std::move(result.second), raw.GetProgramType()}});
- if (callback)
+ if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
+ }
}
return unspecialized;
}
@@ -497,11 +499,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!shader) {
// No shader found - create a new one
- ProgramCode program_code{GetShaderCode(host_ptr)};
+ ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- program_code_b = GetShaderCode(
- memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+ const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
+ program_code_b = GetShaderCode(memory_manager, program_addr_b,
+ memory_manager.GetPointer(program_addr_b));
}
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
@@ -512,7 +515,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
precompiled_programs, found->second, host_ptr);
} else {
shader = std::make_shared<CachedShader>(
- cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+ device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
std::move(program_code), std::move(program_code_b), host_ptr);
}
Register(shader);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 0cf8e0b3d..a332087f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -27,6 +27,7 @@ class System;
namespace OpenGL {
class CachedShader;
+class Device;
class RasterizerOpenGL;
struct UnspecializedShader;
@@ -38,7 +39,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
class CachedShader final : public RasterizerCacheObject {
public:
- explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ explicit CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
@@ -112,7 +113,8 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
+ const Device& device);
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
@@ -130,6 +132,8 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
+ const Device& device;
+
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
ShaderDiskCacheOpenGL disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 445048daf..ef1a1995f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -15,6 +15,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
@@ -119,14 +120,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
/// Returns true if an object has to be treated as precise
bool IsPrecise(Operation operand) {
- const auto& meta = operand.GetMeta();
-
+ const auto& meta{operand.GetMeta()};
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
return arithmetic->precise;
}
- if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
- return half_arithmetic->precise;
- }
return false;
}
@@ -139,8 +136,9 @@ bool IsPrecise(Node node) {
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix)
- : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
+ explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
+ std::string suffix)
+ : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
void Decompile() {
DeclareVertex();
@@ -627,28 +625,7 @@ private:
}
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
- std::string value = VisitOperand(operation, operand_index);
- switch (type) {
- case Type::HalfFloat: {
- const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
- if (!half_meta) {
- value = "toHalf2(" + value + ')';
- }
-
- switch (half_meta->types.at(operand_index)) {
- case Tegra::Shader::HalfType::H0_H1:
- return "toHalf2(" + value + ')';
- case Tegra::Shader::HalfType::F32:
- return "vec2(" + value + ')';
- case Tegra::Shader::HalfType::H0_H0:
- return "vec2(toHalf2(" + value + ")[0])";
- case Tegra::Shader::HalfType::H1_H1:
- return "vec2(toHalf2(" + value + ")[1])";
- }
- }
- default:
- return CastOperand(value, type);
- }
+ return CastOperand(VisitOperand(operation, operand_index), type);
}
std::string CastOperand(const std::string& value, Type type) const {
@@ -662,9 +639,7 @@ private:
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
- // Can't be handled as a stand-alone value
- UNREACHABLE();
- return value;
+ return "toHalf2(" + value + ')';
}
UNREACHABLE();
return value;
@@ -829,8 +804,12 @@ private:
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
+ } else if (device.HasVariableAoffi()) {
+ // Avoid using variable AOFFI on unsupported devices.
expr += "ftoi(" + Visit(operand) + ')';
+ } else {
+ // Insert 0 on devices not supporting variable AOFFI.
+ expr += '0';
}
if (index + 1 < aoffi.size()) {
expr += ", ";
@@ -1083,13 +1062,40 @@ private:
return BitwiseCastResult(value, Type::HalfFloat);
}
+ /// Emits a GLSL clamp of a half-float pair (operand 0) between two float bounds
+ /// (operands 1 and 2), each bound broadcast to a vec2. The clamped pair is cast back
+ /// through BitwiseCastResult and passed to ApplyPrecise.
+ std::string HClamp(Operation operation) {
+     // Operands are visited in index order, one statement each.
+     const std::string pair{VisitOperand(operation, 0, Type::HalfFloat)};
+     const std::string lower{VisitOperand(operation, 1, Type::Float)};
+     const std::string upper{VisitOperand(operation, 2, Type::Float)};
+
+     std::string expr{"clamp("};
+     expr += pair;
+     expr += ", vec2(";
+     expr += lower;
+     expr += "), vec2(";
+     expr += upper;
+     expr += "))";
+     return ApplyPrecise(operation, BitwiseCastResult(expr, Type::HalfFloat));
+ }
+
+ /// Emits GLSL that rearranges a half-float pair according to the HalfType stored in the
+ /// operation's meta, then packs the resulting vec2 back with fromHalf2.
+ std::string HUnpack(Operation operation) {
+     const std::string halves{VisitOperand(operation, 0, Type::HalfFloat)};
+
+     std::string unpacked;
+     switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+     case Tegra::Shader::HalfType::H0_H1:
+         // Both halves are used as they are.
+         unpacked = halves;
+         break;
+     case Tegra::Shader::HalfType::F32:
+         // Repack the pair into one float and broadcast it to both components.
+         unpacked = "vec2(fromHalf2(" + halves + "))";
+         break;
+     case Tegra::Shader::HalfType::H0_H0:
+         unpacked = "vec2(" + halves + "[0])";
+         break;
+     case Tegra::Shader::HalfType::H1_H1:
+         unpacked = "vec2(" + halves + "[1])";
+         break;
+     default:
+         // Any value outside the four handled cases is unexpected; fall back to "0".
+         UNREACHABLE();
+         unpacked = "0";
+         break;
+     }
+     return "fromHalf2(" + unpacked + ')';
+ }
+
std::string HMergeF32(Operation operation) {
return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
}
std::string HMergeH0(Operation operation) {
- return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" +
- Visit(operation[1]) + ")[0]))";
+ return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" +
+ Visit(operation[0]) + ")[1]))";
}
std::string HMergeH1(Operation operation) {
@@ -1189,34 +1195,46 @@ private:
return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
}
+ /// Emits a component-wise comparison of two half-float pairs using the GLSL function
+ /// named by compare_op. When with_nan is true the result is wrapped in
+ /// halfFloatNanComparison together with both operands.
+ template <bool with_nan>
+ std::string GenerateHalfComparison(Operation operation, std::string compare_op) {
+     const std::string ordered{GenerateBinaryCall(operation, compare_op, Type::Bool2,
+                                                  Type::HalfFloat, Type::HalfFloat)};
+     if constexpr (with_nan) {
+         const std::string lhs{VisitOperand(operation, 0, Type::HalfFloat)};
+         const std::string rhs{VisitOperand(operation, 1, Type::HalfFloat)};
+         return "halfFloatNanComparison(" + ordered + ", " + lhs + ", " + rhs + ')';
+     } else {
+         return ordered;
+     }
+ }
+
+ template <bool with_nan>
std::string Logical2HLessThan(Operation operation) {
- return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "lessThan");
}
+ template <bool with_nan>
std::string Logical2HEqual(Operation operation) {
- return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "equal");
}
+ template <bool with_nan>
std::string Logical2HLessEqual(Operation operation) {
- return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
}
+ template <bool with_nan>
std::string Logical2HGreaterThan(Operation operation) {
- return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "greaterThan");
}
+ template <bool with_nan>
std::string Logical2HNotEqual(Operation operation) {
- return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "notEqual");
}
+ template <bool with_nan>
std::string Logical2HGreaterEqual(Operation operation) {
- return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
}
std::string Texture(Operation operation) {
@@ -1505,6 +1523,8 @@ private:
&GLSLDecompiler::Fma<Type::HalfFloat>,
&GLSLDecompiler::Absolute<Type::HalfFloat>,
&GLSLDecompiler::HNegate,
+ &GLSLDecompiler::HClamp,
+ &GLSLDecompiler::HUnpack,
&GLSLDecompiler::HMergeF32,
&GLSLDecompiler::HMergeH0,
&GLSLDecompiler::HMergeH1,
@@ -1541,12 +1561,18 @@ private:
&GLSLDecompiler::LogicalNotEqual<Type::Uint>,
&GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
- &GLSLDecompiler::Logical2HLessThan,
- &GLSLDecompiler::Logical2HEqual,
- &GLSLDecompiler::Logical2HLessEqual,
- &GLSLDecompiler::Logical2HGreaterThan,
- &GLSLDecompiler::Logical2HNotEqual,
- &GLSLDecompiler::Logical2HGreaterEqual,
+ &GLSLDecompiler::Logical2HLessThan<false>,
+ &GLSLDecompiler::Logical2HEqual<false>,
+ &GLSLDecompiler::Logical2HLessEqual<false>,
+ &GLSLDecompiler::Logical2HGreaterThan<false>,
+ &GLSLDecompiler::Logical2HNotEqual<false>,
+ &GLSLDecompiler::Logical2HGreaterEqual<false>,
+ &GLSLDecompiler::Logical2HLessThan<true>,
+ &GLSLDecompiler::Logical2HEqual<true>,
+ &GLSLDecompiler::Logical2HLessEqual<true>,
+ &GLSLDecompiler::Logical2HGreaterThan<true>,
+ &GLSLDecompiler::Logical2HNotEqual<true>,
+ &GLSLDecompiler::Logical2HGreaterEqual<true>,
&GLSLDecompiler::Texture,
&GLSLDecompiler::TextureLod,
@@ -1625,6 +1651,7 @@ private:
return name + '_' + std::to_string(index) + '_' + suffix;
}
+ const Device& device;
const ShaderIR& ir;
const ShaderStage stage;
const std::string suffix;
@@ -1647,11 +1674,18 @@ std::string GetCommonDeclarations() {
"}\n\n"
"vec2 toHalf2(float value) {\n"
" return unpackHalf2x16(ftou(value));\n"
+ "}\n\n"
+ "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n"
+ " bvec2 is_nan1 = isnan(pair1);\n"
+ " bvec2 is_nan2 = isnan(pair2);\n"
+ " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
+ "is_nan2.y);\n"
"}\n";
}
-ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) {
- GLSLDecompiler decompiler(ir, stage, suffix);
+ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
+ const std::string& suffix) {
+ GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 74032d237..c1569e737 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,6 +12,10 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"
+namespace OpenGL {
+class Device;
+}
+
namespace VideoCommon::Shader {
class ShaderIR;
}
@@ -77,7 +81,7 @@ struct ShaderEntries {
std::string GetCommonDeclarations();
-ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
- const std::string& suffix);
+ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ Maxwell::ShaderStage stage, const std::string& suffix);
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 03ec1f020..6abf948f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -16,7 +16,7 @@ using VideoCommon::Shader::ShaderIR;
static constexpr u32 PROGRAM_OFFSET{10};
-ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
+ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@@ -34,14 +34,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
- ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
+ ProgramResult program =
+ Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
- Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
+ Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
out += program_b.first;
}
@@ -78,7 +79,7 @@ void main() {
return {out, program.second};
}
-ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
+ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@@ -98,7 +99,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
- Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
+ Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
out += R"(
@@ -109,7 +110,7 @@ void main() {
return {out, program.second};
}
-ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
+ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@@ -161,7 +162,7 @@ bool AlphaFunc(in float value) {
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
- Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
+ Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
out += program.first;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fad346b48..0536c8a03 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -10,6 +10,10 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
+namespace OpenGL {
+class Device;
+}
+
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
@@ -39,22 +43,13 @@ private:
bool has_program_b{};
};
-/**
- * Generates the GLSL vertex shader program source code for the given VS program
- * @returns String of the shader source code
- */
-ProgramResult GenerateVertexShader(const ShaderSetup& setup);
-
-/**
- * Generates the GLSL geometry shader program source code for the given GS program
- * @returns String of the shader source code
- */
-ProgramResult GenerateGeometryShader(const ShaderSetup& setup);
-
-/**
- * Generates the GLSL fragment shader program source code for the given FS program
- * @returns String of the shader source code
- */
-ProgramResult GenerateFragmentShader(const ShaderSetup& setup);
+/// Generates the GLSL vertex shader program source code for the given VS program
+ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
+
+/// Generates the GLSL geometry shader program source code for the given GS program
+ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
+
+/// Generates the GLSL fragment shader program source code for the given FS program
+ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
} // namespace OpenGL::GLShader