summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h20
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp120
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp376
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp9
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_opengl/utils.cpp16
-rw-r--r--src/video_core/renderer_opengl/utils.h4
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp3
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp1
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp3
-rw-r--r--src/video_core/shader/decode/arithmetic_immediate.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp2
-rw-r--r--src/video_core/shader/decode/bfe.cpp2
-rw-r--r--src/video_core/shader/decode/bfi.cpp2
-rw-r--r--src/video_core/shader/decode/ffma.cpp2
-rw-r--r--src/video_core/shader/decode/float_set.cpp2
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/half_set.cpp3
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/integer_set.cpp3
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/memory.cpp84
-rw-r--r--src/video_core/shader/decode/other.cpp1
-rw-r--r--src/video_core/shader/decode/predicate_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp2
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/video.cpp2
-rw-r--r--src/video_core/shader/shader_ir.cpp4
-rw-r--r--src/video_core/shader/shader_ir.h8
37 files changed, 423 insertions, 310 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7bbc556da..e83f25fa1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -530,6 +530,11 @@ union Instruction {
BitField<48, 16, u64> opcode;
union {
+ BitField<8, 8, Register> gpr;
+ BitField<20, 24, s64> offset;
+ } gmem;
+
+ union {
BitField<20, 16, u64> imm20_16;
BitField<20, 19, u64> imm20_19;
BitField<20, 32, s64> imm20_32;
@@ -812,13 +817,11 @@ union Instruction {
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
- BitField<20, 24, s64> immediate_offset;
} ldg;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
- BitField<20, 24, s64> immediate_offset;
} stg;
union {
@@ -828,6 +831,11 @@ union Instruction {
} al2p;
union {
+ BitField<53, 3, UniformType> type;
+ BitField<52, 1, u64> extended;
+ } generic;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<7, 1, u64> abs_a;
@@ -1387,10 +1395,12 @@ public:
LD_L,
LD_S,
LD_C,
+ LD, // Load from generic memory
+ LDG, // Load from global memory
ST_A,
ST_L,
ST_S,
- LDG, // Load from global memory
+ ST, // Store in generic memory
STG, // Store in global memory
AL2P, // Transforms attribute memory into physical memory
TEX,
@@ -1658,10 +1668,12 @@ private:
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
+ INST("100-------------", Id::LD, Type::Memory, "LD"),
+ INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
- INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
+ INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 38497678a..1d1581f49 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -35,6 +35,7 @@ Device::Device(std::nullptr_t) {
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
+// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
void main() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index dbd8049f5..f9b6dfeea 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -98,9 +98,11 @@ struct FramebufferCacheKey {
}
};
-RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system},
- screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
+RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
+ ScreenInfo& info)
+ : res_cache{*this}, shader_cache{*this, system, emu_window, device},
+ global_cache{*this}, system{system}, screen_info{info},
+ buffer_cache(*this, STREAM_BUFFER_SIZE) {
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 71b9c5ead..d78094138 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -48,7 +48,8 @@ struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
- explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
+ explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
+ ScreenInfo& info);
~RasterizerOpenGL() override;
void DrawArrays() override;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f700dc89a..d66252224 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,10 +2,14 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <mutex>
+#include <thread>
#include <boost/functional/hash.hpp>
#include "common/assert.h"
#include "common/hash.h"
+#include "common/scope_exit.h"
#include "core/core.h"
+#include "core/frontend/emu_window.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -166,7 +170,8 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
- std::string source = "#version 430 core\n";
+ std::string source = "#version 430 core\n"
+ "#extension GL_ARB_separate_shader_objects : enable\n\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
for (const auto& cbuf : entries.const_buffers) {
@@ -344,8 +349,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- const Device& device)
- : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
+ Core::Frontend::EmuWindow& emu_window, const Device& device)
+ : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -353,62 +358,107 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (!transferable) {
return;
}
- const auto [raws, usages] = *transferable;
+ const auto [raws, shader_usages] = *transferable;
auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
const auto supported_formats{GetSupportedFormats()};
- const auto unspecialized{
+ const auto unspecialized_shaders{
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
- if (stop_loading)
+ if (stop_loading) {
return;
+ }
// Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
- // Build shaders
- if (callback)
- callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
- for (std::size_t i = 0; i < usages.size(); ++i) {
- if (stop_loading)
- return;
+ // Inform the frontend about shader build initialization
+ if (callback) {
+ callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
+ }
- const auto& usage{usages[i]};
- LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
- i + 1, usages.size());
+ std::mutex mutex;
+ std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
+ std::atomic_bool compilation_failed = false;
- const auto& unspec{unspecialized.at(usage.unique_identifier)};
- const auto dump_it = dumps.find(usage);
+ const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
+ std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
+ const ShaderDumpsMap& dumps) {
+ context->MakeCurrent();
+ SCOPE_EXIT({ return context->DoneCurrent(); });
- CachedProgram shader;
- if (dump_it != dumps.end()) {
- // If the shader is dumped, attempt to load it with
- shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
+ for (std::size_t i = begin; i < end; ++i) {
+ if (stop_loading || compilation_failed) {
+ return;
+ }
+ const auto& usage{shader_usages[i]};
+ LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
+ usage.unique_identifier, i, shader_usages.size());
+
+ const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
+ const auto dump{dumps.find(usage)};
+
+ CachedProgram shader;
+ if (dump != dumps.end()) {
+ // If the shader is dumped, attempt to load it from its precompiled dump
+ shader = GeneratePrecompiledProgram(dump->second, supported_formats);
+ if (!shader) {
+ compilation_failed = true;
+ return;
+ }
+ }
if (!shader) {
- // Invalidate the precompiled cache if a shader dumped shader was rejected
- disk_cache.InvalidatePrecompiled();
- precompiled_cache_altered = true;
- dumps.clear();
+ shader = SpecializeShader(unspecialized.code, unspecialized.entries,
+ unspecialized.program_type, usage.bindings,
+ usage.primitive, true);
}
+
+ std::scoped_lock lock(mutex);
+ if (callback) {
+ callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
+ shader_usages.size());
+ }
+
+ precompiled_programs.emplace(usage, std::move(shader));
}
- if (!shader) {
- shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
- usage.bindings, usage.primitive, true);
- }
- precompiled_programs.insert({usage, std::move(shader)});
+ };
+
+ const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
+ const std::size_t bucket_size{shader_usages.size() / num_workers};
+ std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
+ std::vector<std::thread> threads(num_workers);
+ for (std::size_t i = 0; i < num_workers; ++i) {
+ const bool is_last_worker = i + 1 == num_workers;
+ const std::size_t start{bucket_size * i};
+ const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
+
+ // On some platforms the shared context has to be created from the GUI thread
+ contexts[i] = emu_window.CreateSharedContext();
+ threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
+ }
+ for (auto& thread : threads) {
+ thread.join();
+ }
- if (callback)
- callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
+ if (compilation_failed) {
+ // Invalidate the precompiled cache if a dumped shader was rejected
+ disk_cache.InvalidatePrecompiled();
+ dumps.clear();
+ precompiled_cache_altered = true;
+ return;
+ }
+ if (stop_loading) {
+ return;
}
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
// precompiling them
- for (std::size_t i = 0; i < usages.size(); ++i) {
- const auto& usage{usages[i]};
+ for (std::size_t i = 0; i < shader_usages.size(); ++i) {
+ const auto& usage{shader_usages[i]};
if (dumps.find(usage) == dumps.end()) {
- const auto& program = precompiled_programs.at(usage);
+ const auto& program{precompiled_programs.at(usage)};
disk_cache.SaveDump(usage, program->handle);
precompiled_cache_altered = true;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 31b979987..64e5a5594 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -22,7 +22,11 @@
namespace Core {
class System;
-} // namespace Core
+}
+
+namespace Core::Frontend {
+class EmuWindow;
+}
namespace OpenGL {
@@ -111,7 +115,7 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- const Device& device);
+ Core::Frontend::EmuWindow& emu_window, const Device& device);
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
@@ -133,13 +137,13 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
+ Core::Frontend::EmuWindow& emu_window;
const Device& device;
-
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
-
ShaderDiskCacheOpenGL disk_cache;
+
PrecompiledShaders precompiled_shaders;
PrecompiledPrograms precompiled_programs;
+ std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d437afad1..e9f8d40db 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -59,15 +59,14 @@ public:
shader_source += text;
}
- void AddLine(std::string_view text) {
- AddExpression(text);
- AddNewLine();
- }
-
- void AddLine(char character) {
- DEBUG_ASSERT(scope >= 0);
- AppendIndentation();
- shader_source += character;
+ // Forwards all arguments directly to libfmt.
+ // Note that all formatting requirements for fmt must be
+ // obeyed when using this function (e.g. {{ must be used
+ // when printing the character '{' is desired. Ditto for }} and '}',
+ // etc.).
+ template <typename... Args>
+ void AddLine(std::string_view text, Args&&... args) {
+ AddExpression(fmt::format(text, std::forward<Args>(args)...));
AddNewLine();
}
@@ -77,9 +76,7 @@ public:
}
std::string GenerateTemporary() {
- std::string temporary = "tmp";
- temporary += std::to_string(temporary_index++);
- return temporary;
+ return fmt::format("tmp{}", temporary_index++);
}
std::string GetResult() {
@@ -167,41 +164,41 @@ public:
DeclareSamplers();
DeclarePhysicalAttributeReader();
- code.AddLine("void execute_" + suffix + "() {");
+ code.AddLine("void execute_{}() {{", suffix);
++code.scope;
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;");
+ code.AddLine("uint jmp_to = {}u;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
- code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE));
+ code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE);
code.AddLine("uint flow_stack_top = 0u;");
- code.AddLine("while (true) {");
+ code.AddLine("while (true) {{");
++code.scope;
- code.AddLine("switch (jmp_to) {");
+ code.AddLine("switch (jmp_to) {{");
for (const auto& pair : ir.GetBasicBlocks()) {
const auto [address, bb] = pair;
- code.AddLine(fmt::format("case 0x{:x}u: {{", address));
+ code.AddLine("case 0x{:x}u: {{", address);
++code.scope;
VisitBlock(bb);
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
}
code.AddLine("default: return;");
- code.AddLine('}');
+ code.AddLine("}}");
for (std::size_t i = 0; i < 2; ++i) {
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
}
}
@@ -241,12 +238,13 @@ private:
}
void DeclareGeometry() {
- if (stage != ShaderStage::Geometry)
+ if (stage != ShaderStage::Geometry) {
return;
+ }
const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_vertices = std::to_string(header.common4.max_output_vertices);
- code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;");
+ const auto max_vertices = header.common4.max_output_vertices.Value();
+ code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
code.AddNewLine();
DeclareVertexRedeclarations();
@@ -255,7 +253,7 @@ private:
void DeclareVertexRedeclarations() {
bool clip_distances_declared = false;
- code.AddLine("out gl_PerVertex {");
+ code.AddLine("out gl_PerVertex {{");
++code.scope;
code.AddLine("vec4 gl_Position;");
@@ -271,40 +269,42 @@ private:
}
--code.scope;
- code.AddLine("};");
+ code.AddLine("}};");
code.AddNewLine();
}
void DeclareRegisters() {
const auto& registers = ir.GetRegisters();
for (const u32 gpr : registers) {
- code.AddLine("float " + GetRegister(gpr) + " = 0;");
+ code.AddLine("float {} = 0;", GetRegister(gpr));
}
- if (!registers.empty())
+ if (!registers.empty()) {
code.AddNewLine();
+ }
}
void DeclarePredicates() {
const auto& predicates = ir.GetPredicates();
for (const auto pred : predicates) {
- code.AddLine("bool " + GetPredicate(pred) + " = false;");
+ code.AddLine("bool {} = false;", GetPredicate(pred));
}
- if (!predicates.empty())
+ if (!predicates.empty()) {
code.AddNewLine();
+ }
}
void DeclareLocalMemory() {
if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];");
+ code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
}
void DeclareInternalFlags() {
for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
- const InternalFlag flag_code = static_cast<InternalFlag>(flag);
- code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;");
+ const auto flag_code = static_cast<InternalFlag>(flag);
+ code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
}
code.AddNewLine();
}
@@ -343,8 +343,9 @@ private:
DeclareInputAttribute(index, false);
}
}
- if (!attributes.empty())
+ if (!attributes.empty()) {
code.AddNewLine();
+ }
}
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
@@ -370,8 +371,7 @@ private:
location += GENERIC_VARYING_START_LOCATION;
}
- code.AddLine("layout (location = " + std::to_string(location) + ") " + suffix + "in vec4 " +
- name + ';');
+ code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
}
void DeclareOutputAttributes() {
@@ -389,23 +389,23 @@ private:
DeclareOutputAttribute(index);
}
}
- if (!attributes.empty())
+ if (!attributes.empty()) {
code.AddNewLine();
+ }
}
void DeclareOutputAttribute(Attribute::Index index) {
const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
- code.AddLine("layout (location = " + std::to_string(location) + ") out vec4 " +
- GetOutputAttribute(index) + ';');
+ code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
}
void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
- ") uniform " + GetConstBufferBlock(index) + " {");
- code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
- code.AddLine("};");
+ code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
+ GetConstBufferBlock(index));
+ code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index));
+ code.AddLine("}};");
code.AddNewLine();
}
}
@@ -417,17 +417,16 @@ private:
// Since we don't know how the shader will use the buffer, hint the driver to disable as
// many optimizations as possible
std::string qualifier = "coherent volatile";
- if (usage.is_read && !usage.is_written)
+ if (usage.is_read && !usage.is_written) {
qualifier += " readonly";
- else if (usage.is_written && !usage.is_read)
+ } else if (usage.is_written && !usage.is_read) {
qualifier += " writeonly";
+ }
- const std::string binding =
- fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
- code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
- GetGlobalMemoryBlock(base) + " {");
- code.AddLine(" float " + GetGlobalMemory(base) + "[];");
- code.AddLine("};");
+ code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
+ base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
+ code.AddLine(" float {}[];", GetGlobalMemory(base));
+ code.AddLine("}};");
code.AddNewLine();
}
}
@@ -435,7 +434,7 @@ private:
void DeclareSamplers() {
const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) {
- std::string sampler_type = [&]() {
+ std::string sampler_type = [&sampler] {
switch (sampler.GetType()) {
case Tegra::Shader::TextureType::Texture1D:
return "sampler1D";
@@ -450,25 +449,28 @@ private:
return "sampler2D";
}
}();
- if (sampler.IsArray())
+ if (sampler.IsArray()) {
sampler_type += "Array";
- if (sampler.IsShadow())
+ }
+ if (sampler.IsShadow()) {
sampler_type += "Shadow";
+ }
- code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
- ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
+ code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(),
+ sampler_type, GetSampler(sampler));
}
- if (!samplers.empty())
+ if (!samplers.empty()) {
code.AddNewLine();
+ }
}
void DeclarePhysicalAttributeReader() {
if (!ir.HasPhysicalAttributes()) {
return;
}
- code.AddLine("float readPhysicalAttribute(uint physical_address) {");
+ code.AddLine("float readPhysicalAttribute(uint physical_address) {{");
++code.scope;
- code.AddLine("switch (physical_address) {");
+ code.AddLine("switch (physical_address) {{");
// Just declare generic attributes for now.
const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
@@ -483,15 +485,15 @@ private:
const bool declared{stage != ShaderStage::Fragment ||
header.ps.GetAttributeUse(index) != AttributeUse::Unused};
const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
- code.AddLine(fmt::format("case 0x{:x}: return {};", address, value));
+ code.AddLine("case 0x{:x}: return {};", address, value);
}
}
code.AddLine("default: return 0;");
- code.AddLine('}');
+ code.AddLine("}}");
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
code.AddNewLine();
}
@@ -516,23 +518,26 @@ private:
return {};
}
return (this->*decompiler)(*operation);
+ }
- } else if (const auto gpr = std::get_if<GprNode>(node)) {
+ if (const auto gpr = std::get_if<GprNode>(node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
return "0";
}
return GetRegister(index);
+ }
- } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
return fmt::format("utof({}u)", immediate->GetValue());
}
return fmt::format("utof(0x{:x}u)", immediate->GetValue());
+ }
- } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+ if (const auto predicate = std::get_if<PredicateNode>(node)) {
const auto value = [&]() -> std::string {
switch (const auto index = predicate->GetIndex(); index) {
case Tegra::Shader::Pred::UnusedIndex:
@@ -544,19 +549,22 @@ private:
}
}();
if (predicate->IsNegated()) {
- return "!(" + value + ')';
+ return fmt::format("!({})", value);
}
return value;
+ }
- } else if (const auto abuf = std::get_if<AbufNode>(node)) {
+ if (const auto abuf = std::get_if<AbufNode>(node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
- return "readPhysicalAttribute(ftou(" + Visit(abuf->GetPhysicalAddress()) + "))";
+ return fmt::format("readPhysicalAttribute(ftou({}))",
+ Visit(abuf->GetPhysicalAddress()));
}
return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
+ }
- } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
+ if (const auto cbuf = std::get_if<CbufNode>(node)) {
const Node offset = cbuf->GetOffset();
if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
// Direct access
@@ -564,57 +572,63 @@ private:
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4);
+ }
- } else if (std::holds_alternative<OperationNode>(*offset)) {
+ if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
+ code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
final_offset, final_offset);
-
- } else {
- UNREACHABLE_MSG("Unmanaged offset node type");
}
- } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+ UNREACHABLE_MSG("Unmanaged offset node type");
+ }
+
+ if (const auto gmem = std::get_if<GmemNode>(node)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ }
- } else if (const auto lmem = std::get_if<LmemNode>(node)) {
+ if (const auto lmem = std::get_if<LmemNode>(node)) {
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ }
- } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
+ if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
return GetInternalFlag(internal_flag->GetFlag());
+ }
- } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+ if (const auto conditional = std::get_if<ConditionalNode>(node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
+ code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
++code.scope;
VisitBlock(conditional->GetCode());
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
return {};
+ }
- } else if (const auto comment = std::get_if<CommentNode>(node)) {
+ if (const auto comment = std::get_if<CommentNode>(node)) {
return "// " + comment->GetText();
}
+
UNREACHABLE();
return {};
}
std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) {
- const auto GeometryPass = [&](std::string name) {
+ const auto GeometryPass = [&](std::string_view name) {
if (stage == ShaderStage::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return "gs_" + std::move(name) + "[ftou(" + Visit(buffer) + ") % MAX_VERTEX_INPUT]";
+ return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer));
}
- return name;
+ return std::string(name);
};
switch (attribute) {
@@ -677,7 +691,7 @@ private:
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
const std::string temporary = code.GenerateTemporary();
- code.AddLine(precise + "float " + temporary + " = " + value + ';');
+ code.AddLine("{}float {} = {};", precise, temporary, value);
return temporary;
}
@@ -691,7 +705,7 @@ private:
}
const std::string temporary = code.GenerateTemporary();
- code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+ code.AddLine("float {} = {};", temporary, Visit(operand));
return temporary;
}
@@ -706,31 +720,32 @@ private:
case Type::Float:
return value;
case Type::Int:
- return "ftoi(" + value + ')';
+ return fmt::format("ftoi({})", value);
case Type::Uint:
- return "ftou(" + value + ')';
+ return fmt::format("ftou({})", value);
case Type::HalfFloat:
- return "toHalf2(" + value + ')';
+ return fmt::format("toHalf2({})", value);
}
UNREACHABLE();
return value;
}
- std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
+ std::string BitwiseCastResult(const std::string& value, Type type,
+ bool needs_parenthesis = false) {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
if (needs_parenthesis) {
- return '(' + value + ')';
+ return fmt::format("({})", value);
}
return value;
case Type::Int:
- return "itof(" + value + ')';
+ return fmt::format("itof({})", value);
case Type::Uint:
- return "utof(" + value + ')';
+ return fmt::format("utof({})", value);
case Type::HalfFloat:
- return "fromHalf2(" + value + ')';
+ return fmt::format("fromHalf2({})", value);
}
UNREACHABLE();
return value;
@@ -738,27 +753,27 @@ private:
std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
Type type_a, bool needs_parenthesis = true) {
- return ApplyPrecise(operation,
- BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')',
- result_type, needs_parenthesis));
+ const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a));
+
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis));
}
std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
Type type_a, Type type_b) {
const std::string op_a = VisitOperand(operation, 0, type_a);
const std::string op_b = VisitOperand(operation, 1, type_b);
+ const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
- return ApplyPrecise(
- operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
Type type_a, Type type_b) {
const std::string op_a = VisitOperand(operation, 0, type_a);
const std::string op_b = VisitOperand(operation, 1, type_b);
+ const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
- return ApplyPrecise(operation,
- BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
@@ -766,10 +781,9 @@ private:
const std::string op_a = VisitOperand(operation, 0, type_a);
const std::string op_b = VisitOperand(operation, 1, type_b);
const std::string op_c = VisitOperand(operation, 2, type_c);
+ const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
- return ApplyPrecise(
- operation,
- BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
@@ -778,10 +792,9 @@ private:
const std::string op_b = VisitOperand(operation, 1, type_b);
const std::string op_c = VisitOperand(operation, 2, type_c);
const std::string op_d = VisitOperand(operation, 3, type_d);
+ const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
- return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " +
- op_c + ", " + op_d + ')',
- result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
@@ -844,7 +857,7 @@ private:
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
- expr += "ftoi(" + Visit(operand) + ')';
+ expr += fmt::format("ftoi({})", Visit(operand));
}
break;
case Type::Float:
@@ -877,7 +890,7 @@ private:
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
- expr += "ftoi(" + Visit(operand) + ')';
+ expr += fmt::format("ftoi({})", Visit(operand));
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
@@ -902,7 +915,6 @@ private:
return {};
}
target = GetRegister(gpr->GetIndex());
-
} else if (const auto abuf = std::get_if<AbufNode>(dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
@@ -913,9 +925,9 @@ private:
case Attribute::Index::PointSize:
return "gl_PointSize";
case Attribute::Index::ClipDistances0123:
- return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']';
+ return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
case Attribute::Index::ClipDistances4567:
- return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']';
+ return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
default:
if (IsGenericAttribute(attribute)) {
return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
@@ -925,21 +937,18 @@ private:
return "0";
}
}();
-
} else if (const auto lmem = std::get_if<LmemNode>(dest)) {
- target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
-
+ target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
} else if (const auto gmem = std::get_if<GmemNode>(dest)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
-
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
- code.AddLine(target + " = " + Visit(src) + ';');
+ code.AddLine("{} = {};", target, Visit(src));
return {};
}
@@ -992,8 +1001,9 @@ private:
const std::string condition = Visit(operation[0]);
const std::string true_case = Visit(operation[1]);
const std::string false_case = Visit(operation[2]);
- return ApplyPrecise(operation,
- '(' + condition + " ? " + true_case + " : " + false_case + ')');
+ const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
+
+ return ApplyPrecise(operation, op_str);
}
std::string FCos(Operation operation) {
@@ -1057,9 +1067,9 @@ private:
std::string ILogicalShiftRight(Operation operation) {
const std::string op_a = VisitOperand(operation, 0, Type::Uint);
const std::string op_b = VisitOperand(operation, 1, Type::Uint);
+ const std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
- return ApplyPrecise(operation,
- BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int));
}
std::string IArithmeticShiftRight(Operation operation) {
@@ -1115,11 +1125,12 @@ private:
}
std::string HNegate(Operation operation) {
- const auto GetNegate = [&](std::size_t index) -> std::string {
+ const auto GetNegate = [&](std::size_t index) {
return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
};
- const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" +
- GetNegate(1) + ", " + GetNegate(2) + "))";
+ const std::string value =
+ fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat),
+ GetNegate(1), GetNegate(2));
return BitwiseCastResult(value, Type::HalfFloat);
}
@@ -1127,7 +1138,8 @@ private:
const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
const std::string min = VisitOperand(operation, 1, Type::Float);
const std::string max = VisitOperand(operation, 2, Type::Float);
- const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))";
+ const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
+
return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
}
@@ -1138,34 +1150,35 @@ private:
case Tegra::Shader::HalfType::H0_H1:
return operand;
case Tegra::Shader::HalfType::F32:
- return "vec2(fromHalf2(" + operand + "))";
+ return fmt::format("vec2(fromHalf2({}))", operand);
case Tegra::Shader::HalfType::H0_H0:
- return "vec2(" + operand + "[0])";
+ return fmt::format("vec2({}[0])", operand);
case Tegra::Shader::HalfType::H1_H1:
- return "vec2(" + operand + "[1])";
+ return fmt::format("vec2({}[1])", operand);
}
UNREACHABLE();
return "0";
}();
- return "fromHalf2(" + value + ')';
+ return fmt::format("fromHalf2({})", value);
}
std::string HMergeF32(Operation operation) {
- return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
+ return fmt::format("float(toHalf2({})[0])", Visit(operation[0]));
}
std::string HMergeH0(Operation operation) {
- return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" +
- Visit(operation[0]) + ")[1]))";
+ return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]),
+ Visit(operation[0]));
}
std::string HMergeH1(Operation operation) {
- return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" +
- Visit(operation[1]) + ")[1]))";
+ return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]),
+ Visit(operation[1]));
}
std::string HPack2(Operation operation) {
- return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))";
+ return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]),
+ Visit(operation[1]));
}
template <Type type>
@@ -1223,7 +1236,7 @@ private:
target = GetInternalFlag(flag->GetFlag());
}
- code.AddLine(target + " = " + Visit(src) + ';');
+ code.AddLine("{} = {};", target, Visit(src));
return {};
}
@@ -1245,7 +1258,7 @@ private:
std::string LogicalPick2(Operation operation) {
const std::string pair = VisitOperand(operation, 0, Type::Bool2);
- return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']';
+ return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
}
std::string LogicalAll2(Operation operation) {
@@ -1257,15 +1270,15 @@ private:
}
template <bool with_nan>
- std::string GenerateHalfComparison(Operation operation, std::string compare_op) {
- std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat)};
+ std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
+ const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
+ Type::HalfFloat, Type::HalfFloat)};
if constexpr (!with_nan) {
return comparison;
}
- return "halfFloatNanComparison(" + comparison + ", " +
- VisitOperand(operation, 0, Type::HalfFloat) + ", " +
- VisitOperand(operation, 1, Type::HalfFloat) + ')';
+ return fmt::format("halfFloatNanComparison({}, {}, {})", comparison,
+ VisitOperand(operation, 0, Type::HalfFloat),
+ VisitOperand(operation, 1, Type::HalfFloat));
}
template <bool with_nan>
@@ -1342,12 +1355,12 @@ private:
switch (meta->element) {
case 0:
case 1:
- return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
- GetSwizzle(meta->element) + "))";
+ return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod,
+ GetSwizzle(meta->element));
case 2:
return "0";
case 3:
- return "itof(textureQueryLevels(" + sampler + "))";
+ return fmt::format("itof(textureQueryLevels({}))", sampler);
}
UNREACHABLE();
return "0";
@@ -1358,8 +1371,9 @@ private:
ASSERT(meta);
if (meta->element < 2) {
- return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
- GetSwizzle(meta->element) + "))";
+ return fmt::format("itof(int(({} * vec2(256)){}))",
+ GenerateTexture(operation, "QueryLod", {}),
+ GetSwizzle(meta->element));
}
return "0";
}
@@ -1398,7 +1412,7 @@ private:
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue()));
+ code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
code.AddLine("break;");
return {};
}
@@ -1407,7 +1421,7 @@ private:
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()));
+ code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue());
return {};
}
@@ -1433,7 +1447,7 @@ private:
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
- code.AddLine("if (alpha_test[0] != 0) {");
+ code.AddLine("if (alpha_test[0] != 0) {{");
++code.scope;
// We start on the register containing the alpha value in the first RT.
u32 current_reg = 3;
@@ -1444,13 +1458,12 @@ private:
header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
- code.AddLine(
- fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)));
+ code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg));
current_reg += 4;
}
}
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
@@ -1459,8 +1472,8 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
- SafeGetRegister(current_reg)));
+ code.AddLine("FragColor{}[{}] = {};", render_target, component,
+ SafeGetRegister(current_reg));
++current_reg;
}
}
@@ -1469,7 +1482,7 @@ private:
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';');
+ code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1));
}
code.AddLine("return;");
@@ -1479,11 +1492,11 @@ private:
std::string Discard(Operation operation) {
// Enclose "discard" in a conditional, so that GLSL compilation does not complain
// about unexecuted instructions that may follow this.
- code.AddLine("if (true) {");
+ code.AddLine("if (true) {{");
++code.scope;
code.AddLine("discard;");
--code.scope;
- code.AddLine("}");
+ code.AddLine("}}");
return {};
}
@@ -1713,7 +1726,7 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
- return std::string(InternalFlagNames[index]) + '_' + suffix;
+ return fmt::format("{}_{}", InternalFlagNames[index], suffix);
}
std::string GetSampler(const Sampler& sampler) const {
@@ -1721,7 +1734,7 @@ private:
}
std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
- return name + '_' + std::to_string(index) + '_' + suffix;
+ return fmt::format("{}_{}_{}", name, index, suffix);
}
u32 GetNumPhysicalInputAttributes() const {
@@ -1749,24 +1762,25 @@ private:
} // Anonymous namespace
std::string GetCommonDeclarations() {
- const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
- return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
- "#define ftoi floatBitsToInt\n"
- "#define ftou floatBitsToUint\n"
- "#define itof intBitsToFloat\n"
- "#define utof uintBitsToFloat\n\n"
- "float fromHalf2(vec2 pair) {\n"
- " return utof(packHalf2x16(pair));\n"
- "}\n\n"
- "vec2 toHalf2(float value) {\n"
- " return unpackHalf2x16(ftou(value));\n"
- "}\n\n"
- "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n"
- " bvec2 is_nan1 = isnan(pair1);\n"
- " bvec2 is_nan2 = isnan(pair2);\n"
- " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
- "is_nan2.y);\n"
- "}\n";
+ return fmt::format(
+ "#define MAX_CONSTBUFFER_ELEMENTS {}\n"
+ "#define ftoi floatBitsToInt\n"
+ "#define ftou floatBitsToUint\n"
+ "#define itof intBitsToFloat\n"
+ "#define utof uintBitsToFloat\n\n"
+ "float fromHalf2(vec2 pair) {{\n"
+ " return utof(packHalf2x16(pair));\n"
+ "}}\n\n"
+ "vec2 toHalf2(float value) {{\n"
+ " return unpackHalf2x16(ftou(value));\n"
+ "}}\n\n"
+ "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
+ " bvec2 is_nan1 = isnan(pair1);\n"
+ " bvec2 is_nan2 = isnan(pair2);\n"
+ " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
+ "is_nan2.y);\n"
+ "}}\n",
+ MAX_CONSTBUFFER_ELEMENTS);
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index fba9c594a..ee4a45ca2 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
return {{raws, usages}};
}
-std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!IsUsable())
return {};
@@ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
return *result;
}
-std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
@@ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
}
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
+ ShaderDumpsMap dumps;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
PrecompiledEntryKind kind{};
if (!LoadObjectFromPrecompiled(kind)) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 2da0a4a23..ecd72ba58 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -33,6 +33,11 @@ namespace OpenGL {
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct ShaderDiskCacheUsage;
+struct ShaderDiskCacheDump;
+
+using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
+
/// Allocated bindings used by an OpenGL shader program
struct BaseBindings {
u32 cbuf{};
@@ -294,4 +299,4 @@ private:
bool tried_to_load{};
};
-} // namespace OpenGL \ No newline at end of file
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7ab0b4553..d2bb705a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -19,8 +19,7 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
- out += "// Shader Unique Id: VS" + id + "\n\n";
+ std::string out = "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
@@ -82,8 +81,7 @@ void main() {
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
- out += "// Shader Unique Id: GS" + id + "\n\n";
+ std::string out = "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
@@ -113,8 +111,7 @@ void main() {
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
- out += "// Shader Unique Id: FS" + id + "\n\n";
+ std::string out = "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index d69cba9c3..3451d321d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
- : VideoCore::RendererBase{window}, system{system} {}
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
+ : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
}
// Initialize sRGB Usage
OpenGLState::ClearsRGBUsed();
- rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 6cbf9d2cb..4aebf2321 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -45,7 +45,7 @@ struct ScreenInfo {
class RendererOpenGL : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
+ explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override;
/// Swap buffers (render frame)
@@ -77,6 +77,7 @@ private:
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
+ Core::Frontend::EmuWindow& emu_window;
Core::System& system;
OpenGLState state;
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 84a987371..f23fc9f9d 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -38,27 +38,27 @@ void BindBuffersRangePushBuffer::Bind() const {
sizes.data());
}
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
+void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
if (!GLAD_GL_KHR_debug) {
- return; // We don't need to throw an error as this is just for debugging
+ // We don't need to throw an error as this is just for debugging
+ return;
}
- const std::string nice_addr = fmt::format("0x{:016x}", addr);
- std::string object_label;
+ std::string object_label;
if (extra_info.empty()) {
switch (identifier) {
case GL_TEXTURE:
- object_label = "Texture@" + nice_addr;
+ object_label = fmt::format("Texture@0x{:016X}", addr);
break;
case GL_PROGRAM:
- object_label = "Shader@" + nice_addr;
+ object_label = fmt::format("Shader@0x{:016X}", addr);
break;
default:
- object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
+ object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
break;
}
} else {
- object_label = extra_info + '@' + nice_addr;
+ object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
}
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
}
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index aef45c9dc..b3e9fc499 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -4,7 +4,7 @@
#pragma once
-#include <string>
+#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -30,6 +30,6 @@ private:
std::vector<GLsizeiptr> sizes;
};
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
+void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 3190e2d7c..b4859bc1e 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -152,4 +153,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 2098c1170..3a29c4a46 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index fbcd35b18..5341e460f 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -47,4 +48,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 0d139c0d2..3095f2fd4 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 3ed5ccc5a..679ac0d4e 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
}
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index 6a95dc928..1ae192c6a 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index 601d66f1f..0b12a0d08 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 0559cc8de..a1d04c6e5 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index 1bd6755dd..cc522f1de 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 9285b8d05..9d2322a1d 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 1dd94bf9d..755f2ec44 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -64,4 +65,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 6e59eb650..fba44d714 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index a3bf17eba..a4cdaf74d 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -47,4 +46,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index aad836d24..a6a1fb632 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 6a992c543..e6a010a7d 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -12,8 +12,6 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
-#pragma optimize("", off)
-
namespace VideoCommon::Shader {
using Tegra::Shader::Attribute;
@@ -148,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::LD:
case OpCode::Id::LDG: {
+ const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::LD:
+ UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
+ return instr.generic.type;
+ case OpCode::Id::LDG:
+ return instr.ldg.type;
+ default:
+ UNREACHABLE();
+ return {};
+ }
+ }();
+
const auto [real_address_base, base_address, descriptor] =
- TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
- static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
+ TrackAndGetGlobalMemory(bb, instr, false);
- const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
+ const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
@@ -167,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
- case OpCode::Id::STG: {
- const auto [real_address_base, base_address, descriptor] =
- TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
- static_cast<u32>(instr.stg.immediate_offset.Value()), true);
-
- // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
- SetTemporal(bb, 0, real_address_base);
-
- const u32 count = GetUniformTypeElementsCount(instr.stg.type);
- for (u32 i = 0; i < count; ++i) {
- SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
- }
- for (u32 i = 0; i < count; ++i) {
- const Node it_offset = Immediate(i * 4);
- const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
- const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
-
- bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
- }
- break;
- }
case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
@@ -244,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::ST:
+ case OpCode::Id::STG: {
+ const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::ST:
+ UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
+ return instr.generic.type;
+ case OpCode::Id::STG:
+ return instr.stg.type;
+ default:
+ UNREACHABLE();
+ return {};
+ }
+ }();
+
+ const auto [real_address_base, base_address, descriptor] =
+ TrackAndGetGlobalMemory(bb, instr, true);
+
+ // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
+ SetTemporal(bb, 0, real_address_base);
+
+ const u32 count = GetUniformTypeElementsCount(type);
+ for (u32 i = 0; i < count; ++i) {
+ SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+ }
+ for (u32 i = 0; i < count; ++i) {
+ const Node it_offset = Immediate(i * 4);
+ const Node real_address =
+ Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+ const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+
+ bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+ }
+ break;
+ }
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
@@ -267,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
- Node addr_register,
- u32 immediate_offset,
+ Instruction instr,
bool is_write) {
+ const auto addr_register{GetRegister(instr.gmem.gpr)};
+ const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
+
const Node base_address{
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf = std::get_if<CbufNode>(base_address);
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 77c6f9951..a6c123573 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
index 83c61680e..71844c42b 100644
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index d0495995d..387491bd3 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index f070e8912..f8659e48e 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 951e85f44..44ae87ece 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -52,4 +52,4 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 956c01d9b..cb9ab72b1 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -108,4 +108,4 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
}
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 153ad1fd0..8a6ee5cf5 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -39,8 +39,8 @@ Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
return StoreNode(ConditionalNode(condition, std::move(code)));
}
-Node ShaderIR::Comment(const std::string& text) {
- return StoreNode(CommentNode(text));
+Node ShaderIR::Comment(std::string text) {
+ return StoreNode(CommentNode(std::move(text)));
}
Node ShaderIR::Immediate(u32 value) {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index f99300c1c..ff7472e30 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -669,7 +669,7 @@ private:
/// Creates a conditional node
Node Conditional(Node condition, std::vector<Node>&& code);
/// Creates a commentary
- Node Comment(const std::string& text);
+ Node Comment(std::string text);
/// Creates an u32 immediate
Node Immediate(u32 value);
/// Creates a s32 immediate
@@ -824,10 +824,8 @@ private:
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) const;
- std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
- Node addr_register,
- u32 immediate_offset,
- bool is_write);
+ std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
+ NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
template <typename... T>
Node Operation(OperationCode code, const T*... operands) {