diff options
Diffstat (limited to 'src/video_core')
37 files changed, 423 insertions, 310 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7bbc556da..e83f25fa1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -530,6 +530,11 @@ union Instruction { BitField<48, 16, u64> opcode; union { + BitField<8, 8, Register> gpr; + BitField<20, 24, s64> offset; + } gmem; + + union { BitField<20, 16, u64> imm20_16; BitField<20, 19, u64> imm20_19; BitField<20, 32, s64> imm20_32; @@ -812,13 +817,11 @@ union Instruction { union { BitField<48, 3, UniformType> type; BitField<46, 2, u64> cache_mode; - BitField<20, 24, s64> immediate_offset; } ldg; union { BitField<48, 3, UniformType> type; BitField<46, 2, u64> cache_mode; - BitField<20, 24, s64> immediate_offset; } stg; union { @@ -828,6 +831,11 @@ union Instruction { } al2p; union { + BitField<53, 3, UniformType> type; + BitField<52, 1, u64> extended; + } generic; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -1387,10 +1395,12 @@ public: LD_L, LD_S, LD_C, + LD, // Load from generic memory + LDG, // Load from global memory ST_A, ST_L, ST_S, - LDG, // Load from global memory + ST, // Store in generic memory STG, // Store in global memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1658,10 +1668,12 @@ private: INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), + INST("100-------------", Id::LD, Type::Memory, "LD"), + INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), + INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 38497678a..1d1581f49 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -35,6 +35,7 @@ Device::Device(std::nullptr_t) { bool Device::TestVariableAoffi() { const GLchar* AOFFI_TEST = R"(#version 430 core +// This is a unit test, please ignore me on apitrace bug reports. uniform sampler2D tex; uniform ivec2 variable_offset; void main() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dbd8049f5..f9b6dfeea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,9 +98,11 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, - screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { +RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + ScreenInfo& info) + : res_cache{*this}, shader_cache{*this, system, emu_window, device}, + global_cache{*this}, system{system}, screen_info{info}, + buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 71b9c5ead..d78094138 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -48,7 +48,8 @@ struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); + explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + ScreenInfo& info); ~RasterizerOpenGL() override; void DrawArrays() override; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f700dc89a..d66252224 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -2,10 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <mutex> +#include <thread> #include <boost/functional/hash.hpp> #include "common/assert.h" #include "common/hash.h" +#include "common/scope_exit.h" #include "core/core.h" +#include "core/frontend/emu_window.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -166,7 +170,8 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, Maxwell::ShaderProgram program_type, BaseBindings base_bindings, GLenum primitive_mode, bool hint_retrievable = false) { - std::string source = "#version 430 core\n"; + std::string source = "#version 430 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); for (const auto& cbuf : entries.const_buffers) { @@ -344,8 +349,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device) - : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} + Core::Frontend::EmuWindow& emu_window, const Device& device) + : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -353,62 +358,107 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (!transferable) { return; } - const auto [raws, usages] = *transferable; + const auto [raws, shader_usages] = *transferable; auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized{ + const auto unspecialized_shaders{ GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) + if (stop_loading) { return; + } // Track if precompiled cache was altered during loading to know if we have to serialize the // virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; - // Build shaders - if (callback) - callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); - for (std::size_t i = 0; i < usages.size(); ++i) { - if (stop_loading) - return; + // Inform the frontend about shader build initialization + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); + } - const auto& usage{usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier, - i + 1, usages.size()); + std::mutex mutex; + std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex + std::atomic_bool compilation_failed = false; - const auto& unspec{unspecialized.at(usage.unique_identifier)}; - const auto dump_it = dumps.find(usage); + const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, + std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, + const ShaderDumpsMap& dumps) { + context->MakeCurrent(); + SCOPE_EXIT({ return context->DoneCurrent(); }); - CachedProgram shader; - if (dump_it != dumps.end()) { - // If the shader is dumped, attempt to load it with - shader = GeneratePrecompiledProgram(dump_it->second, supported_formats); + for (std::size_t i = begin; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; + } + const auto& usage{shader_usages[i]}; + LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", + usage.unique_identifier, i, shader_usages.size()); + + const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; + const auto dump{dumps.find(usage)}; + + CachedProgram shader; + if (dump != dumps.end()) { + // If the shader is dumped, attempt to load it with + shader = GeneratePrecompiledProgram(dump->second, supported_formats); + if (!shader) { + compilation_failed = true; + return; + } + } if (!shader) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - dumps.clear(); + shader = SpecializeShader(unspecialized.code, unspecialized.entries, + unspecialized.program_type, usage.bindings, + usage.primitive, true); } + + std::scoped_lock lock(mutex); + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, + shader_usages.size()); + } + + precompiled_programs.emplace(usage, std::move(shader)); } - if (!shader) { - shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type, - usage.bindings, usage.primitive, true); - } - precompiled_programs.insert({usage, std::move(shader)}); + }; + + const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; + const std::size_t bucket_size{shader_usages.size() / num_workers}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); + std::vector<std::thread> threads(num_workers); + for (std::size_t i = 0; i < num_workers; ++i) { + const bool is_last_worker = i + 1 == num_workers; + const std::size_t start{bucket_size * i}; + const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; + + // On some platforms the shared context has to be created from the GUI thread + contexts[i] = emu_window.CreateSharedContext(); + threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); + } + for (auto& thread : threads) { + thread.join(); + } - if (callback) - callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size()); + if (compilation_failed) { + // Invalidate the precompiled cache if a shader dumped shader was rejected + disk_cache.InvalidatePrecompiled(); + dumps.clear(); + precompiled_cache_altered = true; + return; + } + if (stop_loading) { + return; } // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before // precompiling them - for (std::size_t i = 0; i < usages.size(); ++i) { - const auto& usage{usages[i]}; + for (std::size_t i = 0; i < shader_usages.size(); ++i) { + const auto& usage{shader_usages[i]}; if (dumps.find(usage) == dumps.end()) { - const auto& program = precompiled_programs.at(usage); + const auto& program{precompiled_programs.at(usage)}; disk_cache.SaveDump(usage, program->handle); precompiled_cache_altered = true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 31b979987..64e5a5594 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -22,7 +22,11 @@ namespace Core { class System; -} // namespace Core +} + +namespace Core::Frontend { +class EmuWindow; +} namespace OpenGL { @@ -111,7 +115,7 @@ private: class ShaderCacheOpenGL final : public RasterizerCache<Shader> { public: explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device); + Core::Frontend::EmuWindow& emu_window, const Device& device); /// Loads disk cache for the current game void LoadDiskCache(const std::atomic_bool& stop_loading, @@ -133,13 +137,13 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::Frontend::EmuWindow& emu_window; const Device& device; - - std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; - ShaderDiskCacheOpenGL disk_cache; + PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d437afad1..e9f8d40db 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -59,15 +59,14 @@ public: shader_source += text; } - void AddLine(std::string_view text) { - AddExpression(text); - AddNewLine(); - } - - void AddLine(char character) { - DEBUG_ASSERT(scope >= 0); - AppendIndentation(); - shader_source += character; + // Forwards all arguments directly to libfmt. + // Note that all formatting requirements for fmt must be + // obeyed when using this function. (e.g. {{ must be used + // printing the character '{' is desirable. Ditto for }} and '}', + // etc). + template <typename... Args> + void AddLine(std::string_view text, Args&&... args) { + AddExpression(fmt::format(text, std::forward<Args>(args)...)); AddNewLine(); } @@ -77,9 +76,7 @@ public: } std::string GenerateTemporary() { - std::string temporary = "tmp"; - temporary += std::to_string(temporary_index++); - return temporary; + return fmt::format("tmp{}", temporary_index++); } std::string GetResult() { @@ -167,41 +164,41 @@ public: DeclareSamplers(); DeclarePhysicalAttributeReader(); - code.AddLine("void execute_" + suffix + "() {"); + code.AddLine("void execute_{}() {{", suffix); ++code.scope; // VM's program counter const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;"); + code.AddLine("uint jmp_to = {}u;", first_address); // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE)); + code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); code.AddLine("uint flow_stack_top = 0u;"); - code.AddLine("while (true) {"); + code.AddLine("while (true) {{"); ++code.scope; - code.AddLine("switch (jmp_to) {"); + code.AddLine("switch (jmp_to) {{"); for (const auto& pair : ir.GetBasicBlocks()) { const auto [address, bb] = pair; - code.AddLine(fmt::format("case 0x{:x}u: {{", address)); + code.AddLine("case 0x{:x}u: {{", address); ++code.scope; VisitBlock(bb); --code.scope; - code.AddLine('}'); + code.AddLine("}}"); } code.AddLine("default: return;"); - code.AddLine('}'); + code.AddLine("}}"); for (std::size_t i = 0; i < 2; ++i) { --code.scope; - code.AddLine('}'); + code.AddLine("}}"); } } @@ -241,12 +238,13 @@ private: } void DeclareGeometry() { - if (stage != ShaderStage::Geometry) + if (stage != ShaderStage::Geometry) { return; + } const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_vertices = std::to_string(header.common4.max_output_vertices); - code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;"); + const auto max_vertices = header.common4.max_output_vertices.Value(); + code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); DeclareVertexRedeclarations(); @@ -255,7 +253,7 @@ private: void DeclareVertexRedeclarations() { bool clip_distances_declared = false; - code.AddLine("out gl_PerVertex {"); + code.AddLine("out gl_PerVertex {{"); ++code.scope; code.AddLine("vec4 gl_Position;"); @@ -271,40 +269,42 @@ private: } --code.scope; - code.AddLine("};"); + code.AddLine("}};"); code.AddNewLine(); } void DeclareRegisters() { const auto& registers = ir.GetRegisters(); for (const u32 gpr : registers) { - code.AddLine("float " + GetRegister(gpr) + " = 0;"); + code.AddLine("float {} = 0;", GetRegister(gpr)); } - if (!registers.empty()) + if (!registers.empty()) { code.AddNewLine(); + } } void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { - code.AddLine("bool " + GetPredicate(pred) + " = false;"); + code.AddLine("bool {} = false;", GetPredicate(pred)); } - if (!predicates.empty()) + if (!predicates.empty()) { code.AddNewLine(); + } } void DeclareLocalMemory() { if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];"); + code.AddLine("float {}[{}];", GetLocalMemory(), element_count); code.AddNewLine(); } } void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { - const InternalFlag flag_code = static_cast<InternalFlag>(flag); - code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;"); + const auto flag_code = static_cast<InternalFlag>(flag); + code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); } code.AddNewLine(); } @@ -343,8 +343,9 @@ private: DeclareInputAttribute(index, false); } } - if (!attributes.empty()) + if (!attributes.empty()) { code.AddNewLine(); + } } void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { @@ -370,8 +371,7 @@ private: location += GENERIC_VARYING_START_LOCATION; } - code.AddLine("layout (location = " + std::to_string(location) + ") " + suffix + "in vec4 " + - name + ';'); + code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } void DeclareOutputAttributes() { @@ -389,23 +389,23 @@ private: DeclareOutputAttribute(index); } } - if (!attributes.empty()) + if (!attributes.empty()) { code.AddNewLine(); + } } void DeclareOutputAttribute(Attribute::Index index) { const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; - code.AddLine("layout (location = " + std::to_string(location) + ") out vec4 " + - GetOutputAttribute(index) + ';'); + code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } void DeclareConstantBuffers() { for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) + - ") uniform " + GetConstBufferBlock(index) + " {"); - code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); - code.AddLine("};"); + code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, + GetConstBufferBlock(index)); + code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index)); + code.AddLine("}};"); code.AddNewLine(); } } @@ -417,17 +417,16 @@ private: // Since we don't know how the shader will use the shader, hint the driver to disable as // much optimizations as possible std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) + if (usage.is_read && !usage.is_written) { qualifier += " readonly"; - else if (usage.is_written && !usage.is_read) + } else if (usage.is_written && !usage.is_read) { qualifier += " writeonly"; + } - const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + - GetGlobalMemoryBlock(base) + " {"); - code.AddLine(" float " + GetGlobalMemory(base) + "[];"); - code.AddLine("};"); + code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", + base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); + code.AddLine(" float {}[];", GetGlobalMemory(base)); + code.AddLine("}};"); code.AddNewLine(); } } @@ -435,7 +434,7 @@ private: void DeclareSamplers() { const auto& samplers = ir.GetSamplers(); for (const auto& sampler : samplers) { - std::string sampler_type = [&]() { + std::string sampler_type = [&sampler] { switch (sampler.GetType()) { case Tegra::Shader::TextureType::Texture1D: return "sampler1D"; @@ -450,25 +449,28 @@ private: return "sampler2D"; } }(); - if (sampler.IsArray()) + if (sampler.IsArray()) { sampler_type += "Array"; - if (sampler.IsShadow()) + } + if (sampler.IsShadow()) { sampler_type += "Shadow"; + } - code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) + - ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); + code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(), + sampler_type, GetSampler(sampler)); } - if (!samplers.empty()) + if (!samplers.empty()) { code.AddNewLine(); + } } void DeclarePhysicalAttributeReader() { if (!ir.HasPhysicalAttributes()) { return; } - code.AddLine("float readPhysicalAttribute(uint physical_address) {"); + code.AddLine("float readPhysicalAttribute(uint physical_address) {{"); ++code.scope; - code.AddLine("switch (physical_address) {"); + code.AddLine("switch (physical_address) {{"); // Just declare generic attributes for now. const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; @@ -483,15 +485,15 @@ private: const bool declared{stage != ShaderStage::Fragment || header.ps.GetAttributeUse(index) != AttributeUse::Unused}; const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; - code.AddLine(fmt::format("case 0x{:x}: return {};", address, value)); + code.AddLine("case 0x{:x}: return {};", address, value); } } code.AddLine("default: return 0;"); - code.AddLine('}'); + code.AddLine("}}"); --code.scope; - code.AddLine('}'); + code.AddLine("}}"); code.AddNewLine(); } @@ -516,23 +518,26 @@ private: return {}; } return (this->*decompiler)(*operation); + } - } else if (const auto gpr = std::get_if<GprNode>(node)) { + if (const auto gpr = std::get_if<GprNode>(node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "0"; } return GetRegister(index); + } - } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { + if (const auto immediate = std::get_if<ImmediateNode>(node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits return fmt::format("utof({}u)", immediate->GetValue()); } return fmt::format("utof(0x{:x}u)", immediate->GetValue()); + } - } else if (const auto predicate = std::get_if<PredicateNode>(node)) { + if (const auto predicate = std::get_if<PredicateNode>(node)) { const auto value = [&]() -> std::string { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -544,19 +549,22 @@ private: } }(); if (predicate->IsNegated()) { - return "!(" + value + ')'; + return fmt::format("!({})", value); } return value; + } - } else if (const auto abuf = std::get_if<AbufNode>(node)) { + if (const auto abuf = std::get_if<AbufNode>(node)) { UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { - return "readPhysicalAttribute(ftou(" + Visit(abuf->GetPhysicalAddress()) + "))"; + return fmt::format("readPhysicalAttribute(ftou({}))", + Visit(abuf->GetPhysicalAddress())); } return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); + } - } else if (const auto cbuf = std::get_if<CbufNode>(node)) { + if (const auto cbuf = std::get_if<CbufNode>(node)) { const Node offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(offset)) { // Direct access @@ -564,57 +572,63 @@ private: ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / (4 * 4), (offset_imm / 4) % 4); + } - } else if (std::holds_alternative<OperationNode>(*offset)) { + if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);"); + code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), final_offset, final_offset); - - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); } - } else if (const auto gmem = std::get_if<GmemNode>(node)) { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + + if (const auto gmem = std::get_if<GmemNode>(node)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); - const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } - } else if (const auto lmem = std::get_if<LmemNode>(node)) { + if (const auto lmem = std::get_if<LmemNode>(node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); + } - } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { + if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { return GetInternalFlag(internal_flag->GetFlag()); + } - } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(node)) { // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {"); + code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); ++code.scope; VisitBlock(conditional->GetCode()); --code.scope; - code.AddLine('}'); + code.AddLine("}}"); return {}; + } - } else if (const auto comment = std::get_if<CommentNode>(node)) { + if (const auto comment = std::get_if<CommentNode>(node)) { return "// " + comment->GetText(); } + UNREACHABLE(); return {}; } std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { - const auto GeometryPass = [&](std::string name) { + const auto GeometryPass = [&](std::string_view name) { if (stage == ShaderStage::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games // set an 0x80000000 index for those and the shader fails to build. Find out why // this happens and what's its intent. - return "gs_" + std::move(name) + "[ftou(" + Visit(buffer) + ") % MAX_VERTEX_INPUT]"; + return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer)); } - return name; + return std::string(name); }; switch (attribute) { @@ -677,7 +691,7 @@ private: const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; const std::string temporary = code.GenerateTemporary(); - code.AddLine(precise + "float " + temporary + " = " + value + ';'); + code.AddLine("{}float {} = {};", precise, temporary, value); return temporary; } @@ -691,7 +705,7 @@ private: } const std::string temporary = code.GenerateTemporary(); - code.AddLine("float " + temporary + " = " + Visit(operand) + ';'); + code.AddLine("float {} = {};", temporary, Visit(operand)); return temporary; } @@ -706,31 +720,32 @@ private: case Type::Float: return value; case Type::Int: - return "ftoi(" + value + ')'; + return fmt::format("ftoi({})", value); case Type::Uint: - return "ftou(" + value + ')'; + return fmt::format("ftou({})", value); case Type::HalfFloat: - return "toHalf2(" + value + ')'; + return fmt::format("toHalf2({})", value); } UNREACHABLE(); return value; } - std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { + std::string BitwiseCastResult(const std::string& value, Type type, + bool needs_parenthesis = false) { switch (type) { case Type::Bool: case Type::Bool2: case Type::Float: if (needs_parenthesis) { - return '(' + value + ')'; + return fmt::format("({})", value); } return value; case Type::Int: - return "itof(" + value + ')'; + return fmt::format("itof({})", value); case Type::Uint: - return "utof(" + value + ')'; + return fmt::format("utof({})", value); case Type::HalfFloat: - return "fromHalf2(" + value + ')'; + return fmt::format("fromHalf2({})", value); } UNREACHABLE(); return value; @@ -738,27 +753,27 @@ private: std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, Type type_a, bool needs_parenthesis = true) { - return ApplyPrecise(operation, - BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')', - result_type, needs_parenthesis)); + const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a)); + + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis)); } std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, Type type_a, Type type_b) { const std::string op_a = VisitOperand(operation, 0, type_a); const std::string op_b = VisitOperand(operation, 1, type_b); + const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - return ApplyPrecise( - operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, Type type_a, Type type_b) { const std::string op_a = VisitOperand(operation, 0, type_a); const std::string op_b = VisitOperand(operation, 1, type_b); + const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - return ApplyPrecise(operation, - BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, @@ -766,10 +781,9 @@ private: const std::string op_a = VisitOperand(operation, 0, type_a); const std::string op_b = VisitOperand(operation, 1, type_b); const std::string op_c = VisitOperand(operation, 2, type_c); + const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - return ApplyPrecise( - operation, - BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, @@ -778,10 +792,9 @@ private: const std::string op_b = VisitOperand(operation, 1, type_b); const std::string op_c = VisitOperand(operation, 2, type_c); const std::string op_d = VisitOperand(operation, 3, type_d); + const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + - op_c + ", " + op_d + ')', - result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateTexture(Operation operation, const std::string& function_suffix, @@ -844,7 +857,7 @@ private: // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else { - expr += "ftoi(" + Visit(operand) + ')'; + expr += fmt::format("ftoi({})", Visit(operand)); } break; case Type::Float: @@ -877,7 +890,7 @@ private: expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else if (device.HasVariableAoffi()) { // Avoid using variable AOFFI on unsupported devices. - expr += "ftoi(" + Visit(operand) + ')'; + expr += fmt::format("ftoi({})", Visit(operand)); } else { // Insert 0 on devices not supporting variable AOFFI. expr += '0'; @@ -902,7 +915,6 @@ private: return {}; } target = GetRegister(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); @@ -913,9 +925,9 @@ private: case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: - return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']'; + return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); case Attribute::Index::ClipDistances4567: - return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']'; + return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); default: if (IsGenericAttribute(attribute)) { return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); @@ -925,21 +937,18 @@ private: return "0"; } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { - target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; - + target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } else if (const auto gmem = std::get_if<GmemNode>(dest)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); - const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); - } else { UNREACHABLE_MSG("Assign called without a proper target"); } - code.AddLine(target + " = " + Visit(src) + ';'); + code.AddLine("{} = {};", target, Visit(src)); return {}; } @@ -992,8 +1001,9 @@ private: const std::string condition = Visit(operation[0]); const std::string true_case = Visit(operation[1]); const std::string false_case = Visit(operation[2]); - return ApplyPrecise(operation, - '(' + condition + " ? " + true_case + " : " + false_case + ')'); + const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); + + return ApplyPrecise(operation, op_str); } std::string FCos(Operation operation) { @@ -1057,9 +1067,9 @@ private: std::string ILogicalShiftRight(Operation operation) { const std::string op_a = VisitOperand(operation, 0, Type::Uint); const std::string op_b = VisitOperand(operation, 1, Type::Uint); + const std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - return ApplyPrecise(operation, - BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int)); } std::string IArithmeticShiftRight(Operation operation) { @@ -1115,11 +1125,12 @@ private: } std::string HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) -> std::string { + const auto GetNegate = [&](std::size_t index) { return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; }; - const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" + - GetNegate(1) + ", " + GetNegate(2) + "))"; + const std::string value = + fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat), + GetNegate(1), GetNegate(2)); return BitwiseCastResult(value, Type::HalfFloat); } @@ -1127,7 +1138,8 @@ private: const std::string value = VisitOperand(operation, 0, Type::HalfFloat); const std::string min = VisitOperand(operation, 1, Type::Float); const std::string max = VisitOperand(operation, 2, Type::Float); - const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; + const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); + return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); } @@ -1138,34 +1150,35 @@ private: case Tegra::Shader::HalfType::H0_H1: return operand; case Tegra::Shader::HalfType::F32: - return "vec2(fromHalf2(" + operand + "))"; + return fmt::format("vec2(fromHalf2({}))", operand); case Tegra::Shader::HalfType::H0_H0: - return "vec2(" + operand + "[0])"; + return fmt::format("vec2({}[0])", operand); case Tegra::Shader::HalfType::H1_H1: - return "vec2(" + operand + "[1])"; + return fmt::format("vec2({}[1])", operand); } UNREACHABLE(); return "0"; }(); - return "fromHalf2(" + value + ')'; + return fmt::format("fromHalf2({})", value); } std::string HMergeF32(Operation operation) { - return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; + return fmt::format("float(toHalf2({})[0])", Visit(operation[0])); } std::string HMergeH0(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + - Visit(operation[0]) + ")[1]))"; + return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]), + Visit(operation[0])); } std::string HMergeH1(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" + - Visit(operation[1]) + ")[1]))"; + return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]), + Visit(operation[1])); } std::string HPack2(Operation operation) { - return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))"; + return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]), + Visit(operation[1])); } template <Type type> @@ -1223,7 +1236,7 @@ private: target = GetInternalFlag(flag->GetFlag()); } - code.AddLine(target + " = " + Visit(src) + ';'); + code.AddLine("{} = {};", target, Visit(src)); return {}; } @@ -1245,7 +1258,7 @@ private: std::string LogicalPick2(Operation operation) { const std::string pair = VisitOperand(operation, 0, Type::Bool2); - return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']'; + return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); } std::string LogicalAll2(Operation operation) { @@ -1257,15 +1270,15 @@ private: } template <bool with_nan> - std::string GenerateHalfComparison(Operation operation, std::string compare_op) { - std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat)}; + std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { + const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, + Type::HalfFloat, Type::HalfFloat)}; if constexpr (!with_nan) { return comparison; } - return "halfFloatNanComparison(" + comparison + ", " + - VisitOperand(operation, 0, Type::HalfFloat) + ", " + - VisitOperand(operation, 1, Type::HalfFloat) + ')'; + return fmt::format("halfFloatNanComparison({}, {}, {})", comparison, + VisitOperand(operation, 0, Type::HalfFloat), + VisitOperand(operation, 1, Type::HalfFloat)); } template <bool with_nan> @@ -1342,12 +1355,12 @@ private: switch (meta->element) { case 0: case 1: - return "itof(int(textureSize(" + sampler + ", " + lod + ')' + - GetSwizzle(meta->element) + "))"; + return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod, + GetSwizzle(meta->element)); case 2: return "0"; case 3: - return "itof(textureQueryLevels(" + sampler + "))"; + return fmt::format("itof(textureQueryLevels({}))", sampler); } UNREACHABLE(); return "0"; @@ -1358,8 +1371,9 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + - GetSwizzle(meta->element) + "))"; + return fmt::format("itof(int(({} * vec2(256)){}))", + GenerateTexture(operation, "QueryLod", {}), + GetSwizzle(meta->element)); } return "0"; } @@ -1398,7 +1412,7 @@ private: const auto target = std::get_if<ImmediateNode>(operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue())); + code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); code.AddLine("break;"); return {}; } @@ -1407,7 +1421,7 @@ private: const auto target = std::get_if<ImmediateNode>(operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue())); + code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); return {}; } @@ -1433,7 +1447,7 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - code.AddLine("if (alpha_test[0] != 0) {"); + code.AddLine("if (alpha_test[0] != 0) {{"); ++code.scope; // We start on the register containing the alpha value in the first RT. u32 current_reg = 3; @@ -1444,13 +1458,12 @@ private: header.ps.IsColorComponentOutputEnabled(render_target, 1) || header.ps.IsColorComponentOutputEnabled(render_target, 2) || header.ps.IsColorComponentOutputEnabled(render_target, 3)) { - code.AddLine( - fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg))); + code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)); current_reg += 4; } } --code.scope; - code.AddLine('}'); + code.AddLine("}}"); // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. @@ -1459,8 +1472,8 @@ private: // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, - SafeGetRegister(current_reg))); + code.AddLine("FragColor{}[{}] = {};", render_target, component, + SafeGetRegister(current_reg)); ++current_reg; } } @@ -1469,7 +1482,7 @@ private: if (header.ps.omap.depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. - code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';'); + code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1)); } code.AddLine("return;"); @@ -1479,11 +1492,11 @@ private: std::string Discard(Operation operation) { // Enclose "discard" in a conditional, so that GLSL compilation does not complain // about unexecuted instructions that may follow this. - code.AddLine("if (true) {"); + code.AddLine("if (true) {{"); ++code.scope; code.AddLine("discard;"); --code.scope; - code.AddLine("}"); + code.AddLine("}}"); return {}; } @@ -1713,7 +1726,7 @@ private: const auto index = static_cast<u32>(flag); ASSERT(index < static_cast<u32>(InternalFlag::Amount)); - return std::string(InternalFlagNames[index]) + '_' + suffix; + return fmt::format("{}_{}", InternalFlagNames[index], suffix); } std::string GetSampler(const Sampler& sampler) const { @@ -1721,7 +1734,7 @@ private: } std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { - return name + '_' + std::to_string(index) + '_' + suffix; + return fmt::format("{}_{}_{}", name, index, suffix); } u32 GetNumPhysicalInputAttributes() const { @@ -1749,24 +1762,25 @@ private: } // Anonymous namespace std::string GetCommonDeclarations() { - const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define ftoi floatBitsToInt\n" - "#define ftou floatBitsToUint\n" - "#define itof intBitsToFloat\n" - "#define utof uintBitsToFloat\n\n" - "float fromHalf2(vec2 pair) {\n" - " return utof(packHalf2x16(pair));\n" - "}\n\n" - "vec2 toHalf2(float value) {\n" - " return unpackHalf2x16(ftou(value));\n" - "}\n\n" - "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" - " bvec2 is_nan1 = isnan(pair1);\n" - " bvec2 is_nan2 = isnan(pair2);\n" - " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " - "is_nan2.y);\n" - "}\n"; + return fmt::format( + "#define MAX_CONSTBUFFER_ELEMENTS {}\n" + "#define ftoi floatBitsToInt\n" + "#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n" + "#define utof uintBitsToFloat\n\n" + "float fromHalf2(vec2 pair) {{\n" + " return utof(packHalf2x16(pair));\n" + "}}\n\n" + "vec2 toHalf2(float value) {{\n" + " return unpackHalf2x16(ftou(value));\n" + "}}\n\n" + "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" + " bvec2 is_nan1 = isnan(pair1);\n" + " bvec2 is_nan2 = isnan(pair2);\n" + " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " + "is_nan2.y);\n" + "}}\n", + MAX_CONSTBUFFER_ELEMENTS); } ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index fba9c594a..ee4a45ca2 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {{raws, usages}}; } -std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> +std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!IsUsable()) return {}; @@ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> +std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); @@ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { } std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps; + ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { PrecompiledEntryKind kind{}; if (!LoadObjectFromPrecompiled(kind)) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 2da0a4a23..ecd72ba58 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -33,6 +33,11 @@ namespace OpenGL { using ProgramCode = std::vector<u64>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct ShaderDiskCacheUsage; +struct ShaderDiskCacheDump; + +using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; + /// Allocated bindings used by an OpenGL shader program struct BaseBindings { u32 cbuf{}; @@ -294,4 +299,4 @@ private: bool tried_to_load{}; }; -} // namespace OpenGL
\ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7ab0b4553..d2bb705a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -19,8 +19,7 @@ static constexpr u32 PROGRAM_OFFSET{10}; ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; - out += "// Shader Unique Id: VS" + id + "\n\n"; + std::string out = "// Shader Unique Id: VS" + id + "\n\n"; out += GetCommonDeclarations(); out += R"( @@ -82,8 +81,7 @@ void main() { ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; - out += "// Shader Unique Id: GS" + id + "\n\n"; + std::string out = "// Shader Unique Id: GS" + id + "\n\n"; out += GetCommonDeclarations(); out += R"( @@ -113,8 +111,7 @@ void main() { ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; - out += "// Shader Unique Id: FS" + id + "\n\n"; + std::string out = "// Shader Unique Id: FS" + id + "\n\n"; out += GetCommonDeclarations(); out += R"( diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d69cba9c3..3451d321d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons return matrix; } -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system) - : VideoCore::RendererBase{window}, system{system} {} +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) + : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} RendererOpenGL::~RendererOpenGL() = default; @@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() { } // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); - rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info); + rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 6cbf9d2cb..4aebf2321 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -45,7 +45,7 @@ struct ScreenInfo { class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); ~RendererOpenGL() override; /// Swap buffers (render frame) @@ -77,6 +77,7 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); + Core::Frontend::EmuWindow& emu_window; Core::System& system; OpenGLState state; diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 84a987371..f23fc9f9d 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -38,27 +38,27 @@ void BindBuffersRangePushBuffer::Bind() const { sizes.data()); } -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) { +void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { - return; // We don't need to throw an error as this is just for debugging + // We don't need to throw an error as this is just for debugging + return; } - const std::string nice_addr = fmt::format("0x{:016x}", addr); - std::string object_label; + std::string object_label; if (extra_info.empty()) { switch (identifier) { case GL_TEXTURE: - object_label = "Texture@" + nice_addr; + object_label = fmt::format("Texture@0x{:016X}", addr); break; case GL_PROGRAM: - object_label = "Shader@" + nice_addr; + object_label = fmt::format("Shader@0x{:016X}", addr); break; default: - object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr); + object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); break; } } else { - object_label = extra_info + '@' + nice_addr; + object_label = fmt::format("{}@0x{:016X}", extra_info, addr); } glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); } diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index aef45c9dc..b3e9fc499 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -4,7 +4,7 @@ #pragma once -#include <string> +#include <string_view> #include <vector> #include <glad/glad.h> #include "common/common_types.h" @@ -30,6 +30,6 @@ private: std::vector<GLsizeiptr> sizes; }; -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = ""); +void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 3190e2d7c..b4859bc1e 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -152,4 +153,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 2098c1170..3a29c4a46 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index fbcd35b18..5341e460f 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -47,4 +48,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 0d139c0d2..3095f2fd4 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 3ed5ccc5a..679ac0d4e 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation } } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 6a95dc928..1ae192c6a 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 601d66f1f..0b12a0d08 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 0559cc8de..a1d04c6e5 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 1bd6755dd..cc522f1de 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 9285b8d05..9d2322a1d 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 1dd94bf9d..755f2ec44 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -64,4 +65,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 6e59eb650..fba44d714 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index a3bf17eba..a4cdaf74d 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -47,4 +46,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index aad836d24..a6a1fb632 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 6a992c543..e6a010a7d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -12,8 +12,6 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" -#pragma optimize("", off) - namespace VideoCommon::Shader { using Tegra::Shader::Attribute; @@ -148,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::LD: case OpCode::Id::LDG: { + const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { + switch (opcode->get().GetId()) { + case OpCode::Id::LD: + UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); + return instr.generic.type; + case OpCode::Id::LDG: + return instr.ldg.type; + default: + UNREACHABLE(); + return {}; + } + }(); + const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), - static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + TrackAndGetGlobalMemory(bb, instr, false); - const u32 count = GetUniformTypeElementsCount(instr.ldg.type); + const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -167,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } - case OpCode::Id::STG: { - const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), - static_cast<u32>(instr.stg.immediate_offset.Value()), true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporal(bb, 0, real_address_base); - - const u32 count = GetUniformTypeElementsCount(instr.stg.type); - for (u32 i = 0; i < count; ++i) { - SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); - } - break; - } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -244,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::ST: + case OpCode::Id::STG: { + const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { + switch (opcode->get().GetId()) { + case OpCode::Id::ST: + UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); + return instr.generic.type; + case OpCode::Id::STG: + return instr.stg.type; + default: + UNREACHABLE(); + return {}; + } + }(); + + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, instr, true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. @@ -267,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Node addr_register, - u32 immediate_offset, + Instruction instr, bool is_write) { + const auto addr_register{GetRegister(instr.gmem.gpr)}; + const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; + const Node base_address{ TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; const auto cbuf = std::get_if<CbufNode>(base_address); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 77c6f9951..a6c123573 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 83c61680e..71844c42b 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index d0495995d..387491bd3 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index f070e8912..f8659e48e 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 951e85f44..44ae87ece 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -52,4 +52,4 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 956c01d9b..cb9ab72b1 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -108,4 +108,4 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, } } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 153ad1fd0..8a6ee5cf5 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -39,8 +39,8 @@ Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) { return StoreNode(ConditionalNode(condition, std::move(code))); } -Node ShaderIR::Comment(const std::string& text) { - return StoreNode(CommentNode(text)); +Node ShaderIR::Comment(std::string text) { + return StoreNode(CommentNode(std::move(text))); } Node ShaderIR::Immediate(u32 value) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index f99300c1c..ff7472e30 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -669,7 +669,7 @@ private: /// Creates a conditional node Node Conditional(Node condition, std::vector<Node>&& code); /// Creates a commentary - Node Comment(const std::string& text); + Node Comment(std::string text); /// Creates an u32 immediate Node Immediate(u32 value); /// Creates a s32 immediate @@ -824,10 +824,8 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, - Node addr_register, - u32 immediate_offset, - bool is_write); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( + NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); template <typename... T> Node Operation(OperationCode code, const T*... operands) { |