diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/ast.h | 25 | ||||
-rw-r--r-- | src/video_core/shader/async_shaders.cpp | 221 | ||||
-rw-r--r-- | src/video_core/shader/async_shaders.h | 147 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 57 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_half.cpp | 3 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 35 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 69 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 9 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 3 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 43 | ||||
-rw-r--r-- | src/video_core/shader/decode/video.cpp | 19 | ||||
-rw-r--r-- | src/video_core/shader/decode/xmad.cpp | 15 | ||||
-rw-r--r-- | src/video_core/shader/memory_util.cpp | 7 | ||||
-rw-r--r-- | src/video_core/shader/memory_util.h | 6 | ||||
-rw-r--r-- | src/video_core/shader/registry.cpp | 50 | ||||
-rw-r--r-- | src/video_core/shader/registry.h | 2 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 4 |
19 files changed, 550 insertions, 175 deletions
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index cca13bcde..8e5a22ab3 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -199,55 +199,48 @@ public: } std::optional<u32> GetGotoLabel() const { - auto inner = std::get_if<ASTGoto>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTGoto>(&data)) { return {inner->label}; } - return {}; + return std::nullopt; } Expr GetGotoCondition() const { - auto inner = std::get_if<ASTGoto>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTGoto>(&data)) { return inner->condition; } return nullptr; } void MarkLabelUnused() { - auto inner = std::get_if<ASTLabel>(&data); - if (inner) { + if (auto* inner = std::get_if<ASTLabel>(&data)) { inner->unused = true; } } bool IsLabelUnused() const { - auto inner = std::get_if<ASTLabel>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTLabel>(&data)) { return inner->unused; } return true; } std::optional<u32> GetLabelIndex() const { - auto inner = std::get_if<ASTLabel>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTLabel>(&data)) { return {inner->index}; } - return {}; + return std::nullopt; } Expr GetIfCondition() const { - auto inner = std::get_if<ASTIfThen>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTIfThen>(&data)) { return inner->condition; } return nullptr; } void SetGotoCondition(Expr new_condition) { - auto inner = std::get_if<ASTGoto>(&data); - if (inner) { + if (auto* inner = std::get_if<ASTGoto>(&data)) { inner->condition = std::move(new_condition); } } diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..aabd62c5c --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp @@ -0,0 +1,221 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <condition_variable> +#include <mutex> +#include <thread> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/shader/async_shaders.h" + +namespace VideoCommon::Shader { + +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} + +AsyncShaders::~AsyncShaders() { + KillWorkers(); +} + +void AsyncShaders::AllocateWorkers() { + // Max worker threads we should allow + constexpr u32 MAX_THREADS = 4; + // Deduce how many threads we can use + const u32 threads_used = std::thread::hardware_concurrency() / 4; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1U, threads_used); + // Don't use more than MAX_THREADS + const auto num_workers = std::min(max_worker_count, MAX_THREADS); + + // If we already have workers queued, ignore + if (num_workers == worker_threads.size()) { + return; + } + + // If workers already exist, clear them + if (!worker_threads.empty()) { + FreeWorkers(); + } + + // Create workers + for (std::size_t i = 0; i < num_workers; i++) { + context_list.push_back(emu_window.CreateSharedContext()); + worker_threads.push_back( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); + } +} + +void AsyncShaders::FreeWorkers() { + // Mark all threads to quit + is_thread_exiting.store(true); + cv.notify_all(); + for (auto& thread : worker_threads) { + thread.join(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +void AsyncShaders::KillWorkers() { + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.detach(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +bool AsyncShaders::HasWorkQueued() const { + return !pending_queue.empty(); +} + +bool AsyncShaders::HasCompletedWork() const { + std::shared_lock lock{completed_mutex}; + return !finished_work.empty(); +} + +bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { + const auto& regs = gpu.Maxwell3D().regs; + + // If something is using depth, we can assume that games are not rendering anything which will + // be used one time. + if (regs.zeta_enable) { + return true; + } + + // If games are using a small index count, we can assume these are full screen quads. Usually + // these shaders are only used once for building textures so we can assume they can't be built + // async + if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { + return false; + } + + return true; +} + +std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { + std::vector<Result> results; + { + std::unique_lock lock{completed_mutex}; + results.assign(std::make_move_iterator(finished_work.begin()), + std::make_move_iterator(finished_work.end())); + finished_work.clear(); + } + return results; +} + +void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, + Tegra::Engines::ShaderType shader_type, u64 uid, + std::vector<u64> code, std::vector<u64> code_b, + u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, + VAddr cpu_addr) { + WorkerParams params{ + .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, + .device = &device, + .shader_type = shader_type, + .uid = uid, + .code = std::move(code), + .code_b = std::move(code_b), + .main_offset = main_offset, + .compiler_settings = compiler_settings, + .registry = registry, + .cpu_address = cpu_addr, + }; + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, + const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, + Vulkan::GraphicsPipelineCacheKey key) { + WorkerParams params{ + .backend = Backend::Vulkan, + .pp_cache = pp_cache, + .vk_device = &device, + .scheduler = &scheduler, + .descriptor_pool = &descriptor_pool, + .update_descriptor_queue = &update_descriptor_queue, + .renderpass_cache = &renderpass_cache, + .bindings = bindings, + .program = program, + .key = key, + }; + + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { + while (!is_thread_exiting.load(std::memory_order_relaxed)) { + std::unique_lock lock{queue_mutex}; + cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); + if (is_thread_exiting) { + return; + } + + // Partial lock to allow all threads to read at the same time + if (!HasWorkQueued()) { + continue; + } + // Another thread beat us, just unlock and wait for the next load + if (pending_queue.empty()) { + continue; + } + + // Pull work from queue + WorkerParams work = std::move(pending_queue.front()); + pending_queue.pop(); + lock.unlock(); + + if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); + const auto scope = context->Acquire(); + auto program = + OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); + Result result{}; + result.backend = work.backend; + result.cpu_address = work.cpu_address; + result.uid = work.uid; + result.code = std::move(work.code); + result.code_b = std::move(work.code_b); + result.shader_type = work.shader_type; + + if (work.backend == Backend::OpenGL) { + result.program.opengl = std::move(program->source_program); + } else if (work.backend == Backend::GLASM) { + result.program.glasm = std::move(program->assembly_program); + } + + { + std::unique_lock complete_lock(completed_mutex); + finished_work.push_back(std::move(result)); + } + } else if (work.backend == Backend::Vulkan) { + auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( + *work.vk_device, *work.scheduler, *work.descriptor_pool, + *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, + work.program); + + work.pp_cache->EmplacePipeline(std::move(pipeline)); + } + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..7a99e1dc5 --- /dev/null +++ b/src/video_core/shader/async_shaders.h @@ -0,0 +1,147 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <condition_variable> +#include <memory> +#include <shared_mutex> +#include <thread> + +// This header includes both Vulkan and OpenGL headers, this has to be fixed +// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues. +// Forcefully include glad early and undefine macros +#include <glad/glad.h> +#ifdef CreateEvent +#undef CreateEvent +#endif +#ifdef CreateSemaphore +#undef CreateSemaphore +#endif + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Core::Frontend { +class EmuWindow; +class GraphicsContext; +} // namespace Core::Frontend + +namespace Tegra { +class GPU; +} + +namespace Vulkan { +class VKPipelineCache; +} + +namespace VideoCommon::Shader { + +class AsyncShaders { +public: + enum class Backend { + OpenGL, + GLASM, + Vulkan, + }; + + struct ResultPrograms { + OpenGL::OGLProgram opengl; + OpenGL::OGLAssemblyProgram glasm; + }; + + struct Result { + u64 uid; + VAddr cpu_address; + Backend backend; + ResultPrograms program; + std::vector<u64> code; + std::vector<u64> code_b; + Tegra::Engines::ShaderType shader_type; + }; + + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + ~AsyncShaders(); + + /// Start up shader worker threads + void AllocateWorkers(); + + /// Clear the shader queue and kill all worker threads + void FreeWorkers(); + + // Force end all threads + void KillWorkers(); + + /// Check to see if any shaders have actually been compiled + [[nodiscard]] bool HasCompletedWork() const; + + /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build + /// every shader async as some shaders are only built and executed once. We try to "guess" which + /// shader would be used only once + [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; + + /// Pulls completed compiled shaders + [[nodiscard]] std::vector<Result> GetCompletedWork(); + + void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, + u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, + CompilerSettings compiler_settings, const Registry& registry, + VAddr cpu_addr); + + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + +private: + void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); + + /// Check our worker queue to see if we have any work queued already + [[nodiscard]] bool HasWorkQueued() const; + + struct WorkerParams { + Backend backend; + // For OGL + const OpenGL::Device* device; + Tegra::Engines::ShaderType shader_type; + u64 uid; + std::vector<u64> code; + std::vector<u64> code_b; + u32 main_offset; + CompilerSettings compiler_settings; + std::optional<Registry> registry; + VAddr cpu_address; + + // For Vulkan + Vulkan::VKPipelineCache* pp_cache; + const Vulkan::VKDevice* vk_device; + Vulkan::VKScheduler* scheduler; + Vulkan::VKDescriptorPool* descriptor_pool; + Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; + Vulkan::VKRenderPassCache* renderpass_cache; + std::vector<VkDescriptorSetLayoutBinding> bindings; + Vulkan::SPIRVProgram program; + Vulkan::GraphicsPipelineCacheKey key; + }; + + std::condition_variable cv; + mutable std::mutex queue_mutex; + mutable std::shared_mutex completed_mutex; + std::atomic<bool> is_thread_exiting{}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; + std::vector<std::thread> worker_threads; + std::queue<WorkerParams> pending_queue; + std::vector<Result> finished_work; + Core::Frontend::EmuWindow& emu_window; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 8d86020f6..4c8971615 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, u64 ldc_tracked_register) { - return TrackInstruction<u64>(state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); + return TrackInstruction<u64>( + state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); } std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, u64 shl_tracked_register) { - return TrackInstruction<u32>(state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); - }); + return TrackInstruction<u32>( + state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); } std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { @@ -545,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) { gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); if (std::holds_alternative<SingleBranch>(*block.branch)) { - const auto branch = std::get_if<SingleBranch>(block.branch.get()); + auto* branch = std::get_if<SingleBranch>(block.branch.get()); if (!branch->condition.IsUnconditional()) { q2.address = block.end + 1; state.queries.push_back(q2); } - Query conditional_query{q2}; + auto& conditional_query = state.queries.emplace_back(q2); if (branch->is_sync) { if (branch->address == unassigned_branch) { branch->address = conditional_query.ssy_stack.top(); @@ -565,21 +567,21 @@ bool TryQuery(CFGRebuildState& state) { conditional_query.pbk_stack.pop(); } conditional_query.address = branch->address; - state.queries.push_back(std::move(conditional_query)); return true; } - const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + + const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); for (const auto& branch_case : multi_branch->branches) { - Query conditional_query{q2}; + auto& conditional_query = state.queries.emplace_back(q2); conditional_query.address = branch_case.address; - state.queries.push_back(std::move(conditional_query)); } + return true; } void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = ([&](const Condition& cond) -> Expr { - Expr result{}; + const auto get_expr = [](const Condition& cond) -> Expr { + Expr result; if (cond.cc != ConditionCode::T) { result = MakeExpr<ExprCondCode>(cond.cc); } @@ -592,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { } Expr extra = MakeExpr<ExprPredicate>(pred); if (negate) { - extra = MakeExpr<ExprNot>(extra); + extra = MakeExpr<ExprNot>(std::move(extra)); } if (result) { - return MakeExpr<ExprAnd>(extra, result); + return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); } return extra; } @@ -603,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { return result; } return MakeExpr<ExprBoolean>(true); - }); + }; + if (std::holds_alternative<SingleBranch>(*branch_info)) { - const auto branch = std::get_if<SingleBranch>(branch_info.get()); + const auto* branch = std::get_if<SingleBranch>(branch_info.get()); if (branch->address < 0) { if (branch->kill) { mm.InsertReturn(get_expr(branch->condition), true); @@ -617,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); + const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); for (const auto& branch_case : multi_branch->branches) { mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), branch_case.address); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index a276aee44..88103fede 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -53,6 +53,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { absolute_a = ((instr.value >> 44) & 1) != 0; absolute_b = ((instr.value >> 54) & 1) != 0; break; + default: + UNREACHABLE(); + break; } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index a041519b7..73155966f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - const Node value = [&]() { - const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); + const Node value = [&] { + Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); if (opcode->get().GetId() != OpCode::Id::IADD3_R) { return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); } - const Node shifted = [&]() { + const Node shifted = [&] { switch (instr.iadd3.mode) { case Tegra::Shader::IAdd3Mode::RightShift: // TODO(tech4me): According to diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 73880db0e..2a30aab2b 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD32I: { UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); + op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); + if (instr.alu.lop32i.invert_a) { + op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); + } - if (instr.alu.lop32i.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); + if (instr.alu.lop32i.invert_b) { + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); + } - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, - PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); + WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), + std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, + instr.op_32.generates_cc != 0); break; } default: @@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, Node op_b, PredicateResultMode predicate_mode, Pred predicate, bool sets_cc) { - const Node result = [&]() { + Node result = [&] { switch (logic_op) { case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::PassB: return op_b; default: @@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation return; case PredicateResultMode::NotZero: { // Set the predicate to true if the result is not zero. - const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); - SetPredicate(bb, static_cast<u64>(predicate), compare); + Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); + SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); break; } default: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 07778dc3e..618d309d2 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, std::size_t component) { const TextureFormat format{descriptor.format}; switch (format) { - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32_B32: - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: + case TextureFormat::R16G16B16A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R32G32B32: + case TextureFormat::R32G32: + case TextureFormat::R16G16: case TextureFormat::R32: case TextureFormat::R16: case TextureFormat::R8: @@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, break; case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 0) { return descriptor.b_type; } @@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, return descriptor.r_type; } break; - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: if (component == 0) { return descriptor.g_type; @@ -119,6 +119,8 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, return descriptor.r_type; } break; + default: + break; } UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return ComponentType::FLOAT; @@ -137,15 +139,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { u32 GetComponentSize(TextureFormat format, std::size_t component) { switch (format) { - case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32G32B32A32: return 32; - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R16G16B16A16: return 16; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: return component <= 2 ? 32 : 0; - case TextureFormat::R32_G32: + case TextureFormat::R32G32: return component <= 1 ? 32 : 0; - case TextureFormat::R16_G16: + case TextureFormat::R16G16: return component <= 1 ? 16 : 0; case TextureFormat::R32: return component == 0 ? 32 : 0; @@ -192,7 +194,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 6; } return 0; - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 1 || component == 2) { return 11; } @@ -200,7 +202,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 10; } return 0; - case TextureFormat::G8R24: + case TextureFormat::R24G8: if (component == 0) { return 8; } @@ -208,7 +210,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G24R8: + case TextureFormat::R8G24: if (component == 0) { return 8; } @@ -216,13 +218,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G8R8: + case TextureFormat::R8G8: return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; + default: + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return 0; } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; } std::size_t GetImageComponentMask(TextureFormat format) { @@ -231,25 +234,25 @@ std::size_t GetImageComponentMask(TextureFormat format) { constexpr u8 B = 0b0100; constexpr u8 A = 0b1000; switch (format) { - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R16G16B16A16: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A4B4G4R4: case TextureFormat::A5B5G5R1: case TextureFormat::A1B5G5R5: return std::size_t{R | G | B | A}; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: case TextureFormat::R32_B24G8: case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: return std::size_t{R | G | B}; - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R32G32: + case TextureFormat::R16G16: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: return std::size_t{R | G}; case TextureFormat::R32: @@ -257,9 +260,10 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R8: case TextureFormat::R1: return std::size_t{R}; + default: + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return std::size_t{R | G | B | A}; } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; } std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { @@ -463,7 +467,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return OperationCode::AtomicImageXor; case Tegra::Shader::ImageAtomicOperation::Exch: return OperationCode::AtomicImageExchange; + default: + break; } + break; default: break; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 63adbc4a3..e2bba88dd 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", + static_cast<int>(instr.red.type.Value())); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if (!real_address || !base_address) { @@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT_OR_EXECUTE_MSG(base_address != nullptr, - { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); + ASSERT_OR_EXECUTE_MSG( + base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c0a8f233f..29a7cfbfe 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::LaneId: - LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); - return Immediate(0U); + return Operation(OperationCode::ThreadId); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 29ebf65ba..4e932a4b6 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -292,33 +292,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { break; } - std::vector<Node> coords; - - // TODO: Add coordinates for different samplers once other texture types are implemented. - switch (texture_type) { - case TextureType::Texture1D: - coords.push_back(GetRegister(instr.gpr8)); - break; - case TextureType::Texture2D: - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - break; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type)); + const u64 base_index = is_array ? 1 : 0; + const u64 num_components = [texture_type] { + switch (texture_type) { + case TextureType::Texture1D: + return 1; + case TextureType::Texture2D: + return 2; + case TextureType::TextureCube: + return 3; + default: + UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type)); + return 2; + } + }(); + // TODO: What's the array component used for? - // Fallback to interpreting as a 2D texture for now - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + std::vector<Node> coords; + coords.reserve(num_components); + for (u64 component = 0; component < num_components; ++component) { + coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); } + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { if (!instr.tmml.IsComponentEnabled(element)) { continue; } - auto params = coords; MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporary(bb, indexer++, value); + Node value = Operation(OperationCode::TextureQueryLod, meta, coords); + SetTemporary(bb, indexer++, std::move(value)); } for (u32 i = 0; i < indexer; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); @@ -763,7 +766,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array}; + const bool is_array{instr.tld.is_array != 0}; const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; const std::size_t coord_count{GetCoordCount(texture_type)}; diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 64ba60ea2..1c0957277 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { return pc; } -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, - Tegra::Shader::VideoType type, u64 byte_height) { +Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, + u64 byte_height) { if (!is_chunk) { return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); } - const Node zero = Immediate(0); switch (type) { - case Tegra::Shader::VideoType::Size16_Low: + case VideoType::Size16_Low: return BitfieldExtract(op, 0, 16); - case Tegra::Shader::VideoType::Size16_High: + case VideoType::Size16_High: return BitfieldExtract(op, 16, 16); - case Tegra::Shader::VideoType::Size32: + case VideoType::Size32: // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. UNIMPLEMENTED(); - return zero; - case Tegra::Shader::VideoType::Invalid: + return Immediate(0); + case VideoType::Invalid: UNREACHABLE_MSG("Invalid instruction encoding"); - return zero; + return Immediate(0); default: UNREACHABLE(); - return zero; + return Immediate(0); } } diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index c83dc6615..233b8fa42 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { SetTemporary(bb, 0, product); product = GetTemporary(0); - const Node original_c = op_c; + Node original_c = op_c; const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&]() { + op_c = [&] { switch (set_mode) { case Tegra::Shader::XmadMode::None: return original_c; case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(original_c, 0, 16); + return BitfieldExtract(std::move(original_c), 0, 16); case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(original_c, 16, 16); + return BitfieldExtract(std::move(original_c), 16, 16); case Tegra::Shader::XmadMode::CBcc: { - const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); + Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, + original_b, Immediate(16)); + return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), + std::move(shifted_b)); } case Tegra::Shader::XmadMode::CSfu: { const Node comp_a = diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp index 5071c83ca..e18ccba8e 100644 --- a/src/video_core/shader/memory_util.cpp +++ b/src/video_core/shader/memory_util.cpp @@ -16,11 +16,10 @@ namespace VideoCommon::Shader { -GPUVAddr GetShaderAddress(Core::System& system, +GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& gpu{system.GPU().Maxwell3D()}; - const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; - return gpu.regs.code_address.CodeAddress() + shader_config.offset; + const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; + return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; } bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h index be90d24fd..4624d38e6 100644 --- a/src/video_core/shader/memory_util.h +++ b/src/video_core/shader/memory_util.h @@ -11,10 +11,6 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" -namespace Core { -class System; -} - namespace Tegra { class MemoryManager; } @@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Core::System& system, +GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); /// Gets if the current instruction offset is a scheduler instruction diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index cdf274e54..148d91fcb 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac if (shader_stage == ShaderType::Compute) { return {}; } - auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); - - GraphicsInfo info; - info.tfb_layouts = graphics.regs.tfb_layouts; - info.tfb_varying_locs = graphics.regs.tfb_varying_locs; - info.primitive_topology = graphics.regs.draw.topology; - info.tessellation_primitive = graphics.regs.tess_mode.prim; - info.tessellation_spacing = graphics.regs.tess_mode.spacing; - info.tfb_enabled = graphics.regs.tfb_enabled; - info.tessellation_clockwise = graphics.regs.tess_mode.cw; - return info; + + auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); + + return { + .tfb_layouts = graphics.regs.tfb_layouts, + .tfb_varying_locs = graphics.regs.tfb_varying_locs, + .primitive_topology = graphics.regs.draw.topology, + .tessellation_primitive = graphics.regs.tess_mode.prim, + .tessellation_spacing = graphics.regs.tess_mode.spacing, + .tfb_enabled = graphics.regs.tfb_enabled != 0, + .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, + }; } ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { if (shader_stage != ShaderType::Compute) { return {}; } - auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); + + auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); const auto& launch = compute.launch_description; - ComputeInfo info; - info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; - info.local_memory_size_in_words = launch.local_pos_alloc; - info.shared_memory_size_in_words = launch.shared_alloc; - return info; + return { + .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, + .shared_memory_size_in_words = launch.shared_alloc, + .local_memory_size_in_words = launch.local_pos_alloc, + }; } } // Anonymous namespace -Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) +Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} -Registry::Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( - shader_stage, engine)} {} +Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) + : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( + shader_stage, engine_)} {} Registry::~Registry() = default; @@ -113,8 +114,7 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler return value; } -std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, - u32 offset) { +std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; const auto iter = bindless_samplers.find(key); if (iter != bindless_samplers.end()) { diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 231206765..4bebefdde 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -94,7 +94,7 @@ public: explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine); + Tegra::Engines::ConstBufferEngineInterface& engine_); ~Registry(); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index e322c3402..29d794b34 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff } Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - const Node node = MakeNode<InternalFlagNode>(flag); + Node node = MakeNode<InternalFlagNode>(flag); if (negated) { - return Operation(OperationCode::LogicalNegate, node); + return Operation(OperationCode::LogicalNegate, std::move(node)); } return node; } diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index d5ed81442..6be3ea92b 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -205,12 +205,12 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); const auto& found = result.first; if (!found) { - return {}; + return std::nullopt; } if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { return immediate->GetValue(); } - return {}; + return std::nullopt; } std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, |