summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/ir_opt
diff options
context:
space:
mode:
authorReinUsesLisp <reinuseslisp@airmail.cc>2021-02-05 09:58:02 +0100
committerameerj <52414509+ameerj@users.noreply.github.com>2021-07-23 03:51:21 +0200
commite81739493a0cacc1efe3295f9d287d5d31b1a989 (patch)
tree11a3d04ce9def535414a00226030798f337c053c /src/shader_recompiler/ir_opt
parentshader: Initial instruction support (diff)
downloadyuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.tar
yuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.tar.gz
yuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.tar.bz2
yuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.tar.lz
yuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.tar.xz
yuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.tar.zst
yuzu-e81739493a0cacc1efe3295f9d287d5d31b1a989.zip
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp146
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp331
-rw-r--r--src/shader_recompiler/ir_opt/identity_removal_pass.cpp28
-rw-r--r--src/shader_recompiler/ir_opt/passes.h6
-rw-r--r--src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp56
-rw-r--r--src/shader_recompiler/ir_opt/verification_pass.cpp42
6 files changed, 573 insertions, 36 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..02f5b653d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,146 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <type_traits>
+
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) {
+ if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
+ throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count);
+ }
+ return (base >> shift) & ((1U << count) - 1);
+}
+
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+ if constexpr (std::is_same_v<T, bool>) {
+ return value.U1();
+ } else if constexpr (std::is_same_v<T, u32>) {
+ return value.U32();
+ } else if constexpr (std::is_same_v<T, u64>) {
+ return value.U64();
+ }
+}
+
+template <typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+ const auto arg = [](const IR::Value& value) {
+ if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) {
+ return value.U1();
+ } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) {
+ return value.U32();
+ } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) {
+ return value.U64();
+ }
+ };
+ const IR::Value lhs{inst.Arg(0)};
+ const IR::Value rhs{inst.Arg(1)};
+
+ const bool is_lhs_immediate{lhs.IsImmediate()};
+ const bool is_rhs_immediate{rhs.IsImmediate()};
+
+ if (is_lhs_immediate && is_rhs_immediate) {
+ const auto result{imm_fn(arg(lhs), arg(rhs))};
+ inst.ReplaceUsesWith(IR::Value{result});
+ return false;
+ }
+ if (is_lhs_immediate && !is_rhs_immediate) {
+ IR::Inst* const rhs_inst{rhs.InstRecursive()};
+ if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))};
+ inst.SetArg(0, rhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ } else {
+ // Normalize
+ inst.SetArg(0, rhs);
+ inst.SetArg(1, lhs);
+ }
+ }
+ if (!is_lhs_immediate && is_rhs_immediate) {
+ const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+ if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))};
+ inst.SetArg(0, lhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ }
+ }
+ return true;
+}
+
+void FoldGetRegister(IR::Inst& inst) {
+ if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ }
+}
+
+void FoldGetPred(IR::Inst& inst) {
+ if (inst.Arg(0).Pred() == IR::Pred::PT) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ }
+}
+
+template <typename T>
+void FoldAdd(IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ return;
+ }
+ if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+ if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ } else {
+ inst.ReplaceUsesWith(IR::Value{false});
+ }
+ }
+}
+
+void ConstantPropagation(IR::Inst& inst) {
+ switch (inst.Opcode()) {
+ case IR::Opcode::GetRegister:
+ return FoldGetRegister(inst);
+ case IR::Opcode::GetPred:
+ return FoldGetPred(inst);
+ case IR::Opcode::IAdd32:
+ return FoldAdd<u32>(inst);
+ case IR::Opcode::IAdd64:
+ return FoldAdd<u64>(inst);
+ case IR::Opcode::BitFieldUExtract:
+ if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) {
+ inst.ReplaceUsesWith(IR::Value{
+ BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())});
+ }
+ break;
+ case IR::Opcode::LogicalAnd:
+ return FoldLogicalAnd(inst);
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void ConstantPropagationPass(IR::Block& block) {
+ std::ranges::for_each(block, ConstantPropagation);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..ee69a5c9d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,331 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+#include <ranges>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+/// Address in constant buffers to the storage buffer descriptor
+struct StorageBufferAddr {
+ auto operator<=>(const StorageBufferAddr&) const noexcept = default;
+
+ u32 index;
+ u32 offset;
+};
+
+/// Block iterator to a global memory instruction and the storage buffer it uses
+struct StorageInst {
+ StorageBufferAddr storage_buffer;
+ IR::Block::iterator inst;
+};
+
+/// Bias towards a certain range of constant buffers when looking for storage buffers
+struct Bias {
+ u32 index;
+ u32 offset_begin;
+ u32 offset_end;
+};
+
+using StorageBufferSet =
+ boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
+ boost::container::small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
+
+/// Returns true when the instruction is a global memory instruction
+bool IsGlobalMemory(const IR::Inst& inst) {
+ switch (inst.Opcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Converts a global memory opcode to its storage buffer equivalent
+IR::Opcode GlobalToStorage(IR::Opcode opcode) {
+ switch (opcode) {
+ case IR::Opcode::LoadGlobalS8:
+ return IR::Opcode::LoadStorageS8;
+ case IR::Opcode::LoadGlobalU8:
+ return IR::Opcode::LoadStorageU8;
+ case IR::Opcode::LoadGlobalS16:
+ return IR::Opcode::LoadStorageS16;
+ case IR::Opcode::LoadGlobalU16:
+ return IR::Opcode::LoadStorageU16;
+ case IR::Opcode::LoadGlobal32:
+ return IR::Opcode::LoadStorage32;
+ case IR::Opcode::LoadGlobal64:
+ return IR::Opcode::LoadStorage64;
+ case IR::Opcode::LoadGlobal128:
+ return IR::Opcode::LoadStorage128;
+ case IR::Opcode::WriteGlobalS8:
+ return IR::Opcode::WriteStorageS8;
+ case IR::Opcode::WriteGlobalU8:
+ return IR::Opcode::WriteStorageU8;
+ case IR::Opcode::WriteGlobalS16:
+ return IR::Opcode::WriteStorageS16;
+ case IR::Opcode::WriteGlobalU16:
+ return IR::Opcode::WriteStorageU16;
+ case IR::Opcode::WriteGlobal32:
+ return IR::Opcode::WriteStorage32;
+ case IR::Opcode::WriteGlobal64:
+ return IR::Opcode::WriteStorage64;
+ case IR::Opcode::WriteGlobal128:
+ return IR::Opcode::WriteStorage128;
+ default:
+ throw InvalidArgument("Invalid global memory opcode {}", opcode);
+ }
+}
+
+/// Returns true when a storage buffer address satisfies a bias
+bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
+ return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+ storage_buffer.offset < bias.offset_end;
+}
+
+/// Ignores a global memory operation, reads return zero and writes are ignored
+void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+ const IR::Value zero{u32{0}};
+ switch (inst->Opcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ inst->ReplaceUsesWith(zero);
+ break;
+ case IR::Opcode::LoadGlobal64:
+ inst->ReplaceUsesWith(
+ IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})});
+ break;
+ case IR::Opcode::LoadGlobal128:
+ inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
+ inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})});
+ break;
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ inst->Invalidate();
+ break;
+ default:
+ throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode());
+ }
+}
+
+/// Recursively tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
+ if (value.IsImmediate()) {
+ // Immediates can't be a storage buffer
+ return std::nullopt;
+ }
+ const IR::Inst* const inst{value.InstRecursive()};
+ if (inst->Opcode() == IR::Opcode::GetCbuf) {
+ const IR::Value index{inst->Arg(0)};
+ const IR::Value offset{inst->Arg(1)};
+ if (!index.IsImmediate()) {
+ // Definitely not a storage buffer if it's read from a non-immediate index
+ return std::nullopt;
+ }
+ if (!offset.IsImmediate()) {
+ // TODO: Support SSBO arrays
+ return std::nullopt;
+ }
+ const StorageBufferAddr storage_buffer{
+ .index = index.U32(),
+ .offset = offset.U32(),
+ };
+ if (bias && !MeetsBias(storage_buffer, *bias)) {
+ // We have to blacklist some addresses in case we wrongly point to them
+ return std::nullopt;
+ }
+ return storage_buffer;
+ }
+ // Reversed loops are more likely to find the right result
+ for (size_t arg = inst->NumArgs(); arg--;) {
+ if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) {
+ return *storage_buffer;
+ }
+ }
+ return std::nullopt;
+}
+
+/// Collects the storage buffer used by a global memory instruction and the instruction itself
+void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
+ StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) {
+ // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
+ // avoid getting false positives
+ static constexpr Bias nvn_bias{
+ .index{0},
+ .offset_begin{0x110},
+ .offset_end{0x610},
+ };
+ // First try to find storage buffers in the NVN address
+ const IR::U64 addr{inst->Arg(0)};
+ std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)};
+ if (!storage_buffer) {
+ // If it fails, track without a bias
+ storage_buffer = Track(addr, nullptr);
+ if (!storage_buffer) {
+ // If that also failed, drop the global memory usage
+ IgnoreGlobalMemory(block, inst);
+ }
+ }
+ // Collect storage buffer and the instruction
+ storage_buffer_set.insert(*storage_buffer);
+ to_replace.push_back(StorageInst{
+ .storage_buffer{*storage_buffer},
+ .inst{inst},
+ });
+}
+
+/// Tries to track the first 32-bits of a global memory instruction
+std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
+ // The first argument is the low level GPU pointer to the global memory instruction
+ const IR::U64 addr{inst->Arg(0)};
+ if (addr.IsImmediate()) {
+ // Not much we can do if it's an immediate
+ return std::nullopt;
+ }
+ // This address is expected to either be a PackUint2x32 or a IAdd64
+ IR::Inst* addr_inst{addr.InstRecursive()};
+ s32 imm_offset{0};
+ if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+ // If it's an IAdd64, get the immediate offset it is applying and grab the address
+ // instruction. This expects for the instruction to be canonicalized having the address on
+ // the first argument and the immediate offset on the second one.
+ const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+ if (!imm_offset_value.IsImmediate()) {
+ return std::nullopt;
+ }
+ imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+ const IR::U64 iadd_addr{addr_inst->Arg(0)};
+ if (iadd_addr.IsImmediate()) {
+ return std::nullopt;
+ }
+ addr_inst = iadd_addr.Inst();
+ }
+ // With IAdd64 handled, now PackUint2x32 is expected without exceptions
+ if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+ return std::nullopt;
+ }
+ // PackUint2x32 is expected to be generated from a vector
+ const IR::Value vector{addr_inst->Arg(0)};
+ if (vector.IsImmediate()) {
+ return std::nullopt;
+ }
+ // This vector is expected to be a CompositeConstruct2
+ IR::Inst* const vector_inst{vector.InstRecursive()};
+ if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
+ return std::nullopt;
+ }
+ // Grab the first argument from the CompositeConstruct2, this is the low address.
+ // Re-apply the offset in case we found one.
+ const IR::U32 low_addr{vector_inst->Arg(0)};
+ return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
+}
+
+/// Returns the offset in indices (not bytes) for an equivalent storage instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
+ IR::IREmitter ir{block, inst};
+ IR::U32 offset;
+ if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) {
+ offset = *low_addr;
+ } else {
+ offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
+ }
+ // Subtract the least significant 32 bits from the guest offset. The result is the storage
+ // buffer offset in bytes.
+ const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+ const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
+ inst->ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+ block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
+ inst->Invalidate();
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ switch (inst->Opcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ return ReplaceLoad(block, inst, storage_index, offset);
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ return ReplaceWrite(block, inst, storage_index, offset);
+ default:
+ throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
+ }
+}
+} // Anonymous namespace
+
+void GlobalMemoryToStorageBufferPass(IR::Block& block) {
+ StorageBufferSet storage_buffers;
+ StorageInstVector to_replace;
+
+ for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
+ if (!IsGlobalMemory(*inst)) {
+ continue;
+ }
+ CollectStorageBuffers(block, inst, storage_buffers, to_replace);
+ }
+ for (const auto [storage_buffer, inst] : to_replace) {
+ const auto it{storage_buffers.find(storage_buffer)};
+ const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+ const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
+ Replace(block, inst, storage_index, offset);
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 7f8500087..39a972919 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -10,22 +10,24 @@
namespace Shader::Optimization {
-void IdentityRemovalPass(IR::Block& block) {
+void IdentityRemovalPass(IR::Function& function) {
std::vector<IR::Inst*> to_invalidate;
- for (auto inst = block.begin(); inst != block.end();) {
- const size_t num_args{inst->NumArgs()};
- for (size_t i = 0; i < num_args; ++i) {
- IR::Value arg;
- while ((arg = inst->Arg(i)).IsIdentity()) {
- inst->SetArg(i, arg.Inst()->Arg(0));
+ for (auto& block : function.blocks) {
+ for (auto inst = block->begin(); inst != block->end();) {
+ const size_t num_args{inst->NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Value arg;
+ while ((arg = inst->Arg(i)).IsIdentity()) {
+ inst->SetArg(i, arg.Inst()->Arg(0));
+ }
+ }
+ if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+ to_invalidate.push_back(&*inst);
+ inst = block->Instructions().erase(inst);
+ } else {
+ ++inst;
}
- }
- if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
- to_invalidate.push_back(&*inst);
- inst = block.Instructions().erase(inst);
- } else {
- ++inst;
}
}
for (IR::Inst* const inst : to_invalidate) {
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7ed4005ed..578a24d89 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) {
}
}
+void ConstantPropagationPass(IR::Block& block);
void DeadCodeEliminationPass(IR::Block& block);
-void IdentityRemovalPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void IdentityRemovalPass(IR::Function& function);
void SsaRewritePass(IR::Function& function);
-void VerificationPass(const IR::Block& block);
+void VerificationPass(const IR::Function& function);
} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a4b256a40..3c9b020e0 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -14,8 +14,6 @@
// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
//
-#include <map>
-
#include <boost/container/flat_map.hpp>
#include "shader_recompiler/frontend/ir/basic_block.h"
@@ -30,6 +28,12 @@ namespace Shader::Optimization {
namespace {
using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
+struct FlagTag {};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
struct DefTable {
[[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
return regs[IR::RegIndex(variable)];
@@ -39,8 +43,28 @@ struct DefTable {
return preds[IR::PredIndex(variable)];
}
+ [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
+ return zero_flag;
+ }
+
+ [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept {
+ return sign_flag;
+ }
+
+ [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept {
+ return carry_flag;
+ }
+
+ [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept {
+ return overflow_flag;
+ }
+
std::array<ValueMap, IR::NUM_USER_REGS> regs;
std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+ ValueMap zero_flag;
+ ValueMap sign_flag;
+ ValueMap carry_flag;
+ ValueMap overflow_flag;
};
IR::Opcode UndefOpcode(IR::Reg) noexcept {
@@ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept {
return IR::Opcode::Undef1;
}
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+ return IR::Opcode::Undef1;
+}
+
[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
return inst.Opcode() == IR::Opcode::Phi;
}
@@ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) {
pass.WriteVariable(pred, block.get(), inst.Arg(1));
}
break;
+ case IR::Opcode::SetZFlag:
+ pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0));
+ break;
+ case IR::Opcode::SetSFlag:
+ pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0));
+ break;
+ case IR::Opcode::SetCFlag:
+ pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0));
+ break;
+ case IR::Opcode::SetOFlag:
+ pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0));
+ break;
case IR::Opcode::GetRegister:
if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
@@ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& function) {
inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
}
break;
+ case IR::Opcode::GetZFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
+ break;
+ case IR::Opcode::GetSFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
+ break;
+ case IR::Opcode::GetCFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
+ break;
+ case IR::Opcode::GetOFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
+ break;
default:
break;
}
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 36d9ae39b..8a5adf5a2 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -11,40 +11,44 @@
namespace Shader::Optimization {
-static void ValidateTypes(const IR::Block& block) {
- for (const IR::Inst& inst : block) {
- const size_t num_args{inst.NumArgs()};
- for (size_t i = 0; i < num_args; ++i) {
- const IR::Type t1{inst.Arg(i).Type()};
- const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
- if (!IR::AreTypesCompatible(t1, t2)) {
- throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block));
+static void ValidateTypes(const IR::Function& function) {
+ for (const auto& block : function.blocks) {
+ for (const IR::Inst& inst : *block) {
+ const size_t num_args{inst.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ const IR::Type t1{inst.Arg(i).Type()};
+ const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+ if (!IR::AreTypesCompatible(t1, t2)) {
+ throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+ }
}
}
}
}
-static void ValidateUses(const IR::Block& block) {
+static void ValidateUses(const IR::Function& function) {
std::map<IR::Inst*, int> actual_uses;
- for (const IR::Inst& inst : block) {
- const size_t num_args{inst.NumArgs()};
- for (size_t i = 0; i < num_args; ++i) {
- const IR::Value arg{inst.Arg(i)};
- if (!arg.IsImmediate()) {
- ++actual_uses[arg.Inst()];
+ for (const auto& block : function.blocks) {
+ for (const IR::Inst& inst : *block) {
+ const size_t num_args{inst.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ const IR::Value arg{inst.Arg(i)};
+ if (!arg.IsImmediate()) {
+ ++actual_uses[arg.Inst()];
+ }
}
}
}
for (const auto [inst, uses] : actual_uses) {
if (inst->UseCount() != uses) {
- throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block));
+ throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/);
}
}
}
-void VerificationPass(const IR::Block& block) {
- ValidateTypes(block);
- ValidateUses(block);
+void VerificationPass(const IR::Function& function) {
+ ValidateTypes(function);
+ ValidateUses(function);
}
} // namespace Shader::Optimization