author: ReinUsesLisp <reinuseslisp@airmail.cc> 2021-02-03 20:43:04 +0100
committer: ameerj <52414509+ameerj@users.noreply.github.com> 2021-07-23 03:51:21 +0200
commit: d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f (patch)
tree: 0108a028b437bc59dfe7864f333cf4c50a46d3b5 /src/shader_recompiler/frontend/ir
parent: shader: SSA and dominance (diff)
download: yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar
yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.gz
yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.bz2
yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.lz
yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.xz
yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.zst
yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.zip
8 files changed, 398 insertions, 65 deletions
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index e795618fc..249251dd0 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
 }
 
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
-                                      std::initializer_list<Value> args) {
-    Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)};
+                                      std::initializer_list<Value> args, u64 flags) {
+    Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)};
     const auto result_it{instructions.insert(insertion_point, *inst)};
 
     if (inst->NumArgs() != args.size()) {
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 4b6b80c4b..ec4a41cb1 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -39,7 +39,7 @@ public:
 
     /// Prepends a new instruction to this basic block before the insertion point.
     iterator PrependNewInst(iterator insertion_point, Opcode op,
-                            std::initializer_list<Value> args = {});
+                            std::initializer_list<Value> args = {}, u64 flags = 0);
 
     /// Adds a new immediate predecessor to the basic block.
     void AddImmediatePredecessor(IR::Block* immediate_predecessor);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6450e4b2c..87b253c9a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
 
+U32 IREmitter::WorkgroupIdX() {
+    return Inst<U32>(Opcode::WorkgroupIdX);
+}
+
+U32 IREmitter::WorkgroupIdY() {
+    return Inst<U32>(Opcode::WorkgroupIdY);
+}
+
+U32 IREmitter::WorkgroupIdZ() {
+    return Inst<U32>(Opcode::WorkgroupIdZ);
+}
+
+U32 IREmitter::LocalInvocationIdX() {
+    return Inst<U32>(Opcode::LocalInvocationIdX);
+}
+
+U32 IREmitter::LocalInvocationIdY() {
+    return Inst<U32>(Opcode::LocalInvocationIdY);
+}
+
+U32 IREmitter::LocalInvocationIdZ() {
+    return Inst<U32>(Opcode::LocalInvocationIdZ);
+}
+
+U32 IREmitter::LoadGlobalU8(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalU8, address);
+}
+
+U32 IREmitter::LoadGlobalS8(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalS8, address);
+}
+
+U32 IREmitter::LoadGlobalU16(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalU16, address);
+}
+
+U32 IREmitter::LoadGlobalS16(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalS16, address);
+}
+
+U32 IREmitter::LoadGlobal32(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobal32, address);
+}
+
+Value IREmitter::LoadGlobal64(const U64& address) {
+    return Inst<Value>(Opcode::LoadGlobal64, address);
+}
+
+Value IREmitter::LoadGlobal128(const U64& address) {
+    return Inst<Value>(Opcode::LoadGlobal128, address);
+}
+
 void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
     Inst(Opcode::WriteGlobalU8, address, value);
 }
@@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetOverflowFromOp, op);
 }
 
-U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
+U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) {
     if (a.Type() != a.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPAdd16, a, b);
+        return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b);
     case Type::U32:
-        return Inst<U32>(Opcode::FPAdd32, a, b);
+        return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b);
     case Type::U64:
-        return Inst<U64>(Opcode::FPAdd64, a, b);
+        return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b);
     default:
         ThrowInvalidType(a.Type());
     }
@@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
 
 Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) {
     if (e1.Type() != e2.Type()) {
-        throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type());
+        throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
     }
     return Inst(Opcode::CompositeConstruct2, e1, e2);
 }
 
 Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) {
     if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
-        throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type());
+        throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
     }
     return Inst(Opcode::CompositeConstruct3, e1, e2, e3);
 }
@@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny&
 Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
                                     const UAny& e4) {
     if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
-        throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(),
-                              e4.Type());
+        throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
+                              e3.Type(), e4.Type());
     }
     return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4);
 }
@@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) {
     return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element)));
 }
 
+UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
+    if (true_value.Type() != false_value.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
+    }
+    switch (true_value.Type()) {
+    case Type::U8:
+        return Inst<UAny>(Opcode::Select8, condition, true_value, false_value);
+    case Type::U16:
+        return Inst<UAny>(Opcode::Select16, condition, true_value, false_value);
+    case Type::U32:
+        return Inst<UAny>(Opcode::Select32, condition, true_value, false_value);
+    case Type::U64:
+        return Inst<UAny>(Opcode::Select64, condition, true_value, false_value);
+    default:
+        throw InvalidArgument("Invalid type {}", true_value.Type());
+    }
+}
+
 U64 IREmitter::PackUint2x32(const Value& vector) {
     return Inst<U64>(Opcode::PackUint2x32, vector);
 }
@@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) {
     return Inst<Value>(Opcode::UnpackDouble2x32, value);
 }
 
-U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) {
+U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPMul16, a, b);
+        return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b);
     case Type::U32:
-        return Inst<U32>(Opcode::FPMul32, a, b);
+        return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b);
     case Type::U64:
-        return Inst<U64>(Opcode::FPMul64, a, b);
+        return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
+                           FpControl control) {
+    if (a.Type() != b.Type() || a.Type() != c.Type()) {
+        throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
+    }
+    switch (a.Type()) {
+    case Type::U16:
+        return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c);
     default:
         ThrowInvalidType(a.Type());
     }
@@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) {
     }
 }
 
+U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
+    if (a.Type() != b.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::IAdd32, a, b);
+    case Type::U64:
+        return Inst<U64>(Opcode::IAdd64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
+U32 IREmitter::IMul(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::IMul32, a, b);
+}
+
+U32 IREmitter::INeg(const U32& value) {
+    return Inst<U32>(Opcode::INeg32, value);
+}
+
+U32 IREmitter::IAbs(const U32& value) {
+    return Inst<U32>(Opcode::IAbs32, value);
+}
+
+U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) {
+    return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
+}
+
+U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) {
+    return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
+}
+
+U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) {
+    return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
+}
+
+U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+}
+
+U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::BitwiseOr32, a, b);
+}
+
+U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::BitwiseXor32, a, b);
+}
+
+U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+                              const U32& count) {
+    return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
+}
+
+U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+                               bool is_signed) {
+    return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
+                     count);
+}
+
+U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
+}
+
+U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) {
+    return Inst<U1>(Opcode::IEqual, lhs, rhs);
+}
+
+U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+}
+
+U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
+    return Inst<U1>(Opcode::INotEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+}
+
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
     return Inst<U1>(Opcode::LogicalOr, a, b);
 }
@@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
     return Inst<U1>(Opcode::LogicalAnd, a, b);
 }
 
+U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
+    return Inst<U1>(Opcode::LogicalXor, a, b);
+}
+
 U1 IREmitter::LogicalNot(const U1& value) {
     return Inst<U1>(Opcode::LogicalNot, value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 1af79f41c..7ff763ecf 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -4,8 +4,12 @@
 
 #pragma once
 
+#include <cstring>
+#include <type_traits>
+
 #include "shader_recompiler/frontend/ir/attribute.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
@@ -52,6 +56,22 @@ public:
     [[nodiscard]] U32 GetAttribute(IR::Attribute attribute);
     void SetAttribute(IR::Attribute attribute, const U32& value);
 
+    [[nodiscard]] U32 WorkgroupIdX();
+    [[nodiscard]] U32 WorkgroupIdY();
+    [[nodiscard]] U32 WorkgroupIdZ();
+
+    [[nodiscard]] U32 LocalInvocationIdX();
+    [[nodiscard]] U32 LocalInvocationIdY();
+    [[nodiscard]] U32 LocalInvocationIdZ();
+
+    [[nodiscard]] U32 LoadGlobalU8(const U64& address);
+    [[nodiscard]] U32 LoadGlobalS8(const U64& address);
+    [[nodiscard]] U32 LoadGlobalU16(const U64& address);
+    [[nodiscard]] U32 LoadGlobalS16(const U64& address);
+    [[nodiscard]] U32 LoadGlobal32(const U64& address);
+    [[nodiscard]] Value LoadGlobal64(const U64& address);
+    [[nodiscard]] Value LoadGlobal128(const U64& address);
+
     void WriteGlobalU8(const U64& address, const U32& value);
     void WriteGlobalS8(const U64& address, const U32& value);
     void WriteGlobalU16(const U64& address, const U32& value);
@@ -71,6 +91,8 @@ public:
                                            const UAny& e4);
     [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element);
 
+    [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
+
     [[nodiscard]] U64 PackUint2x32(const Value& vector);
     [[nodiscard]] Value UnpackUint2x32(const U64& value);
 
@@ -80,8 +102,10 @@ public:
     [[nodiscard]] U64 PackDouble2x32(const Value& vector);
     [[nodiscard]] Value UnpackDouble2x32(const U64& value);
 
-    [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b);
-    [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b);
+    [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
+    [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
+    [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
+                                  FpControl control = {});
 
     [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value);
     [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value);
@@ -100,8 +124,31 @@ public:
     [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value);
     [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
 
+    [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+    [[nodiscard]] U32 INeg(const U32& value);
+    [[nodiscard]] U32 IAbs(const U32& value);
+    [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift);
+    [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift);
+    [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift);
+    [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
+    [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
+    [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
+    [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+                                     const U32& count);
+    [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+                                      bool is_signed);
+
+    [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs);
+    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
+    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
+    [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalNot(const U1& value);
 
     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value);
@@ -118,6 +165,22 @@ private:
         auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})};
         return T{Value{&*it}};
     }
+
+    template <typename T>
+    requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags {
+        Flags() = default;
+        Flags(T proxy_) : proxy{proxy_} {}
+
+        T proxy;
+    };
+
+    template <typename T = Value, typename FlagType, typename... Args>
+    T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
+        u64 raw_flags{};
+        std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
+        auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
+        return T{Value{&*it}};
+    }
 };
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 7f1ed6710..61849695a 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -5,7 +5,9 @@
 #pragma once
 
 #include <array>
+#include <cstring>
 #include <span>
+#include <type_traits>
 #include <vector>
 
 #include <boost/intrusive/list.hpp>
@@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
-    explicit Inst(Opcode op_) noexcept : op(op_) {}
+    explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {}
 
     /// Get the number of uses this instruction has.
     [[nodiscard]] int UseCount() const noexcept {
@@ -73,6 +75,14 @@ public:
 
     void ReplaceUsesWith(Value replacement);
 
+    template <typename FlagsType>
+    requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>)
+        [[nodiscard]] FlagsType Flags() const noexcept {
+        FlagsType ret;
+        std::memcpy(&ret, &flags, sizeof(ret));
+        return ret;
+    }
+
 private:
     void Use(const Value& value);
     void UndoUse(const Value& value);
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..28bb9e798
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Shader::IR {
+
+enum class FmzMode {
+    None, // Denorms are not flushed, NaN is propagated (nouveau)
+    FTZ,  // Flush denorms to zero, NaN is propagated (D3D11, NVN, GL, VK)
+    FMZ,  // Flush denorms to zero, x * 0 == 0 (D3D9)
+};
+
+enum class FpRounding {
+    RN, // Round to nearest even
+    RM, // Round towards negative infinity
+    RP, // Round towards positive infinity
+    RZ, // Round towards zero
+};
+
+struct FpControl { // Passed as Flags{control}; memcpy'd into an instruction's u64 flags word
+    bool no_contraction{false};
+    FpRounding rounding : 8 = FpRounding::RN; // 8-bit bit-field, defaults to RN
+    FmzMode fmz_mode : 8 = FmzMode::FTZ;      // 8-bit bit-field, defaults to FTZ
+};
+static_assert(sizeof(FpControl) <= sizeof(u64)); // NOTE(review): u64 not declared here; confirm
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 40759e96a..4ecb5e936 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -35,6 +35,12 @@ OPCODE(SetZFlag,                                            Void,           U1,
 OPCODE(SetSFlag,                                            Void,           U1,                                                             )
 OPCODE(SetCFlag,                                            Void,           U1,                                                             )
 OPCODE(SetOFlag,                                            Void,           U1,                                                             )
+OPCODE(WorkgroupIdX,                                        U32,                                                                            )
+OPCODE(WorkgroupIdY,                                        U32,                                                                            )
+OPCODE(WorkgroupIdZ,                                        U32,                                                                            )
+OPCODE(LocalInvocationIdX,                                  U32,                                                                            )
+OPCODE(LocalInvocationIdY,                                  U32,                                                                            )
+OPCODE(LocalInvocationIdZ,                                  U32,                                                                            )
 
 // Undefined
 OPCODE(Undef1,                                              U1,                                                                             )
@@ -44,6 +50,13 @@ OPCODE(Undef32,                                             U32,
 OPCODE(Undef64,                                             U64,                                                                            )
 
 // Memory operations
+OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            )
+OPCODE(LoadGlobalS8,                                        U32,            U64,                                                            )
+OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            )
+OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            )
+OPCODE(LoadGlobal32,                                        U32,            U64,                                                            )
+OPCODE(LoadGlobal64,                                        Opaque,         U64,                                                            )
+OPCODE(LoadGlobal128,                                       Opaque,         U64,                                                            )
 OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            )
@@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3,                                 Opaque,         Opaq
 OPCODE(CompositeConstruct4,                                 Opaque,         Opaque,         Opaque,         Opaque,         Opaque,         )
 OPCODE(CompositeExtract,                                    Opaque,         Opaque,         U32,                                            )
 
+// Select operations
+OPCODE(Select8,                                             U8,             U1,             U8,             U8,                             )
+OPCODE(Select16,                                            U16,            U1,             U16,            U16,                            )
+OPCODE(Select32,                                            U32,            U1,             U32,            U32,                            )
+OPCODE(Select64,                                            U64,            U1,             U64,            U64,                            )
+
 // Bitwise conversions
 OPCODE(PackUint2x32,                                        U64,            Opaque,                                                         )
 OPCODE(UnpackUint2x32,                                      Opaque,         U64,                                                            )
@@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp,                                   U1,             Opaq
 OPCODE(GetZSCOFromOp,                                       ZSCO,           Opaque,                                                         )
 
 // Floating-point operations
-OPCODE(FPAbs16,                                             U16,            U16                                                             )
-OPCODE(FPAbs32,                                             U32,            U32                                                             )
-OPCODE(FPAbs64,                                             U64,            U64                                                             )
-OPCODE(FPAdd16,                                             U16,            U16,            U16                                             )
-OPCODE(FPAdd32,                                             U32,            U32,            U32                                             )
-OPCODE(FPAdd64,                                             U64,            U64,            U64                                             )
-OPCODE(FPFma16,                                             U16,            U16,            U16                                             )
-OPCODE(FPFma32,                                             U32,            U32,            U32                                             )
-OPCODE(FPFma64,                                             U64,            U64,            U64                                             )
-OPCODE(FPMax32,                                             U32,            U32,            U32                                             )
-OPCODE(FPMax64,                                             U64,            U64,            U64                                             )
-OPCODE(FPMin32,                                             U32,            U32,            U32                                             )
-OPCODE(FPMin64,                                             U64,            U64,            U64                                             )
-OPCODE(FPMul16,                                             U16,            U16,            U16                                             )
-OPCODE(FPMul32,                                             U32,            U32,            U32                                             )
-OPCODE(FPMul64,                                             U64,            U64,            U64                                             )
-OPCODE(FPNeg16,                                             U16,            U16                                                             )
-OPCODE(FPNeg32,                                             U32,            U32                                                             )
-OPCODE(FPNeg64,                                             U64,            U64                                                             )
-OPCODE(FPRecip32,                                           U32,            U32                                                             )
-OPCODE(FPRecip64,                                           U64,            U64                                                             )
-OPCODE(FPRecipSqrt32,                                       U32,            U32                                                             )
-OPCODE(FPRecipSqrt64,                                       U64,            U64                                                             )
-OPCODE(FPSqrt,                                              U32,            U32                                                             )
-OPCODE(FPSin,                                               U32,            U32                                                             )
-OPCODE(FPSinNotReduced,                                     U32,            U32                                                             )
-OPCODE(FPExp2,                                              U32,            U32                                                             )
-OPCODE(FPExp2NotReduced,                                    U32,            U32                                                             )
-OPCODE(FPCos,                                               U32,            U32                                                             )
-OPCODE(FPCosNotReduced,                                     U32,            U32                                                             )
-OPCODE(FPLog2,                                              U32,            U32                                                             )
-OPCODE(FPSaturate16,                                        U16,            U16                                                             )
-OPCODE(FPSaturate32,                                        U32,            U32                                                             )
-OPCODE(FPSaturate64,                                        U64,            U64                                                             )
-OPCODE(FPRoundEven16,                                       U16,            U16                                                             )
-OPCODE(FPRoundEven32,                                       U32,            U32                                                             )
-OPCODE(FPRoundEven64,                                       U64,            U64                                                             )
-OPCODE(FPFloor16,                                           U16,            U16                                                             )
-OPCODE(FPFloor32,                                           U32,            U32                                                             )
-OPCODE(FPFloor64,                                           U64,            U64                                                             )
-OPCODE(FPCeil16,                                            U16,            U16                                                             )
-OPCODE(FPCeil32,                                            U32,            U32                                                             )
-OPCODE(FPCeil64,                                            U64,            U64                                                             )
-OPCODE(FPTrunc16,                                           U16,            U16                                                             )
-OPCODE(FPTrunc32,                                           U32,            U32                                                             )
-OPCODE(FPTrunc64,                                           U64,            U64                                                             )
+OPCODE(FPAbs16,                                             U16,            U16,                                                            )
+OPCODE(FPAbs32,                                             U32,            U32,                                                            )
+OPCODE(FPAbs64,                                             U64,            U64,                                                            )
+OPCODE(FPAdd16,                                             U16,            U16,            U16,                                            )
+OPCODE(FPAdd32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPAdd64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPFma16,                                             U16,            U16,            U16,            U16,                            )
+OPCODE(FPFma32,                                             U32,            U32,            U32,            U32,                            )
+OPCODE(FPFma64,                                             U64,            U64,            U64,            U64,                            )
+OPCODE(FPMax32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPMax64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPMin32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPMin64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPMul16,                                             U16,            U16,            U16,                                            )
+OPCODE(FPMul32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPMul64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPNeg16,                                             U16,            U16,                                                            )
+OPCODE(FPNeg32,                                             U32,            U32,                                                            )
+OPCODE(FPNeg64,                                             U64,            U64,                                                            )
+OPCODE(FPRecip32,                                           U32,            U32,                                                            )
+OPCODE(FPRecip64,                                           U64,            U64,                                                            )
+OPCODE(FPRecipSqrt32,                                       U32,            U32,                                                            )
+OPCODE(FPRecipSqrt64,                                       U64,            U64,                                                            )
+OPCODE(FPSqrt,                                              U32,            U32,                                                            )
+OPCODE(FPSin,                                               U32,            U32,                                                            )
+OPCODE(FPSinNotReduced,                                     U32,            U32,                                                            )
+OPCODE(FPExp2,                                              U32,            U32,                                                            )
+OPCODE(FPExp2NotReduced,                                    U32,            U32,                                                            )
+OPCODE(FPCos,                                               U32,            U32,                                                            )
+OPCODE(FPCosNotReduced,                                     U32,            U32,                                                            )
+OPCODE(FPLog2,                                              U32,            U32,                                                            )
+OPCODE(FPSaturate16,                                        U16,            U16,                                                            )
+OPCODE(FPSaturate32,                                        U32,            U32,                                                            )
+OPCODE(FPSaturate64,                                        U64,            U64,                                                            )
+OPCODE(FPRoundEven16,                                       U16,            U16,                                                            )
+OPCODE(FPRoundEven32,                                       U32,            U32,                                                            )
+OPCODE(FPRoundEven64,                                       U64,            U64,                                                            )
+OPCODE(FPFloor16,                                           U16,            U16,                                                            )
+OPCODE(FPFloor32,                                           U32,            U32,                                                            )
+OPCODE(FPFloor64,                                           U64,            U64,                                                            )
+OPCODE(FPCeil16,                                            U16,            U16,                                                            )
+OPCODE(FPCeil32,                                            U32,            U32,                                                            )
+OPCODE(FPCeil64,                                            U64,            U64,                                                            )
+OPCODE(FPTrunc16,                                           U16,            U16,                                                            )
+OPCODE(FPTrunc32,                                           U32,            U32,                                                            )
+OPCODE(FPTrunc64,                                           U64,            U64,                                                            )
+
+// Integer operations
+OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
+OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )
+OPCODE(IMul32,                                              U32,            U32,            U32,                                            )
+OPCODE(INeg32,                                              U32,            U32,                                                            )
+OPCODE(IAbs32,                                              U32,            U32,                                                            )
+OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                            )
+OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                            )
+OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                            )
+OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                            )
+OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                            )
+OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                            )
+OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,            )
+OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                            )
+OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                            )
+
+OPCODE(SLessThan,                                           U1,             U32,            U32,                                            )
+OPCODE(ULessThan,                                           U1,             U32,            U32,                                            )
+OPCODE(IEqual,                                              U1,             U32,            U32,                                            )
+OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                            )
+OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                            )
+OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                            )
+OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                            )
+OPCODE(INotEqual,                                           U1,             U32,            U32,                                            )
+OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                            )
+OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                            )
 
 // Logical operations
 OPCODE(LogicalOr,                                           U1,             U1,             U1,                                             )
 OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                             )
+OPCODE(LogicalXor,                                          U1,             U1,             U1,                                             )
 OPCODE(LogicalNot,                                          U1,             U1,                                                             )
 
 // Conversion operations
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
index daf23193f..c6f2f82bf 100644
--- a/src/shader_recompiler/frontend/ir/pred.h
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -8,7 +8,16 @@
 
 namespace Shader::IR {
 
-enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT };
+enum class Pred : u64 {
+    P0,
+    P1,
+    P2,
+    P3,
+    P4,
+    P5,
+    P6,
+    PT,
+};
 
 constexpr size_t NUM_USER_PREDS = 6;
 constexpr size_t NUM_PREDS = 7;
author	ReinUsesLisp <reinuseslisp@airmail.cc>	2021-02-03 20:43:04 +0100
committer	ameerj <52414509+ameerj@users.noreply.github.com>	2021-07-23 03:51:21 +0200
commit	d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f (patch)
tree	0108a028b437bc59dfe7864f333cf4c50a46d3b5 /src/shader_recompiler/frontend/ir
parent	shader: SSA and dominance (diff)
download	yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.gz yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.bz2 yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.lz yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.xz yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.tar.zst yuzu-d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f.zip