Diffstat:
 src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 610 ++++++++++++++++++
 1 file changed, 610 insertions(+), 0 deletions(-)
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..8dd6d6c2c
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,610 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "common/bit_cast.h"
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+// Metaprogramming helpers to extract argument type information from a lambda
+template <typename Func>
+struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
+
+template <typename ReturnType, typename LambdaType, typename... Args>
+struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+
+ static constexpr size_t NUM_ARGS{sizeof...(Args)};
+};
+
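+/// Reads an immediate IR::Value as the requested host type
+/// (only the types used by the folds below are handled)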
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+ if constexpr (std::is_same_v<T, bool>) {
+ return value.U1();
+ } else if constexpr (std::is_same_v<T, u32>) {
+ return value.U32();
+ } else if constexpr (std::is_same_v<T, s32>) {
+ return static_cast<s32>(value.U32());
+ } else if constexpr (std::is_same_v<T, f32>) {
+ return value.F32();
+ } else if constexpr (std::is_same_v<T, u64>) {
+ return value.U64();
+ }
+}
+
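+/// Folds a commutative operation: evaluates it when both operands are immediate, otherwise
+/// normalizes the immediate operand to the right-hand side and reassociates nested immediates,
+/// e.g. IAdd32(#5, IAdd32(%x, #3)) becomes IAdd32(%x, #8).
+/// Returns false when the instruction was fully replaced by a constant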
+template <typename T, typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+ const IR::Value lhs{inst.Arg(0)};
+ const IR::Value rhs{inst.Arg(1)};
+
+ const bool is_lhs_immediate{lhs.IsImmediate()};
+ const bool is_rhs_immediate{rhs.IsImmediate()};
+
+ if (is_lhs_immediate && is_rhs_immediate) {
+ const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
+ inst.ReplaceUsesWith(IR::Value{result});
+ return false;
+ }
+ if (is_lhs_immediate && !is_rhs_immediate) {
+ IR::Inst* const rhs_inst{rhs.InstRecursive()};
+ if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
+ inst.SetArg(0, rhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ } else {
+ // Normalize
+ inst.SetArg(0, rhs);
+ inst.SetArg(1, lhs);
+ }
+ }
+ if (!is_lhs_immediate && is_rhs_immediate) {
+ const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+ if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
+ inst.SetArg(0, lhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ }
+ }
+ return true;
+}
+
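+/// Evaluates func over the instruction's arguments and replaces all uses with the resulting
+/// constant, but only when every argument is an immediate and no pseudo-operations are attached.
+/// Argument types are deduced from the lambda through LambdaTraits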
+template <typename Func>
+bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+ if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
+ return false;
+ }
+ using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
+ inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+ return true;
+}
+
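+/// Reads of the zero register RZ are replaced with the constant 0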
+void FoldGetRegister(IR::Inst& inst) {
+ if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ }
+}
+
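+/// Reads of the true predicate PT are replaced with the constant true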
+void FoldGetPred(IR::Inst& inst) {
+ if (inst.Arg(0).Pred() == IR::Pred::PT) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ }
+}
+
+/// Replaces the pattern generated by two XMAD multiplications
+bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
+ /*
+ * We are looking for this pattern:
+ * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
+ * %rhs_mul = IMul32 %rhs_bfe, %factor_b
+ * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
+ * %lhs_mul = IMul32 %lhs_bfe, %factor_b
+ * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+ * %result = IAdd32 %lhs_shl, %rhs_mul
+ *
+ * And replacing it with
+ * %result = IMul32 %factor_a, %factor_b
+ *
+ * LLVM and MSVC perform this same transformation, so it is considered safe.
+ */
+ const IR::Value lhs_arg{inst.Arg(0)};
+ const IR::Value rhs_arg{inst.Arg(1)};
+ if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
+ if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+ lhs_shl->Arg(1) != IR::Value{16U}) {
+ return false;
+ }
+ if (lhs_shl->Arg(0).IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
+ IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
+ if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
+ return false;
+ }
+ if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
+ return false;
+ }
+ const IR::U32 factor_b{lhs_mul->Arg(1)};
+ if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
+ IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
+ if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return false;
+ }
+ if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return false;
+ }
+ if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+ return false;
+ }
+ if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
+ return false;
+ }
+ if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
+ return false;
+ }
+ const IR::U32 factor_a{lhs_bfe->Arg(0)};
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
+ return true;
+}
+
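+/// Folds integer additions: constant operands are evaluated, additions of zero are removed,
+/// and for u32 the XMAD multiplication pattern above is recognized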
+template <typename T>
+void FoldAdd(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ return;
+ }
+ if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ return;
+ }
+ if constexpr (std::is_same_v<T, u32>) {
+ if (FoldXmadMultiply(block, inst)) {
+ return;
+ }
+ }
+}
+
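+/// Folds integer subtractions, cancelling matching constant buffer reads when both use the
+/// same binding and offset, e.g. ISub32(%cbuf, %cbuf) becomes #0 and
+/// ISub32(IAdd32(%x, %cbuf), %cbuf) becomes %x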
+void FoldISub32(IR::Inst& inst) {
+ if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
+ return;
+ }
+ if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
+ return;
+ }
+ // ISub32 is commonly used to subtract two constant buffer reads; compare them and replace
+ // the result with zero when they are equal.
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
+ return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
+ b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
+ a->Arg(1) == b->Arg(1);
+ }};
+ IR::Inst* op_a{inst.Arg(0).InstRecursive()};
+ IR::Inst* op_b{inst.Arg(1).InstRecursive()};
+ if (equal_cbuf(op_a, op_b)) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ return;
+ }
+ // It's also possible a value is being added to a cbuf and then subtracted
+ if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
+ // Canonicalize local variables to simplify the following logic
+ std::swap(op_a, op_b);
+ }
+ if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
+ return;
+ }
+ IR::Inst* const inst_cbuf{op_b};
+ if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
+ return;
+ }
+ IR::Value add_op_a{op_a->Arg(0)};
+ IR::Value add_op_b{op_a->Arg(1)};
+ if (add_op_b.IsImmediate()) {
+ // Canonicalize
+ std::swap(add_op_a, add_op_b);
+ }
+ if (add_op_b.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
+ if (equal_cbuf(add_cbuf, inst_cbuf)) {
+ inst.ReplaceUsesWith(add_op_a);
+ }
+}
+
+void FoldSelect(IR::Inst& inst) {
+ const IR::Value cond{inst.Arg(0)};
+ if (cond.IsImmediate()) {
+ inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
+ }
+}
+
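+/// Removes the multiply-by-reciprocal pattern produced by attribute interpolation:
+/// FPMul32(FPMul32(%x, GetAttribute(attr)), FPRecip32(GetAttribute(attr))) becomes %x.
+/// Skipped when contractions are not allowed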
+void FoldFPMul32(IR::Inst& inst) {
+ const auto control{inst.Flags<IR::FpControl>()};
+ if (control.no_contraction) {
+ return;
+ }
+ // Fold interpolation operations
+ const IR::Value lhs_value{inst.Arg(0)};
+ const IR::Value rhs_value{inst.Arg(1)};
+ if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const lhs_op{lhs_value.InstRecursive()};
+ IR::Inst* const rhs_op{rhs_value.InstRecursive()};
+ if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
+ rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
+ return;
+ }
+ const IR::Value recip_source{rhs_op->Arg(0)};
+ const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()};
+ if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const attr_a{recip_source.InstRecursive()};
+ IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
+ if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
+ attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
+ return;
+ }
+ if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
+ inst.ReplaceUsesWith(lhs_op->Arg(0));
+ }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+ if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ } else {
+ inst.ReplaceUsesWith(IR::Value{false});
+ }
+ }
+}
+
+void FoldLogicalOr(IR::Inst& inst) {
+ if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ } else {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ }
+ }
+}
+
+void FoldLogicalNot(IR::Inst& inst) {
+ const IR::U1 value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ inst.ReplaceUsesWith(IR::Value{!value.U1()});
+ return;
+ }
+ IR::Inst* const arg{value.InstRecursive()};
+ if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
+ inst.ReplaceUsesWith(arg->Arg(0));
+ }
+}
+
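+/// Folds bitcasts of immediates, removes bitcast/reverse-bitcast pairs, and rewrites
+/// BitCastF32U32 of a constant buffer read into a typed GetCbufF32 read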
+template <IR::Opcode op, typename Dest, typename Source>
+void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
+ const IR::Value value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
+ return;
+ }
+ IR::Inst* const arg_inst{value.InstRecursive()};
+ if (arg_inst->GetOpcode() == reverse) {
+ inst.ReplaceUsesWith(arg_inst->Arg(0));
+ return;
+ }
+ if constexpr (op == IR::Opcode::BitCastF32U32) {
+ if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
+ // Replace the bitcast with a typed constant buffer read
+ inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
+ inst.SetArg(0, arg_inst->Arg(0));
+ inst.SetArg(1, arg_inst->Arg(1));
+ return;
+ }
+ }
+}
+
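+/// Removes an instruction whose argument is its inverse operation,
+/// e.g. PackHalf2x16(UnpackHalf2x16(%x)) becomes %x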
+void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
+ const IR::Value value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const arg_inst{value.InstRecursive()};
+ if (arg_inst->GetOpcode() == reverse) {
+ inst.ReplaceUsesWith(arg_inst->Arg(0));
+ return;
+ }
+}
+
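+/// Invokes func with the instruction's immediate arguments converted to the lambda's
+/// parameter types and wraps the result in an IR::Value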
+template <typename Func, size_t... I>
+IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
+ using Traits = LambdaTraits<decltype(func)>;
+ return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
+}
+
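+/// Walks a chain of composite inserts searching for the element at first_index: an insert at
+/// the same index yields the inserted value, an insert at a different index continues through
+/// its composite operand, and reaching the originating construct yields that element.
+/// Returns std::nullopt when the element cannot be determined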
+std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
+ IR::Opcode construct, u32 first_index) {
+ IR::Inst* const inst{inst_value.InstRecursive()};
+ if (inst->GetOpcode() == construct) {
+ return inst->Arg(first_index);
+ }
+ if (inst->GetOpcode() != insert) {
+ return std::nullopt;
+ }
+ IR::Value value_index{inst->Arg(2)};
+ if (!value_index.IsImmediate()) {
+ return std::nullopt;
+ }
+ const u32 second_index{value_index.U32()};
+ if (first_index != second_index) {
+ IR::Value value_composite{inst->Arg(0)};
+ if (value_composite.IsImmediate()) {
+ return std::nullopt;
+ }
+ return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
+ }
+ return inst->Arg(1);
+}
+
+void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
+ const IR::Value value_1{inst.Arg(0)};
+ const IR::Value value_2{inst.Arg(1)};
+ if (value_1.IsImmediate()) {
+ return;
+ }
+ if (!value_2.IsImmediate()) {
+ return;
+ }
+ const u32 first_index{value_2.U32()};
+ const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
+ if (!result) {
+ return;
+ }
+ inst.ReplaceUsesWith(*result);
+}
+
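+/// Returns the value before expected_cast when value is the result of that cast,
+/// otherwise returns value unchanged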
+IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) {
+ if (value.IsImmediate()) {
+ return value;
+ }
+ IR::Inst* const inst{value.InstRecursive()};
+ if (inst->GetOpcode() == expected_cast) {
+ return inst->Arg(0).Resolve();
+ }
+ return value;
+}
+
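+/// Matches the ShuffleButterfly + FSwizzleAdd sequence shaders emit for fine screen-space
+/// derivatives (swizzle 0x99 with butterfly index 1 for X, swizzle 0xA5 with index 2 for Y,
+/// clamp 3 and segmentation mask 28) and replaces it with DPdxFine or DPdyFine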
+void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
+ const IR::Value swizzle{inst.Arg(2)};
+ if (!swizzle.IsImmediate()) {
+ return;
+ }
+ const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)};
+ const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)};
+ if (value_1.IsImmediate()) {
+ return;
+ }
+ const u32 swizzle_value{swizzle.U32()};
+ if (swizzle_value != 0x99 && swizzle_value != 0xA5) {
+ return;
+ }
+ IR::Inst* const inst2{value_1.InstRecursive()};
+ if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) {
+ return;
+ }
+ const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
+ if (value_2 != value_3) {
+ return;
+ }
+ const IR::Value index{inst2->Arg(1)};
+ const IR::Value clamp{inst2->Arg(2)};
+ const IR::Value segmentation_mask{inst2->Arg(3)};
+ if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
+ return;
+ }
+ if (clamp.U32() != 3 || segmentation_mask.U32() != 28) {
+ return;
+ }
+ if (swizzle_value == 0x99) {
+ // DPdxFine
+ if (index.U32() == 1) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)}));
+ }
+ } else if (swizzle_value == 0xA5) {
+ // DPdyFine
+ if (index.U32() == 2) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)}));
+ }
+ }
+}
+
+void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetRegister:
+ return FoldGetRegister(inst);
+ case IR::Opcode::GetPred:
+ return FoldGetPred(inst);
+ case IR::Opcode::IAdd32:
+ return FoldAdd<u32>(block, inst);
+ case IR::Opcode::ISub32:
+ return FoldISub32(inst);
+ case IR::Opcode::IMul32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
+ return;
+ case IR::Opcode::ShiftRightArithmetic32:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
+ return;
+ case IR::Opcode::BitCastF32U32:
+ return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
+ case IR::Opcode::BitCastU32F32:
+ return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
+ case IR::Opcode::IAdd64:
+ return FoldAdd<u64>(block, inst);
+ case IR::Opcode::PackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
+ case IR::Opcode::UnpackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
+ case IR::Opcode::SelectU1:
+ case IR::Opcode::SelectU8:
+ case IR::Opcode::SelectU16:
+ case IR::Opcode::SelectU32:
+ case IR::Opcode::SelectU64:
+ case IR::Opcode::SelectF16:
+ case IR::Opcode::SelectF32:
+ case IR::Opcode::SelectF64:
+ return FoldSelect(inst);
+ case IR::Opcode::FPMul32:
+ return FoldFPMul32(inst);
+ case IR::Opcode::LogicalAnd:
+ return FoldLogicalAnd(inst);
+ case IR::Opcode::LogicalOr:
+ return FoldLogicalOr(inst);
+ case IR::Opcode::LogicalNot:
+ return FoldLogicalNot(inst);
+ case IR::Opcode::SLessThan:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
+ return;
+ case IR::Opcode::ULessThan:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+ return;
+ case IR::Opcode::SLessThanEqual:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
+ return;
+ case IR::Opcode::ULessThanEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
+ return;
+ case IR::Opcode::SGreaterThan:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
+ return;
+ case IR::Opcode::UGreaterThan:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
+ return;
+ case IR::Opcode::SGreaterThanEqual:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
+ return;
+ case IR::Opcode::UGreaterThanEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
+ return;
+ case IR::Opcode::IEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
+ return;
+ case IR::Opcode::INotEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
+ return;
+ case IR::Opcode::BitwiseAnd32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
+ return;
+ case IR::Opcode::BitwiseOr32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
+ return;
+ case IR::Opcode::BitwiseXor32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
+ return;
+ case IR::Opcode::BitFieldUExtract:
+ FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
+ if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
+ throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+ base, shift, count);
+ }
+ // Evaluate in 64 bits so a count of 32 does not shift by the full width of a u32
+ return static_cast<u32>((u64{base} >> shift) & ((u64{1} << count) - 1));
+ });
+ return;
+ case IR::Opcode::BitFieldSExtract:
+ FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
+ const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
+ const size_t left_shift{32 - back_shift};
+ const size_t right_shift{static_cast<size_t>(32 - count)};
+ if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
+ throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+ base, shift, count);
+ }
+ return static_cast<u32>((base << left_shift) >> right_shift);
+ });
+ return;
+ case IR::Opcode::BitFieldInsert:
+ FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
+ if (bits >= 32 || offset >= 32) {
+ throw LogicError("Undefined result in {}({}, {}, {}, {})",
+ IR::Opcode::BitFieldInsert, base, insert, offset, bits);
+ }
+ return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
+ });
+ return;
+ case IR::Opcode::CompositeExtractU32x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
+ IR::Opcode::CompositeInsertU32x2);
+ case IR::Opcode::CompositeExtractU32x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
+ IR::Opcode::CompositeInsertU32x3);
+ case IR::Opcode::CompositeExtractU32x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
+ IR::Opcode::CompositeInsertU32x4);
+ case IR::Opcode::CompositeExtractF32x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
+ IR::Opcode::CompositeInsertF32x2);
+ case IR::Opcode::CompositeExtractF32x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
+ IR::Opcode::CompositeInsertF32x3);
+ case IR::Opcode::CompositeExtractF32x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
+ IR::Opcode::CompositeInsertF32x4);
+ case IR::Opcode::CompositeExtractF16x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
+ IR::Opcode::CompositeInsertF16x2);
+ case IR::Opcode::CompositeExtractF16x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
+ IR::Opcode::CompositeInsertF16x3);
+ case IR::Opcode::CompositeExtractF16x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
+ IR::Opcode::CompositeInsertF16x4);
+ case IR::Opcode::FSwizzleAdd:
+ return FoldFSwizzleAdd(block, inst);
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
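+/// Applies constant folding and pattern simplification to every instruction in the program.
+/// Blocks are visited in reverse post order so that, outside of loops, values are simplified
+/// before the blocks that consume them are processed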
+void ConstantPropagationPass(IR::Program& program) {
+ const auto end{program.post_order_blocks.rend()};
+ for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
+ IR::Block* const block{*it};
+ for (IR::Inst& inst : block->Instructions()) {
+ ConstantPropagation(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization