From f43995ec535042d24fa1af637642a2974bfa4773 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Apr 2019 19:48:11 -0300 Subject: shader_ir/decode: Fix half float pre-operations and remove MetaHalfArithmetic Operations done before the main half float operation (like HAdd) were managing a packed value instead of the unpacked one. Adding an unpacked operation allows us to drop the per-operand MetaHalfArithmetic entry, simplifying the code overall. --- src/video_core/shader/decode/arithmetic_half.cpp | 11 +++++---- .../shader/decode/arithmetic_half_immediate.cpp | 7 +++--- src/video_core/shader/decode/half_set.cpp | 12 +++++----- .../shader/decode/half_set_predicate.cpp | 8 +++---- src/video_core/shader/decode/hfma2.cpp | 12 ++++------ src/video_core/shader/shader_ir.cpp | 18 +++++++++------ src/video_core/shader/shader_ir.h | 26 +++++++++------------- 7 files changed, 42 insertions(+), 52 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 8cf49314d..9467f9417 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -29,9 +29,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { const bool negate_b = opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; - const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); - - // instr.alu_half.type_a + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); + op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); Node op_b = [&]() { switch (opcode->get().GetId()) { @@ -46,17 +45,17 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); + op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); Node value = [&]() { - MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, meta, op_a, op_b); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, meta, op_a, op_b); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); return Immediate(0); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index cf4bc0432..fbcd35b18 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -24,18 +24,17 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); } - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); const Node op_b = UnpackHalfImmediate(instr, true); Node value = [&]() { - MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; switch (opcode->get().GetId()) { case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, meta, op_a, op_b); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, meta, op_a, op_b); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNREACHABLE(); return Immediate(0); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index d07625741..1dd94bf9d 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } - // instr.hset2.type_a - // instr.hset2.type_b - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); + op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); + Node op_b = [&]() { switch (opcode->get().GetId()) { case OpCode::Id::HSET2_R: @@ -34,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); - - op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); + op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; - const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); + const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index e68512692..6e59eb650 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - const Node op_b = [&]() { + Node op_b = [&]() { switch (opcode->get().GetId()) { case OpCode::Id::HSETP2_R: return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, @@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); + op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); // We can't use the constant predicate as destination. ASSERT(instr.hsetp2.pred3 != static_cast(Pred::UnusedIndex)); @@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const OperationCode pair_combiner = instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; - const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); + const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); const Node first_pred = Operation(pair_combiner, comparison); // Set the primary predicate to the result of Predicate OP SecondPredicate diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 7a07c5ec6..5c1becce5 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { } constexpr auto identity = HalfType::H0_H1; - - const HalfType type_a = instr.hfma2.type_a; - const Node op_a = GetRegister(instr.gpr8); - bool neg_b{}, neg_c{}; auto [saturate, type_b, op_b, type_c, op_c] = [&]() -> std::tuple { @@ -62,11 +58,11 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { }(); UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); - op_b = GetOperandAbsNegHalf(op_b, false, neg_b); - op_c = GetOperandAbsNegHalf(op_c, false, neg_c); + const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); + op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); + op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; - Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); + Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); SetRegister(bb, instr.gpr0, value); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index d158b4bfd..17f2f711c 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); + return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); +} + +Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { + return Operation(OperationCode::HUnpack, type, value); } Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { @@ -209,10 +213,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { if (absolute) { - value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value); + value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); } if (negate) { - value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), + value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), GetPredicate(true)); } return value; @@ -224,7 +228,7 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { } const Node positive_zero = Immediate(std::copysignf(0, 1)); const Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, HALF_NO_PRECISE, value, positive_zero, positive_one); + return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); } Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { @@ -292,8 +296,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si return predicate; } -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, - const MetaHalfArithmetic& meta, Node op_a, Node op_b) { +Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, + Node op_b) { const std::unordered_map PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::Logical2HLessThan}, {PredCondition::Equal, OperationCode::Logical2HEqual}, @@ -311,7 +315,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), "Unknown predicate comparison operation"); - const Node predicate = Operation(comparison->second, meta, op_a, op_b); + const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); return predicate; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index d329da58d..88957e970 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -109,12 +109,13 @@ enum class OperationCode { UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint - HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 HAbsolute, /// (f16vec2 a) -> f16vec2 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 HMergeF32, /// (f16vec2 src) -> float HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 @@ -287,13 +288,6 @@ struct MetaArithmetic { bool precise{}; }; -struct MetaHalfArithmetic { - bool precise{}; - std::array types = {Tegra::Shader::HalfType::H0_H1, - Tegra::Shader::HalfType::H0_H1, - Tegra::Shader::HalfType::H0_H1}; -}; - struct MetaTexture { const Sampler& sampler; Node array{}; @@ -305,11 +299,10 @@ struct MetaTexture { u32 element{}; }; -constexpr MetaArithmetic PRECISE = {true}; -constexpr MetaArithmetic NO_PRECISE = {false}; -constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; +inline constexpr MetaArithmetic PRECISE = {true}; +inline constexpr MetaArithmetic NO_PRECISE = {false}; -using Meta = std::variant; +using Meta = std::variant; /// Holds any kind of operation that can be done in the IR class OperationNode final { @@ -713,6 +706,8 @@ private: /// Unpacks a half immediate from an instruction Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); + /// Unpacks a binary value into a half float pair with a type format + Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); /// Merges a half pair into another value Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); /// Conditionally absolute/negated half float pair. Absolute is applied first @@ -726,8 +721,7 @@ private: Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, Node op_a, Node op_b); /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, - const MetaHalfArithmetic& meta, Node op_a, Node op_b); + Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate combiner operation OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); -- cgit v1.2.3