From 08b2b1080a2e5794c65ebb999c435f6eb9d1aa26 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 2 Nov 2019 23:44:13 -0300 Subject: gl_shader_decompiler: Reimplement shuffles with platform agnostic intrinsics --- src/video_core/shader/decode/warp.cpp | 68 ++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 33 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..c2875eb2b 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { + OperationCode GetOperationCode(VoteOperation vote_op) { switch (vote_op) { case VoteOperation::All: @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { return OperationCode::VoteAll; } } + } // Anonymous namespace u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { @@ -46,50 +48,50 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SHFL: { - Node width = [this, instr] { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node index = instr.shfl.is_index_imm ? Immediate(static_cast(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); - // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has - // been done reversing Nvidia's math. It won't work on all cases due to SHFL having - // different parameters that don't properly map to GLSL's interface, but it should work - // for cases emitted by Nvidia's compiler. - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation( - OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), - Immediate(8)); - } else { - return Operation(OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, Immediate(0x201F), - Operation(OperationCode::INegate, std::move(mask))), - Immediate(8)); - } - }(); + Node thread_id = Operation(OperationCode::ThreadId); + Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); + Node seg_mask = BitfieldExtract(mask, 8, 16); + + Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); + Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); + Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, + Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - const auto [operation, in_range] = [instr]() -> std::pair { + Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { switch (instr.shfl.operation) { case ShuffleOperation::Idx: - return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; - case ShuffleOperation::Up: - return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + return Operation(OperationCode::IBitwiseOr, + Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), + min_thread_id); case ShuffleOperation::Down: - return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + return Operation(OperationCode::IAdd, thread_id, index); + case ShuffleOperation::Up: + return Operation(OperationCode::IAdd, thread_id, + Operation(OperationCode::INegate, index)); case ShuffleOperation::Bfly: - return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + return Operation(OperationCode::IBitwiseXor, thread_id, index); } - UNREACHABLE_MSG("Invalid SHFL operation: {}", - static_cast(instr.shfl.operation.Value())); - return {}; + UNREACHABLE(); + return Immediate(0U); }(); - // Setting the predicate before the register is intentional to avoid overwriting. - Node index = instr.shfl.is_index_imm ? Immediate(static_cast(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); + } else { + return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); + } + }(); + + SetPredicate(bb, instr.shfl.pred48, in_bounds); SetRegister( bb, instr.gpr0, - Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); break; } default: -- cgit v1.2.3 From 56e237d1f998a4090afb6763222cd65593b299d7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 2 Nov 2019 23:44:46 -0300 Subject: shader_ir/warp: Implement FSWZADD --- src/video_core/shader/decode/warp.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index c2875eb2b..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -94,6 +94,15 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); break; } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); + break; + } default: UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); break; -- cgit v1.2.3