diff options
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r-- | src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 10
-rw-r--r-- | src/shader_recompiler/ir_opt/passes.h                      |  1
-rw-r--r-- | src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp    | 79
3 files changed, 85 insertions, 5 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
     }
 }
 
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
     const IR::Value lhs_value{inst.Arg(0)};
     const IR::Value rhs_value{inst.Arg(1)};
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
-    if (FoldDerivateYFromCorrection(inst)) {
+    if (FoldDerivativeYFromCorrection(inst)) {
         return;
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
     if (coord.IsImmediate()) {
         return false;
     }
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
-        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+        if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
     }
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
                                                     results_matrix[1][1], results_matrix[1][2]);
     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
-    info.num_derivates.Assign(3);
+    info.num_derivatives.Assign(3);
     IR::Value new_gradient_instruction =
         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7082fc5f2..1e637cb23 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
 void VerificationPass(const IR::Program& program);
 
 // Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+    /*
+     * Workaround for an NVIDIA bug seen in Super Mario RPG
+     *
+     * We are looking for this pattern:
+     *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     *   %result = IAdd32 %lhs_shl, %rhs_bfe
+     *
+     * And replacing the IAdd32 with a BitwiseOr32
+     *   %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+     *
+     */
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_bfe) {
+        return;
+    }
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
+        return;
+    }
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+        rhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
+        return;
+    }
+    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe) {
+        return;
+    }
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::IAdd32:
+                AddingByteSwapsWorkaround(*block, inst);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
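
Why the rewrite is safe: in the matched pattern, %lhs_shl has its low 16 bits cleared by the ShiftLeftLogical32, while %rhs_bfe is confined to its low 16 bits by the BitFieldUExtract, so the two IAdd32 operands never share a set bit, the addition cannot carry, and IAdd32 and BitwiseOr32 yield the same value. A minimal standalone C++ sketch of that equivalence (not part of the patch; the BitFieldUExtract helper and the sample values are illustrative stand-ins for the IR opcode, not code from the repository):

// Standalone illustration: with the pattern above, the IAdd32 operands occupy
// disjoint halves of the 32-bit word, so '+' and '|' agree.
#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring BitFieldUExtract %value, #offset, #count.
constexpr uint32_t BitFieldUExtract(uint32_t value, uint32_t offset, uint32_t count) {
    return (value >> offset) & ((1u << count) - 1u);
}

int main() {
    const uint32_t factor_a = 0xDEADBEEFu; // arbitrary sample input
    const uint32_t factor_b = 0x1234u;     // stands in for %factor_b

    const uint32_t lhs_bfe = BitFieldUExtract(factor_a, 0, 16);
    const uint32_t lhs_shl = (lhs_bfe * factor_b) << 16;         // low 16 bits are zero
    const uint32_t rhs_bfe = BitFieldUExtract(factor_a, 16, 16); // high 16 bits are zero

    // No bit positions overlap, so no carries can occur: add == or.
    assert(lhs_shl + rhs_bfe == (lhs_shl | rhs_bfe));
    return 0;
}

The same argument covers the lhs_mul_optional case where the IMul32 is absent: shifting left by 16 still clears the low half of %lhs_shl, so the replacement remains exact.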