diff options
author | bunnei <bunneidev@gmail.com> | 2021-07-25 20:39:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-25 20:39:04 +0200 |
commit | 98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch) | |
tree | 816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/frontend/maxwell/translate | |
parent | Merge pull request #6699 from lat9nq/common-threads (diff) | |
parent | shader: Support out of bound local memory reads and immediate writes (diff) | |
download | yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.bz2 yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.lz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.zst yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip |
Diffstat (limited to '')
95 files changed, 10073 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,214 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, + }; + static constexpr IR::FpControl 
f32_control{ + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); 
+ default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} + +IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, + AtomSize size, AtomOp op) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); + case AtomSize::U64: + case AtomSize::S64: + return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); + case AtomSize::F32: + return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); + case AtomSize::F16x2: { + return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); + } + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, + const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { + IR::Value result; + if (AtomOpNotApplicable(size, op)) { + result = LoadGlobal(v.ir, offset, size); + } else { + result = ApplyAtomOp(v, operand_reg, offset, size, op); + } + if (write_dest) { + StoreResult(v, dest_reg, result, size); + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + const IR::U64 offset{AtomOffset(*this, insn)}; + GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); +} + +void TranslatorVisitor::RED(u64 insn) { + 
union { + u64 raw; + BitField<0, 8, IR::Reg> operand_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + const IR::U64 offset{AtomOffset(*this, insn)}; + GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 
AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class BitSize : u64 { + B32, + B64, + B96, + B128, +}; + +void TranslatorVisitor::AL2P(u64 inst) { + union { + u64 raw; + BitField<0, 8, IR::Reg> result_register; + BitField<8, 8, IR::Reg> indexing_register; + BitField<20, 11, s64> offset; + BitField<47, 2, BitSize> bitsize; + } al2p{inst}; + if (al2p.bitsize != BitSize::B32) { + throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); + } + const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; + const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; + X(al2p.result_register, result); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// Seems to be in CUDA terminology. 
+enum class LocalScope : u64 { + CTA, + GL, + SYS, + VC, +}; +} // Anonymous namespace + +void TranslatorVisitor::MEMBAR(u64 inst) { + union { + u64 raw; + BitField<8, 2, LocalScope> scope; + } const membar{inst}; + + if (membar.scope == LocalScope::CTA) { + ir.WorkgroupMemoryBarrier(); + } else { + ir.DeviceMemoryBarrier(); + } +} + +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op +} + +void TranslatorVisitor::BAR(u64 insn) { + enum class Mode { + RedPopc, + Scan, + RedAnd, + RedOr, + Sync, + Arrive, + }; + union { + u64 raw; + BitField<43, 1, u64> is_a_imm; + BitField<44, 1, u64> is_b_imm; + BitField<8, 8, u64> imm_a; + BitField<20, 12, u64> imm_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, IR::Pred> pred; + } const bar{insn}; + + const Mode mode{[insn] { + switch (insn & 0x0000009B00000000ULL) { + case 0x0000000200000000ULL: + return Mode::RedPopc; + case 0x0000000300000000ULL: + return Mode::Scan; + case 0x0000000A00000000ULL: + return Mode::RedAnd; + case 0x0000001200000000ULL: + return Mode::RedOr; + case 0x0000008000000000ULL: + return Mode::Sync; + case 0x0000008100000000ULL: + return Mode::Arrive; + } + throw NotImplementedException("Invalid encoding"); + }()}; + if (mode != Mode::Sync) { + throw NotImplementedException("BAR mode {}", mode); + } + if (bar.is_a_imm == 0) { + throw NotImplementedException("Non-immediate input A"); + } + if (bar.imm_a != 0) { + throw NotImplementedException("Non-zero input A"); + } + if (bar.is_b_imm == 0) { + throw NotImplementedException("Non-immediate input B"); + } + if (bar.imm_b != 0) { + throw NotImplementedException("Non-zero input B"); + } + if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { + throw NotImplementedException("Non-true input predicate"); + } + ir.Barrier(); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 
000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_reg; + BitField<40, 1, u64> brev; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const bfe{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + + // Common constants + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 one{v.ir.Imm32(1)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + // Edge case conditions + const IR::U1 zero_count{v.ir.IEqual(count, zero)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; + const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; + + IR::U32 base{v.X(bfe.offset_reg)}; + if (bfe.brev != 0) { + base = v.ir.BitReverse(base); + } + IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; + if (bfe.is_signed != 0) { + const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; + const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; + // Replicate condition + result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; + // Exceeding condition + const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; + result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; + } 
+ // Zero count condition + result = IR::U32{v.ir.Select(zero_count, zero, result)}; + + v.X(bfe.dest_reg, result); + + if (bfe.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BFE_reg(u64 insn) { + BFE(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::BFE_cbuf(u64 insn) { + BFE(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::BFE_imm(u64 insn) { + BFE(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> insert_reg; + BitField<47, 1, u64> cc; + } const bfi{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; + const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + + // Edge case conditions + const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; + + const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; + const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; + + const IR::U32 insert{v.X(bfi.insert_reg)}; + IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; + + result = IR::U32{v.ir.Select(exceed_offset, base, result)}; + + v.X(bfi.dest_reg, result); + if (bfi.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BFI_reg(u64 insn) { + BFI(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_rc(u64 insn) { + BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::BFI_cr(u64 insn) { + BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_imm(u64 insn) { + BFI(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp 
new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -0,0 +1,153 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" + +namespace Shader::Maxwell { +IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return ir.ILessThan(operand_1, operand_2, is_signed); + case CompareOp::Equal: + return ir.IEqual(operand_1, operand_2); + case CompareOp::LessThanEqual: + return ir.ILessThanEqual(operand_1, operand_2, is_signed); + case CompareOp::GreaterThan: + return ir.IGreaterThan(operand_1, operand_2, is_signed); + case CompareOp::NotEqual: + return ir.INotEqual(operand_1, operand_2); + case CompareOp::GreaterThanEqual: + return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? 
ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, + BooleanOp bop) { + switch (bop) { + case BooleanOp::AND: + return ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::OR: + return ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::XOR: + return ir.LogicalXor(predicate_1, predicate_2); + default: + throw NotImplementedException("Invalid bop {}", bop); + } +} + +IR::U1 
PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { + switch (op) { + case PredicateOp::False: + return ir.Imm1(false); + case PredicateOp::True: + return ir.Imm1(true); + case PredicateOp::Zero: + return ir.IEqual(result, ir.Imm32(0)); + case PredicateOp::NonZero: + return ir.INotEqual(result, ir.Imm32(0)); + default: + throw NotImplementedException("Invalid Predicate operation {}", op); + } +} + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} +} // namespace Shader::Maxwell diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed); + +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop); + +[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::CSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + } const cset{insn}; + + const IR::U32 one_mask{ir.Imm32(-1)}; + const IR::U32 fp_one{ir.Imm32(0x3f800000)}; + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; + const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; + const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; + const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; + X(cset.dest_reg, result); + if (cset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (cset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } +} + +void TranslatorVisitor::CSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, BooleanOp> bop; + } const csetp{insn}; + + const BooleanOp bop{csetp.bop}; + const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; + const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; + 
ir.SetPred(csetp.dest_pred_a, result_a); + ir.SetPred(csetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + } const dadd{insn}; + if (dadd.cc != 0) { + throw NotImplementedException("DADD CC"); + } + + const IR::F64 src_a{v.D(dadd.src_a_reg)}; + const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; + + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DADD_reg(u64 insn) { + DADD(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DADD_cbuf(u64 insn) { + DADD(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DADD_imm(u64 insn) { + DADD(*this, insn, GetDoubleImm20(insn)); +} + +} // 
namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,72 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(dset.dest_reg, result); + if (dset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (dset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<50, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + } const dfma{insn}; + + if (dfma.cc != 0) { + throw NotImplementedException("DFMA CC"); + } + + const IR::F64 src_a{v.D(dfma.src_a_reg)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; + const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; + + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DFMA_reg(u64 insn) { + DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_cr(u64 insn) { + DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_rc(u64 insn) { + DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DFMA_imm(u64 insn) { + DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
+// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + if (dmnmx.cc != 0) { + throw NotImplementedException("DMNMX CC"); + } + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg; + } const dmul{insn}; + + if (dmul.cc != 0) { + throw NotImplementedException("DMUL CC"); + } + + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DMUL_reg(u64 insn) { + DMUL(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMUL_cbuf(u64 insn) { + DMUL(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMUL_imm(u64 insn) { + DMUL(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 
000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + v.ir.SetSampleMask(v.X(src_reg)); + } + if (sph.ps.omap.depth != 0) { + v.ir.SetFragDepth(v.F(src_reg + 1)); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -0,0 +1,47 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + BitField<41, 1, u64> shift; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const flo{insn}; + + if (flo.cc != 0) { + throw NotImplementedException("CC"); + } + if (flo.tilde != 0) { + src = v.ir.BitwiseNot(src); + } + IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; + if (flo.shift != 0) { + const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; + result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; + } + v.X(flo.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FLO_reg(u64 insn) { + FLO(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FLO_cbuf(u64 insn) { + FLO(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::FLO_imm(u64 insn) { + FLO(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); +} + +void TranslatorVisitor::FADD32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<56, 1, u64> neg_a; + BitField<54, 
1, u64> abs_a; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> neg_b; + BitField<57, 1, u64> abs_b; + } const fadd32i{insn}; + + FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), + fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None)}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const fcmp{insn}; + const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; + const u32 value{static_cast<u32>(fcmp.value) << 12}; + + FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,78 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(fset.dest_reg, result); + if (fset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (fset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -0,0 +1,214 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class RoundingOp : u64 { + None = 0, + Pass = 3, + Round = 8, + Floor = 9, + Ceil = 10, + Trunc = 11, +}; + +[[nodiscard]] u32 WidthSize(FloatFormat width) { + switch (width) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; + BitField<50, 1, u64> sat; + BitField<39, 4, u64> rounding_op; + BitField<39, 2, FpRounding> rounding; + BitField<10, 2, FloatFormat> src_size; + BitField<8, 2, FloatFormat> dst_size; + + [[nodiscard]] RoundingOp RoundingOperation() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); + } + } const f2f{insn}; + + if (f2f.cc != 0) { + throw NotImplementedException("F2F CC"); + } + + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; + + const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; + IR::FpControl fp_control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + if (f2f.src_size != f2f.dst_size) { + fp_control.rounding = CastFpRounding(f2f.rounding); + input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); + } else { + switch (f2f.RoundingOperation()) { + case RoundingOp::None: + case RoundingOp::Pass: + // Make sure NANs are handled properly + switch (f2f.src_size) { + case FloatFormat::F16: + input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); + break; + case FloatFormat::F32: + input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); + break; + case FloatFormat::F64: + input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); + break; + } + break; + case RoundingOp::Round: + input = v.ir.FPRoundEven(input, fp_control); + break; + case RoundingOp::Floor: + input = v.ir.FPFloor(input, fp_control); + break; + case RoundingOp::Ceil: + input = v.ir.FPCeil(input, fp_control); + break; + case RoundingOp::Trunc: + input = v.ir.FPTrunc(input, fp_control); + break; + default: + throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); + } + } + if (f2f.sat != 0 && !any_fp64) { + input = v.ir.FPSaturate(input); + } + + switch (f2f.dst_size) { + case FloatFormat::F16: { + const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); + break; + } + case FloatFormat::F32: + v.F(f2f.dest_reg, input); + break; + case FloatFormat::F64: + v.D(f2f.dest_reg, input); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2F_reg(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? 
rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatReg20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleReg20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_cbuf(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatCbuf(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleCbuf(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + BitField<20, 19, u64> imm; + BitField<56, 1, u64> imm_neg; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; + const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 
0 : 1)}; + if (f2f.imm_neg != 0) { + throw NotImplementedException("Neg bit on F16"); + } + break; + } + case FloatFormat::F32: + src_a = GetFloatImm20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleImm20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <limits> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class DestFormat : u64 { + Invalid, + I16, + I32, + I64, +}; +enum class SrcFormat : u64 { + Invalid, + F16, + F32, + F64, +}; +enum class Rounding : u64 { + Round, + Floor, + Ceil, + Trunc, +}; + +union F2I { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, DestFormat> dest_format; + BitField<10, 2, SrcFormat> src_format; + BitField<12, 1, u64> is_signed; + BitField<39, 2, Rounding> rounding; + BitField<41, 1, u64> half; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> abs; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> neg; +}; + +size_t BitSize(DestFormat dest_format) { + switch (dest_format) { + case DestFormat::I16: + return 16; + case DestFormat::I32: + return 32; + case DestFormat::I64: + return 64; + default: + throw NotImplementedException("Invalid destination format 
{}", dest_format); + } +} + +std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { + if (is_signed) { + switch (format) { + case DestFormat::I16: + return {static_cast<f64>(std::numeric_limits<s16>::max()), + static_cast<f64>(std::numeric_limits<s16>::min())}; + case DestFormat::I32: + return {static_cast<f64>(std::numeric_limits<s32>::max()), + static_cast<f64>(std::numeric_limits<s32>::min())}; + case DestFormat::I64: + return {static_cast<f64>(std::numeric_limits<s64>::max()), + static_cast<f64>(std::numeric_limits<s64>::min())}; + default: + break; + } + } else { + switch (format) { + case DestFormat::I16: + return {static_cast<f64>(std::numeric_limits<u16>::max()), + static_cast<f64>(std::numeric_limits<u16>::min())}; + case DestFormat::I32: + return {static_cast<f64>(std::numeric_limits<u32>::max()), + static_cast<f64>(std::numeric_limits<u32>::min())}; + case DestFormat::I64: + return {static_cast<f64>(std::numeric_limits<u64>::max()), + static_cast<f64>(std::numeric_limits<u64>::min())}; + default: + break; + } + } + throw NotImplementedException("Invalid destination format {}", format); +} + +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return 
v.ir.PackDouble2x32(vector); +} + +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { + // F2I is used to convert from a floating point value to an integer + const F2I f2i{insn}; + + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, + }; + const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::F16F32F64 rounded_value{[&] { + switch (f2i.rounding) { + case Rounding::Round: + return v.ir.FPRoundEven(op_a, fp_control); + case Rounding::Floor: + return v.ir.FPFloor(op_a, fp_control); + case Rounding::Ceil: + return v.ir.FPCeil(op_a, fp_control); + case Rounding::Trunc: + return v.ir.FPTrunc(op_a, fp_control); + default: + throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); + } + }()}; + const bool is_signed{f2i.is_signed != 0}; + const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); + + IR::F16F32F64 intermediate; + switch (f2i.src_format) { + case SrcFormat::F16: { + const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; + const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F32: { + const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; + const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F64: { + const IR::F64 max_val{v.ir.Imm64(max_bound)}; + const IR::F64 min_val{v.ir.Imm64(min_bound)}; + intermediate = v.ir.FPClamp(rounded_value, min_val, 
max_val); + break; + } + default: + throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); + } + + const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; + IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; + + bool handled_special_case = false; + const bool special_nan_cases = + (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); + if (special_nan_cases) { + if (f2i.dest_format == DestFormat::I32) { + handled_special_case = true; + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; + } else if (f2i.dest_format == DestFormat::I64) { + handled_special_case = true; + result = IR::U64{ + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; + } + } + if (!handled_special_case && is_signed) { + if (bitsize != 64) { + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; + } else { + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; + } + } + + if (bitsize == 64) { + v.L(f2i.dest_reg, result); + } else { + v.X(f2i.dest_reg, result); + } + + if (f2i.cc != 0) { + throw NotImplementedException("F2I CC"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2I_reg(u64 insn) { + union { + u64 raw; + F2I base; + BitField<20, 8, IR::Reg> src_reg; + } const f2i{insn}; + + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.base.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; + case SrcFormat::F32: + return F(f2i.src_reg); + case SrcFormat::F64: + return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); + default: + throw NotImplementedException("Invalid F2I source format {}", + f2i.base.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 
op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetFloatCbuf(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_imm(u64) { + throw NotImplementedException("{}", Opcode::F2I_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,94 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), + }; + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + if (fmnmx.cc) { + throw NotImplementedException("FMNMX CC"); + } + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Operation : u64 { + Cos = 0, + Sin = 1, + Ex2 = 2, // Base 2 exponent + Lg2 = 3, // Base 2 logarithm + Rcp = 4, // Reciprocal + Rsq = 5, // Reciprocal square root + Rcp64H = 6, // 64-bit reciprocal + Rsq64H = 7, // 64-bit reciprocal square root + Sqrt = 8, +}; +} // Anonymous namespace + +void TranslatorVisitor::MUFU(u64 insn) { + // MUFU is used to implement a bunch of special functions. See Operation. 
+ union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 4, Operation> operation; + BitField<46, 1, u64> abs; + BitField<48, 1, u64> neg; + BitField<50, 1, u64> sat; + } const mufu{insn}; + + const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::F32 value{[&]() -> IR::F32 { + switch (mufu.operation) { + case Operation::Cos: + return ir.FPCos(op_a); + case Operation::Sin: + return ir.FPSin(op_a); + case Operation::Ex2: + return ir.FPExp2(op_a); + case Operation::Lg2: + return ir.FPLog2(op_a); + case Operation::Rcp: + return ir.FPRecip(op_a); + case Operation::Rsq: + return ir.FPRecipSqrt(op_a); + case Operation::Rcp64H: + throw NotImplementedException("MUFU.RCP64H"); + case Operation::Rsq64H: + throw NotImplementedException("MUFU.RSQ64H"); + case Operation::Sqrt: + return ir.FPSqrt(op_a); + default: + throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); + } + }()}; + + if (mufu.sat) { + value = ir.FPSaturate(value); + } + + F(mufu.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,127 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + IR::F32 op_a{v.F(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), + }; + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<50, 1, u64> sat; + } const fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); +} + +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,125 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hadd2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); +} + +void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, + const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hadd2{insn}; + + HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, + hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, Swizzle> swizzle_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + GetReg20(insn)); +} + +void TranslatorVisitor::HADD2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> abs_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + GetCbuf(insn)); +} + +void TranslatorVisitor::HADD2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hadd2{insn}; + + const u32 imm{ + static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 
1 : 0) << 31)}; + HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HADD2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hadd2{insn}; + + const u32 imm{static_cast<u32>(hadd2.imm32)}; + HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, + hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, + Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, + bool sat, HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hfma2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; + const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + if (lhs_c.Type() == IR::Type::F16) { + lhs_c = v.ir.FPConvert(32, lhs_c); + rhs_c = v.ir.FPConvert(32, rhs_c); + } + } + + lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); + + lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); + rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), + }; + IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); +} + +void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, + Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, + HalfPrecision precision) { + union { + u64 raw; + BitField<47, 2, Swizzle> swizzle_a; + BitField<49, 2, Merge> merge; + } const hfma2{insn}; + + HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, + sat, precision); +} +} // Anonymous namespace + +void TranslatorVisitor::HFMA2_reg(u64 insn) { + union { + u64 raw; + BitField<28, 2, Swizzle> swizzle_b; + BitField<32, 1, u64> saturate; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> neg_c; + BitField<35, 2, Swizzle> swizzle_c; + BitField<37, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, + GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_rc(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_b; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> 
precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, + GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_cr(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, + GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_imm(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{ + static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 
1 : 0) << 31)}; + + HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), + GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; + BitField<20, 32, u64> imm32; + BitField<52, 1, u64> neg_c; + BitField<53, 2, Swizzle> swizzle_a; + BitField<55, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast<u32>(hfma2.imm32)}; + HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, + Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { + switch (precision) { + case HalfPrecision::None: + return IR::FmzMode::None; + case HalfPrecision::FTZ: + return IR::FmzMode::FTZ; + case HalfPrecision::FMZ: + return IR::FmzMode::FMZ; + default: + return IR::FmzMode::DontCare; + } +} + +std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool is_h0{merge == Merge::MRG_H0}; + const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 
0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -0,0 +1,42 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); + +std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, + HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hmul2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), + }; + IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); +} + +void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, + Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<47, 2, Swizzle> swizzle_a; + BitField<39, 2, HalfPrecision> precision; + } const hmul2{insn}; + + HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, + hmul2.precision); +} +} // Anonymous namespace + +void TranslatorVisitor::HMUL2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<44, 1, u64> abs_a; + BitField<28, 2, Swizzle> swizzle_b; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, + hmul2.swizzle_b, GetReg20(insn)); +} + +void TranslatorVisitor::HMUL2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<54, 1, u64> abs_b; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, + 
Swizzle::F32, GetCbuf(insn)); +} + +void TranslatorVisitor::HMUL2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + const u32 imm{ + static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, + Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HMUL2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 2, HalfPrecision> precision; + BitField<52, 1, u64> sat; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.imm32)}; + HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, + Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -0,0 +1,117 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, + bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hset2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + + lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(hset2.pred)}; + if (hset2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; + + const u32 true_value = bf ? 
0x3c00 : 0xffff; + const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; + const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; + const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; + + v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); +} +} // Anonymous namespace + +void TranslatorVisitor::HSET2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> bf; + BitField<31, 1, u64> neg_b; + BitField<50, 1, u64> ftz; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hset2{insn}; + + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, + hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); +} + +void TranslatorVisitor::HSET2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + } const hset2{insn}; + + HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, + hset2.compare_op, Swizzle::F32); +} + +void TranslatorVisitor::HSET2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{ + static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 
1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, + Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { + union { + u64 insn; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<6, 1, u64> ftz; + BitField<47, 2, Swizzle> swizzle_a; + } const hsetp2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + + lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, 
neg_b); + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; + if (hsetp2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; + + if (h_and) { + auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); + v.ir.SetPred(hsetp2.dest_pred_a, result); + v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); + } else { + v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); + v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); + } +} +} // Anonymous namespace + +void TranslatorVisitor::HSETP2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> h_and; + BitField<31, 1, u64> neg_b; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hsetp2{insn}; + HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> abs_b; + BitField<56, 1, u64> neg_b; + BitField<49, 4, FPCompareOp> compare_op; + } const hsetp2{insn}; + + HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> 
high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hsetp2{insn}; + + const u32 imm{static_cast<u32>(hsetp2.low << 6) | + static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hsetp2.high << 22) | + static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + + HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, + hsetp2.h_and != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -0,0 +1,272 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, + u32 offset) { + if (unaligned) { + return ir.Imm32(0); + } + return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); +} +} // Anonymous namespace + +IR::U32 TranslatorVisitor::X(IR::Reg reg) { + return ir.GetReg(reg); +} + +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + +IR::F32 TranslatorVisitor::F(IR::Reg reg) { + return ir.BitCast<IR::F32>(X(reg)); +} + +IR::F64 TranslatorVisitor::D(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + +void TranslatorVisitor::X(IR::Reg dest_reg, const 
IR::U32& value) { + ir.SetReg(dest_reg, value); +} + +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } +} + +void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { + X(dest_reg, ir.BitCast<IR::U32>(value)); +} + +void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } +} + +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast<IR::F32>(GetReg8(insn)); +} + +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { + return ir.BitCast<IR::F32>(GetReg20(insn)); +} + +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { + return ir.BitCast<IR::F32>(GetReg39(insn)); +} + +IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + +IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + +static 
std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { + union { + u64 raw; + BitField<20, 14, u64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x10'000) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); + } + const IR::Value binding{static_cast<u32>(cbuf.binding)}; + const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto [binding, byte_offset]{CbufAddr(insn)}; + return ir.GetCbuf(binding, byte_offset); +} + +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { + const auto [binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); +} + +IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + const auto [binding, offset_value]{CbufAddr(insn)}; + const bool unaligned{cbuf.unaligned != 0}; + const u32 offset{offset_value.U32()}; + const IR::Value addr{unaligned ? 
offset | 4u : (offset & ~7u) | 4u}; + + const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; + const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; + return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); +} + +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + + if (imm.is_negative != 0) { + const s64 raw{static_cast<s64>(imm.value)}; + return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast<u32>(imm.value)); + } +} + +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; + const u32 value{static_cast<u32>(imm.value) << 12}; + return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); +} + +IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u64 sign_bit{imm.is_negative != 0 ? 
(1ULL << 63) : 0}; + const u64 value{imm.value << 44}; + return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); +} + +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); +} + +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast<u32>(imm.value)); +} + +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); +} + +void TranslatorVisitor::SetZFlag(const IR::U1& value) { + ir.SetZFlag(value); +} + +void TranslatorVisitor::SetSFlag(const IR::U1& value) { + ir.SetSFlag(value); +} + +void TranslatorVisitor::SetCFlag(const IR::U1& value) { + ir.SetCFlag(value); +} + +void TranslatorVisitor::SetOFlag(const IR::U1& value) { + ir.SetOFlag(value); +} + +void TranslatorVisitor::ResetZero() { + SetZFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetSFlag() { + SetSFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetCFlag() { + SetCFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetOFlag() { + SetOFlag(ir.Imm1(false)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -0,0 +1,387 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" + +namespace Shader::Maxwell { + +enum class CompareOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +enum class BooleanOp : u64 { + AND, + OR, + XOR, +}; + +enum class PredicateOp : u64 { + False, + True, + Zero, + NonZero, +}; + +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +class TranslatorVisitor { +public: + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} + + Environment& env; + IR::IREmitter ir; + + void AL2P(u64 insn); + void ALD(u64 insn); + void AST(u64 insn); + void ATOM_cas(u64 insn); + void ATOM(u64 insn); + void ATOMS_cas(u64 insn); + void ATOMS(u64 insn); + void B2R(u64 insn); + void BAR(u64 insn); + void BFE_reg(u64 insn); + void BFE_cbuf(u64 insn); + void BFE_imm(u64 insn); + void BFI_reg(u64 insn); + void BFI_rc(u64 insn); + void BFI_cr(u64 insn); + void BFI_imm(u64 insn); + void BPT(u64 insn); + void BRA(u64 insn); + void BRK(u64 insn); + void BRX(u64 insn); + void CAL(); + void CCTL(u64 insn); + void CCTLL(u64 insn); + void CONT(u64 insn); + void CS2R(u64 insn); + void CSET(u64 insn); + void CSETP(u64 insn); + void DADD_reg(u64 insn); + void DADD_cbuf(u64 insn); + void DADD_imm(u64 insn); + void DEPBAR(); + void DFMA_reg(u64 insn); + void DFMA_rc(u64 insn); + void DFMA_cr(u64 insn); + void DFMA_imm(u64 insn); + void DMNMX_reg(u64 insn); + void DMNMX_cbuf(u64 insn); + void DMNMX_imm(u64 insn); + void DMUL_reg(u64 insn); + void DMUL_cbuf(u64 insn); + void DMUL_imm(u64 insn); + void DSET_reg(u64 insn); + void DSET_cbuf(u64 insn); + void DSET_imm(u64 insn); + void DSETP_reg(u64 insn); + void DSETP_cbuf(u64 insn); + 
void DSETP_imm(u64 insn); + void EXIT(); + void F2F_reg(u64 insn); + void F2F_cbuf(u64 insn); + void F2F_imm(u64 insn); + void F2I_reg(u64 insn); + void F2I_cbuf(u64 insn); + void F2I_imm(u64 insn); + void FADD_reg(u64 insn); + void FADD_cbuf(u64 insn); + void FADD_imm(u64 insn); + void FADD32I(u64 insn); + void FCHK_reg(u64 insn); + void FCHK_cbuf(u64 insn); + void FCHK_imm(u64 insn); + void FCMP_reg(u64 insn); + void FCMP_rc(u64 insn); + void FCMP_cr(u64 insn); + void FCMP_imm(u64 insn); + void FFMA_reg(u64 insn); + void FFMA_rc(u64 insn); + void FFMA_cr(u64 insn); + void FFMA_imm(u64 insn); + void FFMA32I(u64 insn); + void FLO_reg(u64 insn); + void FLO_cbuf(u64 insn); + void FLO_imm(u64 insn); + void FMNMX_reg(u64 insn); + void FMNMX_cbuf(u64 insn); + void FMNMX_imm(u64 insn); + void FMUL_reg(u64 insn); + void FMUL_cbuf(u64 insn); + void FMUL_imm(u64 insn); + void FMUL32I(u64 insn); + void FSET_reg(u64 insn); + void FSET_cbuf(u64 insn); + void FSET_imm(u64 insn); + void FSETP_reg(u64 insn); + void FSETP_cbuf(u64 insn); + void FSETP_imm(u64 insn); + void FSWZADD(u64 insn); + void GETCRSPTR(u64 insn); + void GETLMEMBASE(u64 insn); + void HADD2_reg(u64 insn); + void HADD2_cbuf(u64 insn); + void HADD2_imm(u64 insn); + void HADD2_32I(u64 insn); + void HFMA2_reg(u64 insn); + void HFMA2_rc(u64 insn); + void HFMA2_cr(u64 insn); + void HFMA2_imm(u64 insn); + void HFMA2_32I(u64 insn); + void HMUL2_reg(u64 insn); + void HMUL2_cbuf(u64 insn); + void HMUL2_imm(u64 insn); + void HMUL2_32I(u64 insn); + void HSET2_reg(u64 insn); + void HSET2_cbuf(u64 insn); + void HSET2_imm(u64 insn); + void HSETP2_reg(u64 insn); + void HSETP2_cbuf(u64 insn); + void HSETP2_imm(u64 insn); + void I2F_reg(u64 insn); + void I2F_cbuf(u64 insn); + void I2F_imm(u64 insn); + void I2I_reg(u64 insn); + void I2I_cbuf(u64 insn); + void I2I_imm(u64 insn); + void IADD_reg(u64 insn); + void IADD_cbuf(u64 insn); + void IADD_imm(u64 insn); + void IADD3_reg(u64 insn); + void IADD3_cbuf(u64 insn); + void 
IADD3_imm(u64 insn); + void IADD32I(u64 insn); + void ICMP_reg(u64 insn); + void ICMP_rc(u64 insn); + void ICMP_cr(u64 insn); + void ICMP_imm(u64 insn); + void IDE(u64 insn); + void IDP_reg(u64 insn); + void IDP_imm(u64 insn); + void IMAD_reg(u64 insn); + void IMAD_rc(u64 insn); + void IMAD_cr(u64 insn); + void IMAD_imm(u64 insn); + void IMAD32I(u64 insn); + void IMADSP_reg(u64 insn); + void IMADSP_rc(u64 insn); + void IMADSP_cr(u64 insn); + void IMADSP_imm(u64 insn); + void IMNMX_reg(u64 insn); + void IMNMX_cbuf(u64 insn); + void IMNMX_imm(u64 insn); + void IMUL_reg(u64 insn); + void IMUL_cbuf(u64 insn); + void IMUL_imm(u64 insn); + void IMUL32I(u64 insn); + void IPA(u64 insn); + void ISBERD(u64 insn); + void ISCADD_reg(u64 insn); + void ISCADD_cbuf(u64 insn); + void ISCADD_imm(u64 insn); + void ISCADD32I(u64 insn); + void ISET_reg(u64 insn); + void ISET_cbuf(u64 insn); + void ISET_imm(u64 insn); + void ISETP_reg(u64 insn); + void ISETP_cbuf(u64 insn); + void ISETP_imm(u64 insn); + void JCAL(u64 insn); + void JMP(u64 insn); + void JMX(u64 insn); + void KIL(); + void LD(u64 insn); + void LDC(u64 insn); + void LDG(u64 insn); + void LDL(u64 insn); + void LDS(u64 insn); + void LEA_hi_reg(u64 insn); + void LEA_hi_cbuf(u64 insn); + void LEA_lo_reg(u64 insn); + void LEA_lo_cbuf(u64 insn); + void LEA_lo_imm(u64 insn); + void LEPC(u64 insn); + void LONGJMP(u64 insn); + void LOP_reg(u64 insn); + void LOP_cbuf(u64 insn); + void LOP_imm(u64 insn); + void LOP3_reg(u64 insn); + void LOP3_cbuf(u64 insn); + void LOP3_imm(u64 insn); + void LOP32I(u64 insn); + void MEMBAR(u64 insn); + void MOV_reg(u64 insn); + void MOV_cbuf(u64 insn); + void MOV_imm(u64 insn); + void MOV32I(u64 insn); + void MUFU(u64 insn); + void NOP(u64 insn); + void OUT_reg(u64 insn); + void OUT_cbuf(u64 insn); + void OUT_imm(u64 insn); + void P2R_reg(u64 insn); + void P2R_cbuf(u64 insn); + void P2R_imm(u64 insn); + void PBK(); + void PCNT(); + void PEXIT(u64 insn); + void PIXLD(u64 insn); + void PLONGJMP(u64 
insn); + void POPC_reg(u64 insn); + void POPC_cbuf(u64 insn); + void POPC_imm(u64 insn); + void PRET(u64 insn); + void PRMT_reg(u64 insn); + void PRMT_rc(u64 insn); + void PRMT_cr(u64 insn); + void PRMT_imm(u64 insn); + void PSET(u64 insn); + void PSETP(u64 insn); + void R2B(u64 insn); + void R2P_reg(u64 insn); + void R2P_cbuf(u64 insn); + void R2P_imm(u64 insn); + void RAM(u64 insn); + void RED(u64 insn); + void RET(u64 insn); + void RRO_reg(u64 insn); + void RRO_cbuf(u64 insn); + void RRO_imm(u64 insn); + void RTT(u64 insn); + void S2R(u64 insn); + void SAM(u64 insn); + void SEL_reg(u64 insn); + void SEL_cbuf(u64 insn); + void SEL_imm(u64 insn); + void SETCRSPTR(u64 insn); + void SETLMEMBASE(u64 insn); + void SHF_l_reg(u64 insn); + void SHF_l_imm(u64 insn); + void SHF_r_reg(u64 insn); + void SHF_r_imm(u64 insn); + void SHFL(u64 insn); + void SHL_reg(u64 insn); + void SHL_cbuf(u64 insn); + void SHL_imm(u64 insn); + void SHR_reg(u64 insn); + void SHR_cbuf(u64 insn); + void SHR_imm(u64 insn); + void SSY(); + void ST(u64 insn); + void STG(u64 insn); + void STL(u64 insn); + void STP(u64 insn); + void STS(u64 insn); + void SUATOM(u64 insn); + void SUATOM_cas(u64 insn); + void SULD(u64 insn); + void SURED(u64 insn); + void SUST(u64 insn); + void SYNC(u64 insn); + void TEX(u64 insn); + void TEX_b(u64 insn); + void TEXS(u64 insn); + void TLD(u64 insn); + void TLD_b(u64 insn); + void TLD4(u64 insn); + void TLD4_b(u64 insn); + void TLD4S(u64 insn); + void TLDS(u64 insn); + void TMML(u64 insn); + void TMML_b(u64 insn); + void TXA(u64 insn); + void TXD(u64 insn); + void TXD_b(u64 insn); + void TXQ(u64 insn); + void TXQ_b(u64 insn); + void VABSDIFF(u64 insn); + void VABSDIFF4(u64 insn); + void VADD(u64 insn); + void VMAD(u64 insn); + void VMNMX(u64 insn); + void VOTE(u64 insn); + void VOTE_vtg(u64 insn); + void VSET(u64 insn); + void VSETP(u64 insn); + void VSHL(u64 insn); + void VSHR(u64 insn); + void XMAD_reg(u64 insn); + void XMAD_rc(u64 insn); + void XMAD_cr(u64 insn); + 
void XMAD_imm(u64 insn); + + [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); + [[nodiscard]] IR::F32 F(IR::Reg reg); + [[nodiscard]] IR::F64 D(IR::Reg reg); + + void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); + void F(IR::Reg dest_reg, const IR::F32& value); + void D(IR::Reg dest_reg, const IR::F64& value); + + [[nodiscard]] IR::U32 GetReg8(u64 insn); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); + + [[nodiscard]] IR::U32 GetCbuf(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); + [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); + + [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); + + void SetZFlag(const IR::U1& value); + void SetSFlag(const IR::U1& value); + void SetCFlag(const IR::U1& value); + void SetOFlag(const IR::U1& value); + + void ResetZero(); + void ResetSFlag(); + void ResetCFlag(); + void ResetOFlag(); +}; + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? + if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? 
+ if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file 
included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Shift : u64 { + None, + Right, + Left, +}; +enum class Half : u64 { + All, + Lower, + Upper, +}; + +[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { + constexpr bool is_signed{false}; + switch (half) { + case Half::All: + return value; + case Half::Lower: + return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); + case Half::Upper: + return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); + } + throw NotImplementedException("Invalid half"); +} + +[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { + switch (shift) { + case Shift::None: + return value; + case Shift::Right: { + // 33-bit RS IADD3 edge case + const IR::U1 edge_case{ir.GetCarryFromOp(value)}; + const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; + return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; + } + case Shift::Left: + return ir.ShiftLeftLogical(value, ir.Imm32(16)); + } + throw NotImplementedException("Invalid shift"); +} + +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, + Shift shift = Shift::None) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> x; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3{insn}; + + if (iadd3.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iadd3.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + if (iadd3.neg_c != 0) { + op_c = v.ir.INeg(op_c); + } + IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; + if (iadd3.x != 0) { + // TODO: How does RS behave when X is set? 
+ if (shift == Shift::Right) { + throw NotImplementedException("IADD3 X+RS"); + } + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + lhs_1 = v.ir.IAdd(lhs_1, carry); + } + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; + const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; + + v.X(iadd3.dest_reg, result); + if (iadd3.cc != 0) { + // TODO: How does CC behave when X is set? + if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; + v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); + } +} +} // Anonymous namespace + +void TranslatorVisitor::IADD3_reg(u64 insn) { + union { + u64 insn; + BitField<37, 2, Shift> shift; + BitField<35, 2, Half> half_a; + BitField<33, 2, Half> half_b; + BitField<31, 2, Half> half_c; + } const iadd3{insn}; + + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; + const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; + const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; + IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); +} + +void TranslatorVisitor::IADD3_cbuf(u64 insn) { + IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_imm(u64 insn) { + IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const icmp{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const bool is_signed{icmp.is_signed != 0}; + const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; + + const IR::U32 src_reg{v.X(icmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(icmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ICMP_reg(u64 insn) { + ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_rc(u64 insn) { + ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::ICMP_cr(u64 insn) { + ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_imm(u64 insn) { + ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> x; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const iset{insn}; + + const IR::U32 src_a{v.X(iset.src_reg)}; + const bool is_signed{iset.is_signed != 0}; + const IR::U32 zero{v.ir.Imm32(0)}; + const bool x{iset.x != 0}; + const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; + + IR::U1 pred{v.ir.GetPred(iset.pred)}; + if (iset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 pass_result{iset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(iset.dest_reg, result); + if (iset.cc != 0) { + if (x) { + throw NotImplementedException("ISET.CC + X"); + } + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (iset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::ISET_reg(u64 insn) { + ISET(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISET_cbuf(u64 insn) { + ISET(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISET_imm(u64 insn) { + ISET(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("I2F CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if 
(i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(i2f.fp_rounding), + .fmz_mode = IR::FmzMode::DontCare, + }; + auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), + static_cast<size_t>(conversion_src_bitsize), is_signed, src, + fp_control)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); + } else if (src_bitsize == 32) { + is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if 
(!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class MaxShift : u64 { + U32, + Undefined, + U64, + S64, +}; + +IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, + bool right_shift, bool is_signed) { + if (!right_shift) { + return ir.ShiftLeftLogical(packed_int, safe_shift); + } + if (is_signed) { + return ir.ShiftRightArithmetic(packed_int, safe_shift); + } + return ir.ShiftRightLogical(packed_int, safe_shift); +} + +void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, + bool right_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<0, 8, IR::Reg> lo_bits_reg; + BitField<37, 2, MaxShift> max_shift; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> x_mode; + BitField<50, 1, u64> wrap; + } const shf{insn}; + + if (shf.cc != 0) { + throw NotImplementedException("SHF CC"); + } + if (shf.x_mode != 0) { + throw NotImplementedException("SHF X Mode"); + } + if (shf.max_shift == MaxShift::Undefined) { + throw NotImplementedException("SHF Use of undefined MaxShift value"); + } + const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; + const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; + const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; + const IR::U32 safe_shift{shf.wrap != 0 + ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) + : v.ir.UMin(shift, max_shift)}; + + const bool is_signed{shf.max_shift == MaxShift::S64}; + const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; + const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; + + const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 
0 : 1)}; + v.X(shf.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHF_l_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_l_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_r_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); +} + +void TranslatorVisitor::SHF_r_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -0,0 +1,64 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 2, u64> mode; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const imnmx{insn}; + + if (imnmx.cc != 0) { + throw NotImplementedException("IMNMX CC"); + } + + if (imnmx.mode != 0) { + throw NotImplementedException("IMNMX.MODE"); + } + + const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; + const IR::U32 op_a{v.X(imnmx.src_reg)}; + IR::U32 min; + IR::U32 max; + + if (imnmx.is_signed != 0) { + min = IR::U32{v.ir.SMin(op_a, op_b)}; + max = IR::U32{v.ir.SMax(op_a, op_b)}; + } else { + min = IR::U32{v.ir.UMin(op_a, op_b)}; + max = IR::U32{v.ir.UMax(op_a, op_b)}; + } + if (imnmx.neg_pred != 0) { + std::swap(min, max); + } + + const IR::U32 result{v.ir.Select(pred, min, max)}; + v.X(imnmx.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IMNMX_reg(u64 insn) { + IMNMX(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IMNMX_cbuf(u64 insn) { + IMNMX(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IMNMX_imm(u64 insn) { + IMNMX(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + } const popc{insn}; + + const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); + const IR::U32 result = v.ir.BitCount(operand); + v.X(popc.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::POPC_reg(u64 insn) { + POPC(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::POPC_cbuf(u64 insn) { + POPC(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::POPC_imm(u64 insn) { + POPC(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,86 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, + u64 scale_imm) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + } const iscadd{insn}; + + const bool po{neg_a && neg_b}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (po) { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); + } else { + // When PO is not present, the bits are interpreted as negation + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + if (neg_b) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + v.X(iscadd.dest_reg, result); + + if (cc) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + const IR::U1 carry{v.ir.GetCarryFromOp(result)}; + const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; + v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); + v.SetOFlag(po ? 
v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); + } +} + +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + ISCADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64 insn) { + ISCADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISCADD_imm(u64 insn) { + ISCADD(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::ISCADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 5, u64> scale; + } const iscadd{insn}; + + return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? 
ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; + const BooleanOp bop{isetp.bop}; + const CompareOp compare_op{isetp.compare_op}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file 
included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to wrap it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." + // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. 
+ // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64 insn) { + SHL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHL_cbuf(u64 insn) { + SHL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> is_wrapped; + BitField<40, 1, u64> brev; + BitField<43, 1, u64> xmode; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const shr{insn}; + + if (shr.xmode != 0) { + throw NotImplementedException("SHR.XMODE"); + } + if (shr.cc != 0) { + throw NotImplementedException("SHR.CC"); + } + + IR::U32 base{v.X(shr.src_reg_a)}; + if (shr.brev == 1) { + base = v.ir.BitReverse(base); + } + IR::U32 result; + const IR::U32 safe_shift = shr.is_wrapped == 0 ? 
shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); + if (shr.is_signed == 1) { + result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; + } else { + result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; + } + + if (shr.is_wrapped == 0) { + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 safe_bits{v.ir.Imm32(32)}; + + const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; + const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; + const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; + } + v.X(shr.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHR_reg(u64 insn) { + SHR(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHR_cbuf(u64 insn) { + SHR(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHR_imm(u64 insn) { + SHR(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,135 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 
16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. 
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); +} + +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); +} + +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, + Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 
100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -0,0 +1,126 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class IntegerWidth : u64 { + Byte, + Short, + Word, +}; + +[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { + switch (width) { + case IntegerWidth::Byte: + return ir.Imm32(8); + case IntegerWidth::Short: + return ir.Imm32(16); + case IntegerWidth::Word: + return ir.Imm32(32); + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, + IntegerWidth dst_width) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 count{WidthSize(ir, dst_width)}; + return ir.BitFieldExtract(src, zero, count, false); +} + +[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, + bool dst_signed, bool src_signed) { + IR::U32 min{}; + IR::U32 max{}; + const IR::U32 zero{ir.Imm32(0)}; + switch (dst_width) { + case IntegerWidth::Byte: + min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; + max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); + break; + case IntegerWidth::Short: + min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; + max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); + break; + case IntegerWidth::Word: + min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; + max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); + break; + default: + throw NotImplementedException("Invalid width {}", dst_width); + } + const IR::U32 value{!dst_signed && src_signed ? 
ir.SMax(zero, src) : src}; + return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); +} + +void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, IntegerWidth> dst_fmt; + BitField<12, 1, u64> dst_fmt_sign; + BitField<10, 2, IntegerWidth> src_fmt; + BitField<13, 1, u64> src_fmt_sign; + BitField<41, 3, u64> selector; + BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> abs; + BitField<50, 1, u64> sat; + } const i2i{insn}; + + if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { + throw NotImplementedException("16-bit source format incompatible with selector {}", + i2i.selector); + } + if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { + throw NotImplementedException("32-bit source format incompatible with selector {}", + i2i.selector); + } + + const s32 selector{static_cast<s32>(i2i.selector)}; + const IR::U32 offset{v.ir.Imm32(selector * 8)}; + const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; + const bool src_signed{i2i.src_fmt_sign != 0}; + const bool dst_signed{i2i.dst_fmt_sign != 0}; + const bool sat{i2i.sat != 0}; + + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; + if (i2i.abs != 0) { + src_values = v.ir.IAbs(src_values); + } + if (i2i.neg != 0) { + src_values = v.ir.INeg(src_values); + } + const IR::U32 result{ + sat ? 
SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) + : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; + + v.X(i2i.dest_reg, result); + if (i2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2I_reg(u64 insn) { + I2I(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::I2I_cbuf(u64 insn) { + I2I(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::I2I_imm(u64 insn) { + I2I(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +} // Anonymous namespace + +void TranslatorVisitor::ISBERD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<31, 1, u64> skew; + BitField<32, 1, u64> o; + BitField<33, 2, Mode> mode; + BitField<47, 2, Shift> shift; + } const isberd{insn}; + + if (isberd.skew != 0) { + throw NotImplementedException("SKEW"); + } + if (isberd.o != 0) { + throw NotImplementedException("O"); + } + if (isberd.mode != Mode::Default) { + throw NotImplementedException("Mode {}", isberd.mode.Value()); + } + if (isberd.shift != Shift::Default) { + throw NotImplementedException("Shift {}", isberd.shift.Value()); + } + LOG_WARNING(Shader, "(STUBBED) called"); + X(isberd.dest_reg, X(isberd.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +using namespace LDC; +namespace { +std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + const Encoding ldc{insn}; + const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); + break; + case Size::S8: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); + break; + case Size::U16: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); + break; + case Size::S16: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); + break; + case Size::B32: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new 
file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, + bool neg, bool x) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + + if (x) { + throw NotImplementedException("LEA.HI X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.HI Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.HI CC"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; + const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; + + const s32 hi_scale{32 - static_cast<s32>(scale)}; + const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; + const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; + v.X(lea.dest_reg, result); +} + +void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<39, 5, u64> scale; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + if (lea.x != 0) { + throw NotImplementedException("LEA.LO X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.LO CC"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const s32 scale{static_cast<s32>(lea.scale)}; + const IR::U32 offset{lea.neg != 0 ? 
IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; + const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset)}; + v.X(lea.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::LEA_hi_reg(u64 insn) { + union { + u64 insn; + BitField<28, 5, u64> scale; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { + union { + u64 insn; + BitField<51, 5, u64> scale; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_lo_reg(u64 insn) { + LEA_lo(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { + LEA_lo(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LEA_lo_imm(u64 insn) { + LEA_lo(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -0,0 +1,196 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + +enum class InterpolationMode : u64 { + Pass, + Multiply, + Constant, + Sc, +}; + +enum class SampleMode : u64 { + Default, + Centroid, + Offset, +}; + +u32 NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} + +template <typename F> +void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { + const IR::U32 index_value{v.X(index_reg)}; + for (u32 element = 0; element < num_elements; ++element) { + const IR::U32 final_offset{ + element == 0 ? 
index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; + f(element, final_offset); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> vertex_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const IR::U32 vertex{X(ald.vertex_reg)}; + const u32 num_elements{NumElements(ald.size)}; + if (ald.index_reg == IR::Reg::RZ) { + for (u32 element = 0; element < num_elements; ++element) { + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); + } + } + return; + } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } + HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); + }); +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> vertex_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", 
offset); + } + const IR::U32 vertex{X(ast.vertex_reg)}; + const u32 num_elements{NumElements(ast.size)}; + if (ast.index_reg == IR::Reg::RZ) { + for (u32 element = 0; element < num_elements; ++element) { + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); + } + } + return; + } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } + HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); + }); +} + +void TranslatorVisitor::IPA(u64 insn) { + // IPA is the instruction used to read varyings from a fragment shader. + // gl_FragCoord is mapped to the gl_Position attribute. + // It yields unknown results when used outside of the fragment shader stage. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 8, IR::Reg> multiplier; + BitField<30, 8, IR::Attribute> attribute; + BitField<38, 1, u64> idx; + BitField<51, 1, u64> sat; + BitField<52, 2, SampleMode> sample_mode; + BitField<54, 2, InterpolationMode> interpolation_mode; + } const ipa{insn}; + + // Indexed IPAs are used for indexed varyings. + // For example: + // + // in vec4 colors[4]; + // uniform int idx; + // void main() { + // gl_FragColor = colors[idx]; + // } + const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; + const IR::Attribute attribute{ipa.attribute}; + IR::F32 value{is_indexed ? 
ir.GetAttributeIndexed(X(ipa.index_reg)) + : ir.GetAttribute(attribute)}; + if (IR::IsGeneric(attribute)) { + const ProgramHeader& sph{env.SPH()}; + const u32 attr_index{IR::GenericAttributeIndex(attribute)}; + const u32 element{static_cast<u32>(attribute) % 4}; + const std::array input_map{sph.ps.GenericInputMap(attr_index)}; + const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; + if (is_perspective) { + const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; + value = ir.FPMul(value, position_w); + } + } + if (ipa.interpolation_mode == InterpolationMode::Multiply) { + value = ir.FPMul(value, F(ipa.multiplier)); + } + + // Saturated IPAs are generally generated out of clamped varyings. + // For example: clamp(some_varying, 0.0, 1.0) + const bool is_saturated{ipa.sat != 0}; + if (is_saturated) { + if (attribute == IR::Attribute::FrontFace) { + throw NotImplementedException("IPA.SAT on FrontFace"); + } + value = ir.FPSaturate(value); + } + + F(ipa.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -0,0 +1,218 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +IR::U32 Offset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { + const IR::U32 offset{Offset(v, insn)}; + if (offset.IsImmediate()) { + return {v.ir.Imm32(offset.U32() / 4), offset}; + } else { + return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; + } +} + +std::pair<int, bool> GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} + +IR::U32 
LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { + const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; + const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; + return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const auto [word_offset, offset]{WordOffset(*this, insn)}; + const IR::U32 word{LoadLocal(*this, word_offset, offset)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, word); + for (int i = 1; i < bit_size / 32; ++i) { + const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; + const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; + X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); + 
} + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const auto [word_offset, offset]{WordOffset(*this, insn)}; + if (offset.IsImmediate()) { + // TODO: Support storing out of bounds at runtime + if (offset.U32() >= env.LocalMemorySize()) { + LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", + offset.U32(), env.LocalMemorySize()); + return; + } + } + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} 
// namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, + U128, // ??? +}; + +enum class StoreSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, +}; + +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? 
+ CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; + +// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory) +}; + +IR::U64 Address(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; + BitField<45, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + if (!IR::IsAligned(mem.addr_reg, 2)) { + throw NotImplementedException("Unaligned address register"); + } + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, 
ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + case LoadSize::B128: + case LoadSize::U128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. 
+ union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; + switch (stg.size) { + case StoreSize::U8: + ir.WriteGlobalU8(address, X(data_reg)); + break; + case StoreSize::S8: + ir.WriteGlobalS8(address, X(data_reg)); + break; + case StoreSize::U16: + ir.WriteGlobalU16(address, X(data_reg)); + break; + case StoreSize::S16: + ir.WriteGlobalS16(address, X(data_reg)); + break; + case StoreSize::B32: + ir.WriteGlobal32(address, X(data_reg)); + break; + case StoreSize::B64: { + if (!IR::IsAligned(data_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; + ir.WriteGlobal64(address, vector); + break; + } + case StoreSize::B128: + if (!IR::IsAligned(data_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; + ir.WriteGlobal128(address, vector); + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LogicalOp : u64 { + AND, + OR, + XOR, + PASS_B, +}; + +[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, LogicalOp op) { + switch (op) { + case LogicalOp::AND: + return ir.BitwiseAnd(operand_1, operand_2); + case LogicalOp::OR: + return ir.BitwiseOr(operand_1, operand_2); + case LogicalOp::XOR: + return ir.BitwiseXor(operand_1, operand_2); + case LogicalOp::PASS_B: + return operand_2; + default: + throw NotImplementedException("Invalid Logical operation {}", op); + } +} + +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, + LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, + IR::Pred dest_pred = IR::Pred::PT) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + } const lop{insn}; + + if (x) { + throw NotImplementedException("X"); + } + IR::U32 op_a{v.X(lop.src_reg)}; + if (inv_a != 0) { + op_a = v.ir.BitwiseNot(op_a); + } + if (inv_b != 0) { + op_b = v.ir.BitwiseNot(op_b); + } + + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; + if (pred_op) { + const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; + v.ir.SetPred(dest_pred, pred_result); + } + if (cc) { + if (bit_op == LogicalOp::PASS_B) { + v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); + v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); + } else { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } + v.X(lop.dest_reg, result); +} + +void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<39, 1, u64> inv_a; + BitField<40, 1, 
u64> inv_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> dest_pred; + } const lop{insn}; + + LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, + lop.pred_op, lop.dest_pred); +} +} // Anonymous namespace + +void TranslatorVisitor::LOP_reg(u64 insn) { + LOP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LOP_cbuf(u64 insn) { + LOP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LOP_imm(u64 insn) { + LOP(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::LOP32I(u64 insn) { + union { + u64 raw; + BitField<53, 2, LogicalOp> bit_op; + BitField<57, 1, u64> x; + BitField<52, 1, u64> cc; + BitField<55, 1, u64> inv_a; + BitField<56, 1, u64> inv_b; + } const lop32i{insn}; + + LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, + lop32i.inv_b != 0, lop32i.bit_op); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 +// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) +IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, + u64 ttbl) { + IR::U32 r{ir.Imm32(0)}; + const IR::U32 not_a{ir.BitwiseNot(a)}; + const IR::U32 not_b{ir.BitwiseNot(b)}; + const IR::U32 not_c{ir.BitwiseNot(c)}; + if (ttbl & 0x01) { + // r |= ~a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x02) { + // r |= ~a & ~b & c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x04) { + // r |= ~a & b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x08) { + // r |= ~a & b & c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x10) { + // r |= a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x20) { + // r |= a & ~b & c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x40) { + // r |= a & b & ~c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x80) { + // r |= a & b & c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + return r; +} + +IR::U32 LOP3(TranslatorVisitor& 
v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> cc; + } const lop3{insn}; + + if (lop3.cc != 0) { + throw NotImplementedException("LOP3 CC"); + } + + const IR::U32 op_a{v.X(lop3.src_reg)}; + const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; + v.X(lop3.dest_reg, result); + return result; +} + +u64 GetLut48(u64 insn) { + union { + u64 raw; + BitField<48, 8, u64> lut; + } const lut{insn}; + return lut.lut; +} +} // Anonymous namespace + +void TranslatorVisitor::LOP3_reg(u64 insn) { + union { + u64 insn; + BitField<28, 8, u64> lut; + BitField<38, 1, u64> x; + BitField<36, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop3{insn}; + + if (lop3.x != 0) { + throw NotImplementedException("LOP3 X"); + } + const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; + const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; + ir.SetPred(lop3.pred, pred_result); +} + +void TranslatorVisitor::LOP3_cbuf(u64 insn) { + LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); +} + +void TranslatorVisitor::LOP3_imm(u64 insn) { + LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { + throw NotImplementedException("Non-full move mask"); + } + v.X(mov.dest_reg, src); +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + MOV(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + MOV(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; + +void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { + switch (index) { + case 0: + return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); + case 1: + return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); + case 2: + return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); + case 3: + return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); + default: + throw LogicError("Unreachable R2P index"); + } +} + +void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { + union { + u64 raw; + BitField<8, 8, IR::Reg> src_reg; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const r2p{insn}; + const IR::U32 src{v.X(r2p.src_reg)}; + const IR::U32 count{v.ir.Imm32(1)}; + const bool pr_mode{r2p.mode == Mode::PR}; + const u32 num_items{pr_mode ? 
7U : 4U}; + const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; + for (u32 index = 0; index < num_items; ++index) { + const IR::U32 offset{v.ir.Imm32(offset_base + index)}; + const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; + const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; + const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; + const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; + if (pr_mode) { + const IR::Pred pred{index}; + v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); + } else { + SetFlag(v.ir, inv_mask_bit, src_bit, index); + } + } +} +} // Anonymous namespace + +void TranslatorVisitor::R2P_reg(u64 insn) { + R2P(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::R2P_cbuf(u64 insn) { + R2P(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::R2P_imm(u64 insn) { + R2P(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,181 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_CLOCK = 1, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR12 = 12, + SR13 = 13, + SR14 = 14, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTA_PARAM = 36, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 40, + SR_CirQueueIncrMinusOne = 41, + SR_NLATC = 42, + SR43 = 43, + SR_SM_SPA_VERSION = 44, + SR_MULTIPASSSHADERINFO = 45, + SR_LWINHI = 46, + SR_SWINHI = 47, + SR_SWINLO = 48, + SR_SWINSZ = 49, + SR_SMEMSZ = 50, + SR_SMEMBANKS = 51, + SR_LWINLO = 52, + SR_LWINSZ = 53, + SR_LMEMLOSZ = 54, + SR_LMEMHIOFF = 55, + SR_EQMASK = 56, + SR_LTMASK = 57, + SR_LEMASK = 58, + SR_GTMASK = 59, + SR_GEMASK = 60, + SR_REGALLOC = 61, + SR_BARRIERALLOC = 62, + SR63 = 63, + SR_GLOBALERRORSTATUS = 64, + SR65 = 65, + SR_WARPERRORSTATUS = 66, + SR_WARPERRORSTATUSCLEAR = 67, + SR68 = 68, + SR69 = 69, + SR70 = 70, + SR71 = 71, + SR_PM_HI0 = 72, + SR_PM_HI1 = 73, + SR_PM_HI2 = 74, + SR_PM_HI3 = 75, + SR_PM_HI4 = 76, + SR_PM_HI5 = 77, + SR_PM_HI6 = 78, + SR_PM_HI7 = 79, + SR_CLOCKLO = 80, + SR_CLOCKHI = 81, + SR_GLOBALTIMERLO = 82, + SR_GLOBALTIMERHI = 83, + SR84 = 84, + SR85 = 85, + SR86 = 86, + SR87 = 87, + SR88 = 88, + SR89 = 89, + SR90 = 90, + SR91 = 91, + SR92 = 92, + 
SR93 = 93, + SR94 = 94, + SR95 = 95, + SR_HWTASKID = 96, + SR_CIRCULARQUEUEENTRYINDEX = 97, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); + case SpecialRegister::SR_THREAD_KILL: + return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; + case SpecialRegister::SR_INVOCATION_INFO: + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); + return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + case SpecialRegister::SR_WSCALEFACTOR_XY: + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); + return ir.Imm32(Common::BitCast<u32>(1.0f)); + case SpecialRegister::SR_WSCALEFACTOR_Z: + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); + return ir.Imm32(Common::BitCast<u32>(1.0f)); + case SpecialRegister::SR_LANEID: + return ir.LaneId(); + case SpecialRegister::SR_EQMASK: + return ir.SubgroupEqMask(); + case SpecialRegister::SR_LTMASK: + return ir.SubgroupLtMask(); + case SpecialRegister::SR_LEMASK: + return ir.SubgroupLeMask(); + case SpecialRegister::SR_GTMASK: + return ir.SubgroupGtMask(); + case SpecialRegister::SR_GEMASK: + return ir.SubgroupGeMask(); + case 
SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast<IR::U32>(ir.YDirection()); + case SpecialRegister::SR_AFFINITY: + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); + return ir.Imm32(0); // This is the default value hardware returns. + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -0,0 +1,283 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { + throw NotImplementedException("Instruction {} is not implemented", opcode); +} + +void TranslatorVisitor::ATOM_cas(u64) { + ThrowNotImplemented(Opcode::ATOM_cas); +} + +void TranslatorVisitor::ATOMS_cas(u64) { + ThrowNotImplemented(Opcode::ATOMS_cas); +} + +void TranslatorVisitor::B2R(u64) { + ThrowNotImplemented(Opcode::B2R); +} + +void TranslatorVisitor::BPT(u64) { + ThrowNotImplemented(Opcode::BPT); +} + +void TranslatorVisitor::BRA(u64) { + ThrowNotImplemented(Opcode::BRA); +} + +void TranslatorVisitor::BRK(u64) { + ThrowNotImplemented(Opcode::BRK); +} + +void TranslatorVisitor::CAL() { + // CAL is a no-op +} + +void TranslatorVisitor::CCTL(u64) { + ThrowNotImplemented(Opcode::CCTL); +} + +void TranslatorVisitor::CCTLL(u64) { + ThrowNotImplemented(Opcode::CCTLL); +} + +void TranslatorVisitor::CONT(u64) { + ThrowNotImplemented(Opcode::CONT); +} + +void TranslatorVisitor::CS2R(u64) { + ThrowNotImplemented(Opcode::CS2R); +} + +void TranslatorVisitor::FCHK_reg(u64) { + ThrowNotImplemented(Opcode::FCHK_reg); +} + +void TranslatorVisitor::FCHK_cbuf(u64) { + ThrowNotImplemented(Opcode::FCHK_cbuf); +} + +void TranslatorVisitor::FCHK_imm(u64) { + ThrowNotImplemented(Opcode::FCHK_imm); +} + +void TranslatorVisitor::GETCRSPTR(u64) { + ThrowNotImplemented(Opcode::GETCRSPTR); +} + +void TranslatorVisitor::GETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::GETLMEMBASE); +} + +void TranslatorVisitor::IDE(u64) { + ThrowNotImplemented(Opcode::IDE); +} + +void TranslatorVisitor::IDP_reg(u64) { + ThrowNotImplemented(Opcode::IDP_reg); +} + +void TranslatorVisitor::IDP_imm(u64) { + ThrowNotImplemented(Opcode::IDP_imm); +} + +void TranslatorVisitor::IMAD_reg(u64) { + 
ThrowNotImplemented(Opcode::IMAD_reg); +} + +void TranslatorVisitor::IMAD_rc(u64) { + ThrowNotImplemented(Opcode::IMAD_rc); +} + +void TranslatorVisitor::IMAD_cr(u64) { + ThrowNotImplemented(Opcode::IMAD_cr); +} + +void TranslatorVisitor::IMAD_imm(u64) { + ThrowNotImplemented(Opcode::IMAD_imm); +} + +void TranslatorVisitor::IMAD32I(u64) { + ThrowNotImplemented(Opcode::IMAD32I); +} + +void TranslatorVisitor::IMADSP_reg(u64) { + ThrowNotImplemented(Opcode::IMADSP_reg); +} + +void TranslatorVisitor::IMADSP_rc(u64) { + ThrowNotImplemented(Opcode::IMADSP_rc); +} + +void TranslatorVisitor::IMADSP_cr(u64) { + ThrowNotImplemented(Opcode::IMADSP_cr); +} + +void TranslatorVisitor::IMADSP_imm(u64) { + ThrowNotImplemented(Opcode::IMADSP_imm); +} + +void TranslatorVisitor::IMUL_reg(u64) { + ThrowNotImplemented(Opcode::IMUL_reg); +} + +void TranslatorVisitor::IMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::IMUL_cbuf); +} + +void TranslatorVisitor::IMUL_imm(u64) { + ThrowNotImplemented(Opcode::IMUL_imm); +} + +void TranslatorVisitor::IMUL32I(u64) { + ThrowNotImplemented(Opcode::IMUL32I); +} + +void TranslatorVisitor::JCAL(u64) { + ThrowNotImplemented(Opcode::JCAL); +} + +void TranslatorVisitor::JMP(u64) { + ThrowNotImplemented(Opcode::JMP); +} + +void TranslatorVisitor::KIL() { + // KIL is a no-op +} + +void TranslatorVisitor::LD(u64) { + ThrowNotImplemented(Opcode::LD); +} + +void TranslatorVisitor::LEPC(u64) { + ThrowNotImplemented(Opcode::LEPC); +} + +void TranslatorVisitor::LONGJMP(u64) { + ThrowNotImplemented(Opcode::LONGJMP); +} + +void TranslatorVisitor::NOP(u64) { + // NOP is No-Op. 
+} + +void TranslatorVisitor::PBK() { + // PBK is a no-op +} + +void TranslatorVisitor::PCNT() { + // PCNT is a no-op +} + +void TranslatorVisitor::PEXIT(u64) { + ThrowNotImplemented(Opcode::PEXIT); +} + +void TranslatorVisitor::PLONGJMP(u64) { + ThrowNotImplemented(Opcode::PLONGJMP); +} + +void TranslatorVisitor::PRET(u64) { + ThrowNotImplemented(Opcode::PRET); +} + +void TranslatorVisitor::PRMT_reg(u64) { + ThrowNotImplemented(Opcode::PRMT_reg); +} + +void TranslatorVisitor::PRMT_rc(u64) { + ThrowNotImplemented(Opcode::PRMT_rc); +} + +void TranslatorVisitor::PRMT_cr(u64) { + ThrowNotImplemented(Opcode::PRMT_cr); +} + +void TranslatorVisitor::PRMT_imm(u64) { + ThrowNotImplemented(Opcode::PRMT_imm); +} + +void TranslatorVisitor::R2B(u64) { + ThrowNotImplemented(Opcode::R2B); +} + +void TranslatorVisitor::RAM(u64) { + ThrowNotImplemented(Opcode::RAM); +} + +void TranslatorVisitor::RET(u64) { + ThrowNotImplemented(Opcode::RET); +} + +void TranslatorVisitor::RTT(u64) { + ThrowNotImplemented(Opcode::RTT); +} + +void TranslatorVisitor::SAM(u64) { + ThrowNotImplemented(Opcode::SAM); +} + +void TranslatorVisitor::SETCRSPTR(u64) { + ThrowNotImplemented(Opcode::SETCRSPTR); +} + +void TranslatorVisitor::SETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::SETLMEMBASE); +} + +void TranslatorVisitor::SSY() { + // SSY is a no-op +} + +void TranslatorVisitor::ST(u64) { + ThrowNotImplemented(Opcode::ST); +} + +void TranslatorVisitor::STP(u64) { + ThrowNotImplemented(Opcode::STP); +} + +void TranslatorVisitor::SUATOM_cas(u64) { + ThrowNotImplemented(Opcode::SUATOM_cas); +} + +void TranslatorVisitor::SYNC(u64) { + ThrowNotImplemented(Opcode::SYNC); +} + +void TranslatorVisitor::TXA(u64) { + ThrowNotImplemented(Opcode::TXA); +} + +void TranslatorVisitor::VABSDIFF(u64) { + ThrowNotImplemented(Opcode::VABSDIFF); +} + +void TranslatorVisitor::VABSDIFF4(u64) { + ThrowNotImplemented(Opcode::VABSDIFF4); +} + +void TranslatorVisitor::VADD(u64) { + ThrowNotImplemented(Opcode::VADD); +} + +void 
TranslatorVisitor::VSET(u64) { + ThrowNotImplemented(Opcode::VSET); +} +void TranslatorVisitor::VSHL(u64) { + ThrowNotImplemented(Opcode::VSHL); +} + +void TranslatorVisitor::VSHR(u64) { + ThrowNotImplemented(Opcode::VSHR); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> output_reg; // Not needed on host + BitField<39, 1, u64> emit; + BitField<40, 1, u64> cut; + } const out{insn}; + + stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); + + if (out.emit != 0) { + v.ir.EmitVertex(stream_index); + } + if (out.cut != 0) { + v.ir.EndPrimitive(stream_index); + } + // Host doesn't need the output register, but we can write to it to avoid undefined reads + v.X(out.dest_reg, v.ir.Imm32(0)); +} +} // Anonymous namespace + +void TranslatorVisitor::OUT_reg(u64 insn) { + OUT(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::OUT_cbuf(u64 insn) { + OUT(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::OUT_imm(u64 insn) { + OUT(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- 
/dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; + const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; + const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; + + ir.SetPred(pset.dest_pred_a, result_a); + ir.SetPred(pset.dest_pred_b, result_b); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + BitField<47, 1, u64> cc; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? 
ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 zero{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, zero)}; + + X(pset.dest_reg, result); + if (pset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (pset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + } const sel{insn}; + + const IR::U1 pred = v.ir.GetPred(sel.pred); + IR::U32 op_a{v.X(sel.src_reg)}; + IR::U32 op_b{src}; + if (sel.neg_pred != 0) { + std::swap(op_a, op_b); + } + const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; + + v.X(sel.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SEL_reg(u64 insn) { + SEL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SEL_cbuf(u64 insn) { + SEL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SEL_imm(u64 insn) { + SEL(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -0,0 +1,205 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bit> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +enum class Size : u64 { + U32, + S32, + U64, + S64, + F32FTZRN, + F16x2FTZRN, + SD32, + SD64, +}; + +enum class AtomicOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + default: + break; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, + const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, 
+ bool is_signed) { + switch (op) { + case AtomicOp::ADD: + return ir.ImageAtomicIAdd(handle, coords, op_b, info); + case AtomicOp::MIN: + return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); + case AtomicOp::MAX: + return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); + case AtomicOp::INC: + return ir.ImageAtomicInc(handle, coords, op_b, info); + case AtomicOp::DEC: + return ir.ImageAtomicDec(handle, coords, op_b, info); + case AtomicOp::AND: + return ir.ImageAtomicAnd(handle, coords, op_b, info); + case AtomicOp::OR: + return ir.ImageAtomicOr(handle, coords, op_b, info); + case AtomicOp::XOR: + return ir.ImageAtomicXor(handle, coords, op_b, info); + case AtomicOp::EXCH: + return ir.ImageAtomicExchange(handle, coords, op_b, info); + default: + throw NotImplementedException("Atomic Operation {}", op); + } +} + +ImageFormat Format(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return ImageFormat::R32_UINT; + default: + break; + } + throw NotImplementedException("Invalid size {}", size); +} + +bool IsSizeInt32(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return true; + default: + return false; + } +} + +void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, + IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, + u64 bound_offset, bool is_bindless, bool write_result) { + if (clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", clamp); + } + if (!IsSizeInt32(size)) { + throw NotImplementedException("Size {}", size); + } + const bool is_signed{size == Size::S32}; + const ImageFormat format{Format(size)}; + const TextureType tex_type{GetType(type)}; + const IR::Value coords{MakeCoords(v, coord_reg, type)}; + + const IR::U32 handle{is_bindless != 0 ? 
v.X(bindless_reg) + : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; + IR::TextureInstInfo info{}; + info.type.Assign(tex_type); + info.image_format.Assign(format); + + // TODO: float/64-bit operand + const IR::Value op_b{v.X(operand_reg)}; + const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; + + if (write_result) { + v.X(dest_reg, IR::U32{color}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::SUATOM(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> is_bindless; + BitField<29, 4, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<51, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<36, 13, u64> bound_offset; // !is_bindless + BitField<39, 8, IR::Reg> bindless_reg; // is_bindless + } const suatom{insn}; + + ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, + suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, + suatom.is_bindless != 0, true); +} + +void TranslatorVisitor::SURED(u64 insn) { + // TODO: confirm offsets + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<21, 3, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<20, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> operand_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sured{insn}; + ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, + sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, + sured.is_bound == 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 
000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,281 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bit> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (volatile) +}; + +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level (L2 and below, not L1) + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory, volatile?) 
+}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid 
swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array<IR::U32, 4> colors; + for (int i = 0; i < num_regs; ++i) { + colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[static_cast<size_t>(i)] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if (is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? 
ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? 
Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -0,0 +1,236 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case 
TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional<u32> cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> 
dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + IR::Reg dest_reg{tex.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? 
IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; + } + v.F(dest_reg, value); + ++dest_reg; + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -0,0 +1,266 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <utility> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<53, 4, u64> encoding; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +template <typename... Args> +IR::Value Composite(TranslatorVisitor& v, Args... 
regs) { + return v.ir.CompositeConstruct(v.F(regs)...); +} + +IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding texs{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::Reg reg_a{texs.src_reg_a}; + const IR::Reg reg_b{texs.src_reg_b}; + IR::TextureInstInfo info{}; + if (texs.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + switch (texs.encoding) { + case 0: // 1D.LZ + info.type.Assign(TextureType::Color1D); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); + case 1: // 2D + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); + case 2: // 2D.LZ + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); + case 3: // 2D.LL + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, + info); + case 4: // 2D.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + {}, {}, {}, info); + case 5: // 2D.LL.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), + v.F(reg_b + 1), v.F(reg_b), {}, info); + case 6: // 2D.LZ.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + zero, {}, info); + 
case 7: // ARRAY_2D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleImplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + {}, {}, {}, info); + case 8: // ARRAY_2D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + zero, {}, info); + case 9: // ARRAY_2D.LZ.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + v.F(reg_b + 1), zero, {}, info); + case 10: // 3D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 11: // 3D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, + info); + case 12: // CUBE + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 13: // CUBE.LL + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), + v.F(reg_b + 1), {}, info); + default: + throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); + } +} + +unsigned Swizzle(u64 insn) { + const Encoding texs{insn}; + const size_t encoding{texs.swizzle}; + if (texs.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return 
RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + const bool is_shadow{sample.Type() == IR::Type::F32}; + if (is_shadow) { + const bool is_alpha{component == 3}; + return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; + } else { + return IR::F32{v.ir.CompositeExtract(sample, component)}; + } +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding texs{insn}; + switch (index) { + case 0: + return texs.dest_reg_a; + case 1: + CheckAlignment(texs.dest_reg_a, 2); + return texs.dest_reg_a + 1; + case 2: + return texs.dest_reg_b; + case 3: + CheckAlignment(texs.dest_reg_b, 2); + return texs.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding texs{insn}; + switch (store_index) { + case 1: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + 
case 3: + case 4: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEXS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -0,0 +1,208 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +enum class OffsetType : u64 { + None = 0, + AOFFI, + PTP, + Invalid, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return 
v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { + const IR::U32 value1{v.X(reg++)}; + const IR::U32 value2{v.X(reg++)}; + const IR::U32 bitsize{v.ir.Imm32(6)}; + const auto make_vector{[&v, &bitsize](const IR::U32& value) { + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); + }}; + return {make_vector(value1), make_vector(value2)}; +} + +void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, + bool is_bindless) { + union { + u64 raw; + BitField<35, 1, 
u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld4{insn}; + + const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; + + IR::Reg meta_reg{tld4.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::Value offset2; + IR::F32 dref; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + switch (offset_type) { + case OffsetType::None: + break; + case OffsetType::AOFFI: + offset = MakeOffset(v, meta_reg, tld4.type); + break; + case OffsetType::PTP: + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + break; + default: + throw NotImplementedException("Invalid offset type {}", offset_type); + } + if (tld4.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 
1 : 0); + info.gather_component.Assign(static_cast<u32>(component_type)); + const IR::Value sample{[&] { + if (tld4.dc == 0) { + return v.ir.ImageGather(handle, coords, offset, offset2, info); + } + return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); + }()}; + + IR::Reg dest_reg{tld4.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld4.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld4.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4(u64 insn) { + union { + u64 raw; + BitField<56, 2, ComponentType> component; + BitField<54, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, false); +} + +void TranslatorVisitor::TLD4_b(u64 insn) { + union { + u64 raw; + BitField<38, 2, ComponentType> component; + BitField<36, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -0,0 +1,134 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <utility> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 
1 : 0); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array<IR::F32, 4> swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const Encoding tld4s{insn}; + v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); +} +} // Anonymous namespace + +void 
TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { + const IR::U32 value{v.X(reg)}; + const u32 base{has_lod_clamp ? 
12U : 16U}; + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> aoffi; + BitField<50, 1, u64> lc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> derivate_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const txd{insn}; + + const bool has_lod_clamp = txd.lc != 0; + if (has_lod_clamp) { + throw NotImplementedException("TXD.LC - CLAMP is not implemented"); + } + + IR::Value coords; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; + IR::Reg last_reg; + IR::Value handle; + if (is_bindless) { + handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); + } + + const auto read_array{[&]() -> IR::F32 { + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 
12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); + }}; + switch (txd.type) { + case TextureType::_1D: { + coords = v.F(base_reg); + num_derivates = 1; + last_reg = base_reg + 1; + break; + } + case TextureType::ARRAY_1D: { + last_reg = base_reg + 1; + coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); + num_derivates = 1; + break; + } + case TextureType::_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); + num_derivates = 2; + break; + } + case TextureType::ARRAY_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); + num_derivates = 2; + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + const IR::Reg derivate_reg{txd.derivate_reg}; + IR::Value derivates; + switch (num_derivates) { + case 1: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); + break; + } + case 2: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), + v.F(derivate_reg + 2), v.F(derivate_reg + 3)); + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + IR::Value offset; + if (txd.aoffi != 0) { + offset = MakeOffset(v, last_reg, has_lod_clamp); + } + + IR::F32 lod_clamp; + if (has_lod_clamp) { + // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. + // to convert a fixed point, float(value) / float(1 << fixed_point) + // in this case the fixed_point is 8. 
+ const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); + } + + IR::TextureInstInfo info{}; + info.type.Assign(GetType(txd.type)); + info.num_derivates.Assign(num_derivates); + info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; + + IR::Reg dest_reg{txd.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((txd.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (txd.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TXD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), 
v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (is_bindless) { + handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } else { + lod = 
v.ir.Imm32(0U); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implmented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type)); + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -0,0 +1,242 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<54, 1, u64> aoffi; + BitField<53, 1, u64> lod; + BitField<55, 1, u64> ms; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; + BitField<53, 4, u64> encoding; +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tlds{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; + const IR::Reg reg_a{tlds.src_reg_a}; + const IR::Reg reg_b{tlds.src_reg_b}; + IR::Value coords; + IR::U32 lod{v.ir.Imm32(0U)}; + IR::Value offsets; + IR::U32 multisample; + Shader::TextureType texture_type{}; + switch (tlds.encoding) { + case 0: + texture_type = Shader::TextureType::Color1D; + coords = 
v.X(reg_a); + break; + case 1: + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + lod = v.X(reg_b); + break; + case 2: + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); + break; + case 4: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + offsets = MakeOffset(v, reg_b); + break; + case 5: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + break; + case 6: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + multisample = v.X(reg_b); + break; + case 7: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color3D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); + break; + case 8: { + CheckAlignment(reg_b, 2); + const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; + texture_type = Shader::TextureType::ColorArray2D; + coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); + break; + } + case 12: + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + offsets = MakeOffset(v, reg_b + 1); + break; + default: + throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); + } + IR::TextureInstInfo info{}; + if (tlds.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.type.Assign(texture_type); + return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); +} + +unsigned Swizzle(u64 insn) { + const Encoding tlds{insn}; + const size_t encoding{tlds.swizzle}; + if (tlds.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw 
NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + return IR::F32{v.ir.CompositeExtract(sample, component)}; +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding tlds{insn}; + switch (index) { + case 0: + return tlds.dest_reg_a; + case 1: + CheckAlignment(tlds.dest_reg_a, 2); + return tlds.dest_reg_a + 1; + case 2: + return tlds.dest_reg_b; + case 3: + CheckAlignment(tlds.dest_reg_b, 2); + return tlds.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding tlds{insn}; + switch (store_index) { + case 1: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 
2: + break; + case 3: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLDS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -0,0 +1,131 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid 
texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + // The ISA reads an array component here, but this is not needed on high level shading languages + // We are dropping this information. + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.F(reg + 1); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> ndv; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tmml{insn}; + + if ((tmml.mask & 0b1100) != 0) { + throw NotImplementedException("TMML BA results are not implmented"); + } + const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; + + IR::U32 handle; + IR::Reg meta_reg{tmml.meta_reg}; + if (is_bindless) { + handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tmml.type)); + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; + + IR::Reg dest_reg{tmml.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tmml.mask >> 
element) & 1) == 0) { + continue; + } + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + IR::U32 casted_value; + if (element == 0) { + casted_value = v.ir.ConvertFToU(32, value); + } else { + casted_value = v.ir.ConvertFToS(16, value); + } + v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); + } else { + v.F(dest_reg, value); + } + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TMML(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TMML_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Query mode, decoded from instruction bits 22-24 (see Impl).
+enum class Mode : u64 {
+    Dimension = 1,
+    TextureType = 2,
+    SamplePos = 5,
+};
+
+// Emits the query selected by `mode`. Only Mode::Dimension is implemented: it reads the LOD
+// from src_reg and emits ImageQueryDimension. Every other mode throws NotImplementedException.
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+    switch (mode) {
+    case Mode::Dimension: {
+        const IR::U32 lod{v.X(src_reg)};
+        return v.ir.ImageQueryDimension(handle, lod);
+    }
+    case Mode::TextureType:
+    case Mode::SamplePos:
+    default:
+        throw NotImplementedException("Mode {}", mode);
+    }
+}
+
+// Shared implementation of TXQ and TXQ_b.
+// cbuf_offset: immediate texture handle. When it is empty the handle is read from src_reg and
+// the query operand is taken from the register that follows it.
+// The query components enabled in `mask` are written to consecutive registers from dest_reg.
+void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
+    union {
+        u64 raw;
+        BitField<49, 1, u64> nodep;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<22, 3, Mode> mode;
+        BitField<31, 4, u64> mask;
+    } const txq{insn};
+
+    IR::Reg src_reg{txq.src_reg};
+    IR::U32 handle;
+    if (cbuf_offset) {
+        handle = v.ir.Imm32(*cbuf_offset);
+    } else {
+        handle = v.X(src_reg);
+        ++src_reg;
+    }
+    const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+    IR::Reg dest_reg{txq.dest_reg};
+    for (int element = 0; element < 4; ++element) {
+        if (((txq.mask >> element) & 1) == 0) {
+            continue;
+        }
+        v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
+        ++dest_reg;
+    }
+}
+} // Anonymous namespace
+
+// TXQ: the handle is the immediate cbuf_offset field (bits 36-48) multiplied by 4.
+void TranslatorVisitor::TXQ(u64 insn) {
+    union {
+        u64 raw;
+        BitField<36, 13, u64> cbuf_offset;
+    } const txq{insn};
+
+    Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
+}
+
+// TXQ_b: no immediate handle is passed, so Impl reads the handle from a register.
+void TranslatorVisitor::TXQ_b(u64 insn) {
+    Impl(*this, insn, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+
+// Extracts the operand selected by `width` and `selector` from a 32-bit value:
+//   Byte / Unknown: 8 bits starting at bit selector * 8
+//   Short:          16 bits starting at bit selector * 16
+//   Word:           the value itself, selector and is_signed are not used
+// is_signed is forwarded to BitFieldExtract. Any other width throws NotImplementedException.
+IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
+                                 u32 selector, bool is_signed) {
+    switch (width) {
+    case VideoWidth::Byte:
+    case VideoWidth::Unknown:
+        return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
+    case VideoWidth::Short:
+        return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
+    case VideoWidth::Word:
+        return value;
+    default:
+        throw NotImplementedException("Unknown VideoWidth {}", width);
+    }
+}
+
+// Returns the width to use for a source operand: Short for immediates, `width` otherwise.
+VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
+    // immediates must be 16-bit format.
+    return is_immediate ? VideoWidth::Short : width;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+// Operand width selector of the video instructions (decoded from 2-bit instruction fields).
+enum class VideoWidth : u64 {
+    Byte,
+    Unknown,
+    Short,
+    Word,
+};
+
+// Extracts the part of `value` selected by `width` and `selector`; is_signed selects a signed
+// or unsigned extraction.
+[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
+                                               VideoWidth width, u32 selector, bool is_signed);
+
+// Returns the effective source width, taking into account whether the operand is an immediate.
+[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Second operation of VMNMX, decoded from instruction bits 51-53.
+enum class VideoMinMaxOps : u64 {
+    MRG_16H,
+    MRG_16L,
+    MRG_8B0,
+    MRG_8B2,
+    ACC,
+    MIN,
+    MAX,
+};
+
+// Applies `op` to lhs and rhs. Only MIN and MAX are implemented; every other op throws
+// NotImplementedException.
+[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
+                                         VideoMinMaxOps op, bool is_signed) {
+    switch (op) {
+    case VideoMinMaxOps::MIN:
+        return ir.IMin(lhs, rhs, is_signed);
+    case VideoMinMaxOps::MAX:
+        return ir.IMax(lhs, rhs, is_signed);
+    default:
+        throw NotImplementedException("VMNMX op {}", op);
+    }
+}
+} // Anonymous namespace
+
+// VMNMX: computes min or max (selected by `mx`) of operands a and b, then combines that result
+// with operand c using `op` and writes it to dest_reg.
+// Rejected with NotImplementedException: CC, SAT, selectors other than 2 and a source A width
+// other than Word.
+void TranslatorVisitor::VMNMX(u64 insn) {
+    // NOTE(review): as declared, src_b_selector (bits 28-29) overlaps src_b_width (bits 29-30)
+    // and src_a_selector (bits 36-37) overlaps src_a_width (bits 37-38) - confirm the encoding.
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width;
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+        BitField<51, 3, VideoMinMaxOps> op;
+        BitField<54, 1, u64> dest_sign;
+        BitField<55, 1, u64> sat;
+        BitField<56, 1, u64> mx;
+    } const vmnmx{insn};
+
+    if (vmnmx.cc != 0) {
+        throw NotImplementedException("VMNMX CC");
+    }
+    if (vmnmx.sat != 0) {
+        throw NotImplementedException("VMNMX SAT");
+    }
+    // Selectors were shown to default to 2 in unit tests
+    if (vmnmx.src_a_selector != 2) {
+        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
+    }
+    if (vmnmx.src_b_selector != 2) {
+        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
+    }
+    if (vmnmx.src_a_width != VideoWidth::Word) {
+        throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
+    }
+
+    // Operand b is a 16-bit immediate when is_src_b_reg is clear, otherwise a register
+    const bool is_b_imm{vmnmx.is_src_b_reg == 0};
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+    const IR::U32 src_c{GetReg39(insn)};
+
+    const VideoWidth a_width{vmnmx.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vmnmx.src_a_sign != 0};
+    const bool src_b_signed{vmnmx.src_b_sign != 0};
+    // Both operands are extracted with selector 0 (the encoded selectors are not forwarded)
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+    // First operation's sign is only dependent on operand b's sign
+    const bool op_1_signed{src_b_signed};
+
+    const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+                                    : ir.IMin(op_a, op_b, op_1_signed)};
+    X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+// VMAD: writes op_a * op_b + src_c to dest_reg, where op_a and op_b are the parts of the
+// sources selected by their width and selector fields.
+// Rejected with NotImplementedException: CC, SAT, a non-zero scale and any operand negation
+// (both negation flags set is reported as "VMAD PO").
+void TranslatorVisitor::VMAD(u64 insn) {
+    // NOTE(review): as declared, src_b_selector (bits 28-29) overlaps src_b_width (bits 29-30)
+    // and src_a_selector (bits 36-37) overlaps src_a_width (bits 37-38) - confirm the encoding.
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width;
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+        BitField<51, 2, u64> scale;
+        BitField<53, 1, u64> src_c_neg;
+        BitField<54, 1, u64> src_a_neg;
+        BitField<55, 1, u64> sat;
+    } const vmad{insn};
+
+    if (vmad.cc != 0) {
+        throw NotImplementedException("VMAD CC");
+    }
+    if (vmad.sat != 0) {
+        throw NotImplementedException("VMAD SAT");
+    }
+    if (vmad.scale != 0) {
+        throw NotImplementedException("VMAD SCALE");
+    }
+    // This check must stay ahead of the next one so that "both flags set" gets its own message
+    if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
+        throw NotImplementedException("VMAD PO");
+    }
+    if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+        throw NotImplementedException("VMAD NEG");
+    }
+    // Operand b is a 16-bit immediate when is_src_b_reg is clear, otherwise a register
+    const bool is_b_imm{vmad.is_src_b_reg == 0};
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+    const IR::U32 src_c{GetReg39(insn)};
+
+    const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+    // Immediate values can't have a selector
+    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+    const VideoWidth a_width{vmad.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vmad.src_a_sign != 0};
+    const bool src_b_signed{vmad.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+    X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Comparison encoded in the 5-bit compare_op field of VSETP. The enumerators take the values
+// 0-3 and 16-19, i.e. only bits 0, 1 and 4 of the field are used by valid values.
+enum class VsetpCompareOp : u64 {
+    False = 0,
+    LessThan,
+    Equal,
+    LessThanEqual,
+    GreaterThan = 16,
+    NotEqual,
+    GreaterThanEqual,
+    True,
+};
+
+// Maps a VsetpCompareOp to the CompareOp of the same name. Values that are not enumerators
+// throw NotImplementedException.
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+    switch (op) {
+    case VsetpCompareOp::False:
+        return CompareOp::False;
+    case VsetpCompareOp::LessThan:
+        return CompareOp::LessThan;
+    case VsetpCompareOp::Equal:
+        return CompareOp::Equal;
+    case VsetpCompareOp::LessThanEqual:
+        return CompareOp::LessThanEqual;
+    case VsetpCompareOp::GreaterThan:
+        return CompareOp::GreaterThan;
+    case VsetpCompareOp::NotEqual:
+        return CompareOp::NotEqual;
+    case VsetpCompareOp::GreaterThanEqual:
+        return CompareOp::GreaterThanEqual;
+    case VsetpCompareOp::True:
+        return CompareOp::True;
+    default:
+        throw NotImplementedException("Invalid compare op {}", op);
+    }
+}
+} // Anonymous namespace
+
+// VSETP: compares the selected parts of operands a and b and combines the comparison with
+// bop_pred using `bop`. dest_pred_a receives the combined comparison, dest_pred_b receives the
+// combination of the negated comparison with the same predicate.
+void TranslatorVisitor::VSETP(u64 insn) {
+    // NOTE(review): compare_op spans bits 43-47 and therefore contains the two bop bits
+    // (45-46). With bit 45 or 46 set, compare_op is not one of the VsetpCompareOp enumerators
+    // and VsetpToShaderCompareOp takes its throwing default branch - confirm this is intended.
+    // The selector/width pairs overlap by one bit as well (28-29 / 29-30 and 36-37 / 37-38).
+    union {
+        u64 raw;
+        BitField<0, 3, IR::Pred> dest_pred_b;
+        BitField<3, 3, IR::Pred> dest_pred_a;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width;
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width;
+        BitField<39, 3, IR::Pred> bop_pred;
+        BitField<42, 1, u64> neg_bop_pred;
+        BitField<43, 5, VsetpCompareOp> compare_op;
+        BitField<45, 2, BooleanOp> bop;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+    } const vsetp{insn};
+
+    // Operand b is a 16-bit immediate when is_src_b_reg is clear, otherwise a register
+    const bool is_b_imm{vsetp.is_src_b_reg == 0};
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+    const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
+    const VideoWidth a_width{vsetp.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vsetp.src_a_sign != 0};
+    const bool src_b_signed{vsetp.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+    // Compare operation's sign is only dependent on operand b's sign
+    const bool compare_signed{src_b_signed};
+    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
+    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+    ir.SetPred(vsetp.dest_pred_a, result_a);
+    ir.SetPred(vsetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +void TranslatorVisitor::VOTE_vtg(u64) { + LOG_WARNING(Shader, "(STUBBED) called"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ShuffleMode : u64 { + IDX, + UP, + DOWN, + BFLY, +}; + +[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, + const IR::U32& index, const IR::U32& mask, + ShuffleMode shfl_op) { + const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; + const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; + switch (shfl_op) { + case ShuffleMode::IDX: + return ir.ShuffleIndex(value, index, clamp, seg_mask); + case ShuffleMode::UP: + return ir.ShuffleUp(value, index, clamp, seg_mask); + case ShuffleMode::DOWN: + return ir.ShuffleDown(value, index, clamp, seg_mask); + case ShuffleMode::BFLY: + return ir.ShuffleButterfly(value, index, clamp, seg_mask); + default: + throw NotImplementedException("Invalid SHFL op {}", shfl_op); + } +} + +void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<30, 2, ShuffleMode> mode; + BitField<48, 3, IR::Pred> pred; + } const shfl{insn}; + + const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; + v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); + v.X(shfl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHFL(u64 insn) { + union { + u64 insn; + BitField<20, 5, u64> src_a_imm; + BitField<28, 1, u64> src_a_flag; + BitField<29, 1, u64> src_b_flag; + BitField<34, 13, u64> src_b_imm; + } const flags{insn}; + const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) + : GetReg20(insn)}; + const IR::U32 src_b{flags.src_b_flag != 0 ? 
ir.Imm32(static_cast<u32>(flags.src_b_imm)) + : GetReg39(insn)}; + Shuffle(*this, insn, src_a, src_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..8e3c4c5d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" + +namespace Shader::Maxwell { + +template <auto method> +static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { + using MethodType = decltype(method); + if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) { + (visitor.*method)(pc, insn); + } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) { + (visitor.*method)(insn); + } else { + (visitor.*method)(); + } +} + +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if (location_begin == location_end) { + return; + } + TranslatorVisitor visitor{env, *block}; + for (Location pc = location_begin; pc != location_end; ++pc) { + const u64 insn{env.ReadInstruction(pc.Offset())}; + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { +#define INST(name, cute, mask) \ + case Opcode::name: \ + Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ + break; +#include "shader_recompiler/frontend/maxwell/maxwell.inc" +#undef OPCODE + default: + throw LogicError("Invalid opcode {}", opcode); + } 
+ } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..a3edd2e46 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::Maxwell { + +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..c067d459c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,223 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <memory> +#include <vector> + +#include "common/settings.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + std::optional<PixelImap> imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { + if (value == PixelImap::Unused) { + continue; + } + if 
(imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.interpolation[index] = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = 
GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; + if (program.is_geometry_passthrough) { + const auto& mask{env.GpPassthroughMask()}; + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } + } + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + if (!host_info.support_float16) { + Optimization::LowerFp16ToFp32(program); + } + if (!host_info.support_int64) { + Optimization::LowerInt64ToInt32(program); + } + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(program); + } + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + 
Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); + } + } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(result); + } + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..a84814811 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+// Builds the IR program for the shader described by `env` and `cfg`. Instructions and blocks
+// are allocated from the given pools; `host_info` selects the FP16 / Int64 lowering passes.
+[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
+                                           ObjectPool<IR::Block>& block_pool, Environment& env,
+                                           Flow::CFG& cfg, const HostTranslateInfo& host_info);
+
+// Merges a VertexA and a VertexB program into one program of stage VertexB. Both input
+// programs are modified by the call.
+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+                                                  Environment& env_vertex_b);
+
+} // namespace Shader::Maxwell |