summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/frontend/maxwell/translate
diff options
context:
space:
mode:
authorbunnei <bunneidev@gmail.com>2021-07-25 20:39:04 +0200
committerGitHub <noreply@github.com>2021-07-25 20:39:04 +0200
commit98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/frontend/maxwell/translate
parentMerge pull request #6699 from lat9nq/common-threads (diff)
parentshader: Support out of bound local memory reads and immediate writes (diff)
downloadyuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.bz2
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.lz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.zst
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp35
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp96
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp74
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h57
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp153
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp72
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp50
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp43
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp47
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp78
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp253
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp94
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp60
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp125
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp169
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h42
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp143
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp117
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp118
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp272
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h387
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp105
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp48
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp80
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp86
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp135
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp126
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h39
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp196
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp218
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp116
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp181
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp283
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp45
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp46
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp38
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp205
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp281
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp236
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp266
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp208
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp134
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp165
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp242
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp131
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp76
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp30
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h23
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp69
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.cpp52
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp223
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.h23
95 files changed, 10073 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+ SAFEADD,
+};
+
+enum class AtomSize : u64 {
+ U32,
+ S32,
+ U64,
+ F32,
+ F16x2,
+ S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+ AtomOp op, bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.GlobalAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.GlobalAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.GlobalAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.GlobalAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.GlobalAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.GlobalAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.GlobalAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atom Operation {}", op);
+ }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+ AtomSize size) {
+ static constexpr IR::FpControl f16_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ static constexpr IR::FpControl f32_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::FTZ,
+ };
+ switch (op) {
+ case AtomOp::ADD:
+ return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+ : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+ default:
+ throw NotImplementedException("FP Atom Operation {}", op);
+ }
+}
+
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<28, 20, s64> addr_offset;
+ BitField<28, 20, u64> rz_addr_offset;
+ BitField<48, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ return v.L(mem.addr_reg);
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+ // TODO: SAFEADD
+ switch (size) {
+ case AtomSize::S32:
+ case AtomSize::U64:
+ return (op == AtomOp::INC || op == AtomOp::DEC);
+ case AtomSize::S64:
+ return !(op == AtomOp::MIN || op == AtomOp::MAX);
+ case AtomSize::F32:
+ return op != AtomOp::ADD;
+ case AtomSize::F16x2:
+ return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+ default:
+ return false;
+ }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F32:
+ case AtomSize::F16x2:
+ return ir.LoadGlobal32(offset);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ir.PackUint2x32(ir.LoadGlobal64(offset));
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F16x2:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return v.L(dest_reg, IR::U64{result});
+ case AtomSize::F32:
+ return v.F(dest_reg, IR::F32{result});
+ default:
+ break;
+ }
+}
+
+IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
+ AtomSize size, AtomOp op) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
+ case AtomSize::F32:
+ return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
+ case AtomSize::F16x2: {
+ return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
+ }
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
+ const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
+ IR::Value result;
+ if (AtomOpNotApplicable(size, op)) {
+ result = LoadGlobal(v.ir, offset, size);
+ } else {
+ result = ApplyAtomOp(v, operand_reg, offset, size, op);
+ }
+ if (write_dest) {
+ StoreResult(v, dest_reg, result, size);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<49, 3, AtomSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atom{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<20, 3, AtomSize> size;
+ BitField<23, 3, AtomOp> op;
+ } const red{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class AtomsSize : u64 {
+ U32,
+ S32,
+ U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.SharedAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.SharedAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.SharedAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.SharedAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.SharedAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.SharedAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.SharedAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.SharedAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.SharedAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atoms Operation {}", op);
+ }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<30, 22, u64> absolute_offset;
+ BitField<30, 22, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+ switch (size) {
+ case AtomsSize::U32:
+ case AtomsSize::S32:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomsSize::U64:
+ return v.L(dest_reg, IR::U64{result});
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<28, 2, AtomsSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atoms{insn};
+
+ const bool size_64{atoms.size == AtomsSize::U64};
+ if (size_64 && atoms.op != AtomOp::EXCH) {
+ throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+ }
+ const bool is_signed{atoms.size == AtomsSize::S32};
+ const IR::U32 offset{AtomsOffset(*this, insn)};
+
+ IR::Value result;
+ if (size_64) {
+ result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+ } else {
+ result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+ }
+ StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class BitSize : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+void TranslatorVisitor::AL2P(u64 inst) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> result_register;
+ BitField<8, 8, IR::Reg> indexing_register;
+ BitField<20, 11, s64> offset;
+ BitField<47, 2, BitSize> bitsize;
+ } al2p{inst};
+ if (al2p.bitsize != BitSize::B32) {
+ throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
+ }
+ const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
+ const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
+ X(al2p.result_register, result);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Seems to be in CUDA terminology.
+enum class LocalScope : u64 {
+ CTA,
+ GL,
+ SYS,
+ VC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MEMBAR(u64 inst) {
+ union {
+ u64 raw;
+ BitField<8, 2, LocalScope> scope;
+ } const membar{inst};
+
+ if (membar.scope == LocalScope::CTA) {
+ ir.WorkgroupMemoryBarrier();
+ } else {
+ ir.DeviceMemoryBarrier();
+ }
+}
+
+void TranslatorVisitor::DEPBAR() {
+ // DEPBAR is a no-op
+}
+
+void TranslatorVisitor::BAR(u64 insn) {
+ enum class Mode {
+ RedPopc,
+ Scan,
+ RedAnd,
+ RedOr,
+ Sync,
+ Arrive,
+ };
+ union {
+ u64 raw;
+ BitField<43, 1, u64> is_a_imm;
+ BitField<44, 1, u64> is_b_imm;
+ BitField<8, 8, u64> imm_a;
+ BitField<20, 12, u64> imm_b;
+ BitField<42, 1, u64> neg_pred;
+ BitField<39, 3, IR::Pred> pred;
+ } const bar{insn};
+
+ const Mode mode{[insn] {
+ switch (insn & 0x0000009B00000000ULL) {
+ case 0x0000000200000000ULL:
+ return Mode::RedPopc;
+ case 0x0000000300000000ULL:
+ return Mode::Scan;
+ case 0x0000000A00000000ULL:
+ return Mode::RedAnd;
+ case 0x0000001200000000ULL:
+ return Mode::RedOr;
+ case 0x0000008000000000ULL:
+ return Mode::Sync;
+ case 0x0000008100000000ULL:
+ return Mode::Arrive;
+ }
+ throw NotImplementedException("Invalid encoding");
+ }()};
+ if (mode != Mode::Sync) {
+ throw NotImplementedException("BAR mode {}", mode);
+ }
+ if (bar.is_a_imm == 0) {
+ throw NotImplementedException("Non-immediate input A");
+ }
+ if (bar.imm_a != 0) {
+ throw NotImplementedException("Non-zero input A");
+ }
+ if (bar.is_b_imm == 0) {
+ throw NotImplementedException("Non-immediate input B");
+ }
+ if (bar.imm_b != 0) {
+ throw NotImplementedException("Non-zero input B");
+ }
+ if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
+ throw NotImplementedException("Non-true input predicate");
+ }
+ ir.Barrier();
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<40, 1, u64> brev;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const bfe{insn};
+
+ const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
+ const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+
+ // Common constants
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 one{v.ir.Imm32(1)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+ // Edge case conditions
+ const IR::U1 zero_count{v.ir.IEqual(count, zero)};
+ const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
+ const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
+
+ IR::U32 base{v.X(bfe.offset_reg)};
+ if (bfe.brev != 0) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
+ if (bfe.is_signed != 0) {
+ const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
+ const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
+ // Replicate condition
+ result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
+ // Exceeding condition
+ const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
+ result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
+ }
+ // Zero count condition
+ result = IR::U32{v.ir.Select(zero_count, zero, result)};
+
+ v.X(bfe.dest_reg, result);
+
+ if (bfe.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFE_reg(u64 insn) {
+ BFE(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::BFE_cbuf(u64 insn) {
+ BFE(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFE_imm(u64 insn) {
+ BFE(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> insert_reg;
+ BitField<47, 1, u64> cc;
+ } const bfi{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
+ const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+
+ // Edge case conditions
+ const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
+ const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
+
+ const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
+ const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
+
+ const IR::U32 insert{v.X(bfi.insert_reg)};
+ IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
+
+ result = IR::U32{v.ir.Select(exceed_offset, base, result)};
+
+ v.X(bfi.dest_reg, result);
+ if (bfi.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFI_reg(u64 insn) {
+ BFI(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_rc(u64 insn) {
+ BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFI_cr(u64 insn) {
+ BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_imm(u64 insn) {
+ BFI(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void Check(u64 insn) {
+ union {
+ u64 raw;
+ BitField<5, 1, u64> cbuf_mode;
+ BitField<6, 1, u64> lmt;
+ } const encoding{insn};
+
+ if (encoding.cbuf_mode != 0) {
+ throw NotImplementedException("Constant buffer mode");
+ }
+ if (encoding.lmt != 0) {
+ throw NotImplementedException("LMT");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BRX(u64 insn) {
+ Check(insn);
+}
+
+void TranslatorVisitor::JMX(u64 insn) {
+ Check(insn);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+
+namespace Shader::Maxwell {
+
+enum class FpRounding : u64 {
+ RN,
+ RM,
+ RP,
+ RZ,
+};
+
+enum class FmzMode : u64 {
+ None,
+ FTZ,
+ FMZ,
+ INVALIDFMZ3,
+};
+
+inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case FpRounding::RN:
+ return IR::FpRounding::RN;
+ case FpRounding::RM:
+ return IR::FpRounding::RM;
+ case FpRounding::RP:
+ return IR::FpRounding::RP;
+ case FpRounding::RZ:
+ return IR::FpRounding::RZ;
+ }
+ throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
+}
+
+inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
+ switch (fmz_mode) {
+ case FmzMode::None:
+ return IR::FmzMode::None;
+ case FmzMode::FTZ:
+ return IR::FmzMode::FTZ;
+ case FmzMode::FMZ:
+ // FMZ is manually handled in the instruction
+ return IR::FmzMode::FTZ;
+ case FmzMode::INVALIDFMZ3:
+ break;
+ }
+ throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+
+namespace Shader::Maxwell {
+IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return ir.ILessThan(operand_1, operand_2, is_signed);
+ case CompareOp::Equal:
+ return ir.IEqual(operand_1, operand_2);
+ case CompareOp::LessThanEqual:
+ return ir.ILessThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::GreaterThan:
+ return ir.IGreaterThan(operand_1, operand_2, is_signed);
+ case CompareOp::NotEqual:
+ return ir.INotEqual(operand_1, operand_2);
+ case CompareOp::GreaterThanEqual:
+ return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
+ const IR::U1 z_flag{ir.GetZFlag()};
+ const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
+ const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
+ : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
+ ir.ILessThan(operand_2, zero, true))};
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ case CompareOp::Equal:
+ return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
+ case CompareOp::LessThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::GreaterThan: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
+ ir.IGreaterThan(intermediate, zero, true))};
+ const IR::U1 not_z{ir.LogicalNot(z_flag)};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
+ }
+ case CompareOp::NotEqual:
+ return ir.LogicalOr(ir.INotEqual(intermediate, zero),
+ ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
+ case CompareOp::GreaterThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
+ ir.IGreaterThanEqual(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
+ BooleanOp bop) {
+ switch (bop) {
+ case BooleanOp::AND:
+ return ir.LogicalAnd(predicate_1, predicate_2);
+ case BooleanOp::OR:
+ return ir.LogicalOr(predicate_1, predicate_2);
+ case BooleanOp::XOR:
+ return ir.LogicalXor(predicate_1, predicate_2);
+ default:
+ throw NotImplementedException("Invalid bop {}", bop);
+ }
+}
+
+IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
+ switch (op) {
+ case PredicateOp::False:
+ return ir.Imm1(false);
+ case PredicateOp::True:
+ return ir.Imm1(true);
+ case PredicateOp::Zero:
+ return ir.IEqual(result, ir.Imm32(0));
+ case PredicateOp::NonZero:
+ return ir.INotEqual(result, ir.Imm32(0));
+ default:
+ throw NotImplementedException("Invalid Predicate operation {}", op);
+ }
+}
+
+bool IsCompareOpOrdered(FPCompareOp op) {
+ switch (op) {
+ case FPCompareOp::LTU:
+ case FPCompareOp::EQU:
+ case FPCompareOp::LEU:
+ case FPCompareOp::GTU:
+ case FPCompareOp::NEU:
+ case FPCompareOp::GEU:
+ return false;
+ default:
+ return true;
+ }
+}
+
+IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control) {
+ const bool ordered{IsCompareOpOrdered(compare_op)};
+ switch (compare_op) {
+ case FPCompareOp::F:
+ return ir.Imm1(false);
+ case FPCompareOp::LT:
+ case FPCompareOp::LTU:
+ return ir.FPLessThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::EQ:
+ case FPCompareOp::EQU:
+ return ir.FPEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::LE:
+ case FPCompareOp::LEU:
+ return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GT:
+ case FPCompareOp::GTU:
+ return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NE:
+ case FPCompareOp::NEU:
+ return ir.FPNotEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GE:
+ case FPCompareOp::GEU:
+ return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NUM:
+ return ir.FPOrdered(operand_1, operand_2);
+ case FPCompareOp::Nan:
+ return ir.FPUnordered(operand_1, operand_2);
+ case FPCompareOp::T:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid FP compare op {}", compare_op);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
+
+[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op,
+ bool is_signed);
+
+[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
+ const IR::U1& predicate_2, BooleanOp bop);
+
+[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
+
+[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
+
+[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control = {});
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+void TranslatorVisitor::CSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ } const cset{insn};
+
+ const IR::U32 one_mask{ir.Imm32(-1)};
+ const IR::U32 fp_one{ir.Imm32(0x3f800000)};
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
+ const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
+ const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
+ const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
+ X(cset.dest_reg, result);
+ if (cset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (cset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+void TranslatorVisitor::CSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<45, 2, BooleanOp> bop;
+ } const csetp{insn};
+
+ const BooleanOp bop{csetp.bop};
+ const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
+ const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
+ ir.SetPred(csetp.dest_pred_a, result_a);
+ ir.SetPred(csetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ } const dadd{insn};
+ if (dadd.cc != 0) {
+ throw NotImplementedException("DADD CC");
+ }
+
+ const IR::F64 src_a{v.D(dadd.src_a_reg)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dadd.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DADD_reg(u64 insn) {
+ DADD(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DADD_cbuf(u64 insn) {
+ DADD(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DADD_imm(u64 insn) {
+ DADD(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ } const dset{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
+
+ IR::U1 pred{v.ir.GetPred(dset.pred)};
+ if (dset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(dset.dest_reg, result);
+ if (dset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (dset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSET_reg(u64 insn) {
+ DSET(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSET_cbuf(u64 insn) {
+ DSET(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSET_imm(u64 insn) {
+ DSET(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<50, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ } const dfma{insn};
+
+ if (dfma.cc != 0) {
+ throw NotImplementedException("DFMA CC");
+ }
+
+ const IR::F64 src_a{v.D(dfma.src_a_reg)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
+ const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dfma.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DFMA_reg(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_cr(u64 insn) {
+ DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_rc(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DFMA_imm(u64 insn) {
+ DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const dmnmx{insn};
+
+ if (dmnmx.cc != 0) {
+ throw NotImplementedException("DMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
+
+ IR::F64 max{v.ir.FPMax(op_a, op_b)};
+ IR::F64 min{v.ir.FPMin(op_a, op_b)};
+
+ if (dmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+ v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMNMX_reg(u64 insn) {
+ DMNMX(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
+ DMNMX(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMNMX_imm(u64 insn) {
+ DMNMX(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg;
+ } const dmul{insn};
+
+ if (dmul.cc != 0) {
+ throw NotImplementedException("DMUL CC");
+ }
+
+ const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dmul.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMUL_reg(u64 insn) {
+ DMUL(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMUL_cbuf(u64 insn) {
+ DMUL(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMUL_imm(u64 insn) {
+ DMUL(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const dsetp{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
+
+ const BooleanOp bop{dsetp.bop};
+ const FPCompareOp compare_op{dsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
+ const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(dsetp.dest_pred_a, result_a);
+ v.ir.SetPred(dsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSETP_reg(u64 insn) {
+ DSETP(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSETP_cbuf(u64 insn) {
+ DSETP(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSETP_imm(u64 insn) {
+ DSETP(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+ const ProgramHeader sph{v.env.SPH()};
+ IR::Reg src_reg{IR::Reg::R0};
+ for (u32 render_target = 0; render_target < 8; ++render_target) {
+ const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+ for (u32 component = 0; component < 4; ++component) {
+ if (!mask[component]) {
+ continue;
+ }
+ v.ir.SetFragColor(render_target, component, v.F(src_reg));
+ ++src_reg;
+ }
+ }
+ if (sph.ps.omap.sample_mask != 0) {
+ v.ir.SetSampleMask(v.X(src_reg));
+ }
+ if (sph.ps.omap.depth != 0) {
+ v.ir.SetFragDepth(v.F(src_reg + 1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+ switch (env.ShaderStage()) {
+ case Stage::Fragment:
+ ExitFragment(*this);
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ BitField<41, 1, u64> shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const flo{insn};
+
+ if (flo.cc != 0) {
+ throw NotImplementedException("CC");
+ }
+ if (flo.tilde != 0) {
+ src = v.ir.BitwiseNot(src);
+ }
+ IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
+ if (flo.shift != 0) {
+ const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
+ result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
+ }
+ v.X(flo.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FLO_reg(u64 insn) {
+ FLO(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::FLO_cbuf(u64 insn) {
+ FLO(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::FLO_imm(u64 insn) {
+ FLO(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
+ const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fadd{insn};
+
+ if (cc) {
+ throw NotImplementedException("FADD CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
+ IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fadd.dest_reg, value);
+}
+
+void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ BitField<50, 1, u64> sat;
+ } const fadd{insn};
+
+ FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
+ fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FADD_reg(u64 insn) {
+ FADD(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FADD_cbuf(u64 insn) {
+ FADD(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FADD_imm(u64 insn) {
+ FADD(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<56, 1, u64> neg_a;
+ BitField<54, 1, u64> abs_a;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> neg_b;
+ BitField<57, 1, u64> abs_b;
+ } const fadd32i{insn};
+
+ FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
+ fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fcmp{insn};
+
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
+ const IR::U32 src_reg{v.X(fcmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(fcmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FCMP_reg(u64 insn) {
+ FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_rc(u64 insn) {
+ FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FCMP_cr(u64 insn) {
+ FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const fcmp{insn};
+ const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
+ const u32 value{static_cast<u32>(fcmp.value) << 12};
+
+ FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ BitField<55, 1, u64> ftz;
+ } const fset{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
+ const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(fset.pred)};
+ if (fset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(fset.dest_reg, result);
+ if (fset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (fset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSET_reg(u64 insn) {
+ FSET(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSET_cbuf(u64 insn) {
+ FSET(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSET_imm(u64 insn) {
+ FSET(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class RoundingOp : u64 {
+ None = 0,
+ Pass = 3,
+ Round = 8,
+ Floor = 9,
+ Ceil = 10,
+ Trunc = 11,
+};
+
+[[nodiscard]] u32 WidthSize(FloatFormat width) {
+ switch (width) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<50, 1, u64> sat;
+ BitField<39, 4, u64> rounding_op;
+ BitField<39, 2, FpRounding> rounding;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<8, 2, FloatFormat> dst_size;
+
+ [[nodiscard]] RoundingOp RoundingOperation() const {
+ constexpr u64 rounding_mask = 0x0B;
+ return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
+ }
+ } const f2f{insn};
+
+ if (f2f.cc != 0) {
+ throw NotImplementedException("F2F CC");
+ }
+
+ IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
+
+ const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
+ IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ if (f2f.src_size != f2f.dst_size) {
+ fp_control.rounding = CastFpRounding(f2f.rounding);
+ input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
+ } else {
+ switch (f2f.RoundingOperation()) {
+ case RoundingOp::None:
+ case RoundingOp::Pass:
+ // Make sure NANs are handled properly
+ switch (f2f.src_size) {
+ case FloatFormat::F16:
+ input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
+ break;
+ case FloatFormat::F32:
+ input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
+ break;
+ case FloatFormat::F64:
+ input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
+ break;
+ }
+ break;
+ case RoundingOp::Round:
+ input = v.ir.FPRoundEven(input, fp_control);
+ break;
+ case RoundingOp::Floor:
+ input = v.ir.FPFloor(input, fp_control);
+ break;
+ case RoundingOp::Ceil:
+ input = v.ir.FPCeil(input, fp_control);
+ break;
+ case RoundingOp::Trunc:
+ input = v.ir.FPTrunc(input, fp_control);
+ break;
+ default:
+ throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
+ }
+ }
+ if (f2f.sat != 0 && !any_fp64) {
+ input = v.ir.FPSaturate(input);
+ }
+
+ switch (f2f.dst_size) {
+ case FloatFormat::F16: {
+ const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(f2f.dest_reg, input);
+ break;
+ case FloatFormat::F64:
+ v.D(f2f.dest_reg, input);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2F_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatReg20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleReg20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatCbuf(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleCbuf(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ BitField<20, 19, u64> imm;
+ BitField<56, 1, u64> imm_neg;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
+ const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
+ src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
+ if (f2f.imm_neg != 0) {
+ throw NotImplementedException("Neg bit on F16");
+ }
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatImm20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleImm20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class DestFormat : u64 {
+ Invalid,
+ I16,
+ I32,
+ I64,
+};
+enum class SrcFormat : u64 {
+ Invalid,
+ F16,
+ F32,
+ F64,
+};
+enum class Rounding : u64 {
+ Round,
+ Floor,
+ Ceil,
+ Trunc,
+};
+
+union F2I {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, DestFormat> dest_format;
+ BitField<10, 2, SrcFormat> src_format;
+ BitField<12, 1, u64> is_signed;
+ BitField<39, 2, Rounding> rounding;
+ BitField<41, 1, u64> half;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> abs;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> neg;
+};
+
+size_t BitSize(DestFormat dest_format) {
+ switch (dest_format) {
+ case DestFormat::I16:
+ return 16;
+ case DestFormat::I32:
+ return 32;
+ case DestFormat::I64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid destination format {}", dest_format);
+ }
+}
+
+std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
+ if (is_signed) {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<s16>::max()),
+ static_cast<f64>(std::numeric_limits<s16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<s32>::max()),
+ static_cast<f64>(std::numeric_limits<s32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<s64>::max()),
+ static_cast<f64>(std::numeric_limits<s64>::min())};
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<u16>::max()),
+ static_cast<f64>(std::numeric_limits<u16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<u32>::max()),
+ static_cast<f64>(std::numeric_limits<u32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<u64>::max()),
+ static_cast<f64>(std::numeric_limits<u64>::min())};
+ default:
+ break;
+ }
+ }
+ throw NotImplementedException("Invalid destination format {}", format);
+}
+
+IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, s64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
+ }
+ if (cbuf.offset % 2 != 0) {
+ throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
+ }
+ const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
+ const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
+ const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
+ const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
+ return v.ir.PackDouble2x32(vector);
+}
+
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
+ // F2I is used to convert from a floating point value to an integer
+ const F2I f2i{insn};
+
+ const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
+ f2i.dest_format != DestFormat::I64};
+ IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
+ if (denorm_cares) {
+ fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
+ }
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = fmz_mode,
+ };
+ const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
+ const IR::F16F32F64 rounded_value{[&] {
+ switch (f2i.rounding) {
+ case Rounding::Round:
+ return v.ir.FPRoundEven(op_a, fp_control);
+ case Rounding::Floor:
+ return v.ir.FPFloor(op_a, fp_control);
+ case Rounding::Ceil:
+ return v.ir.FPCeil(op_a, fp_control);
+ case Rounding::Trunc:
+ return v.ir.FPTrunc(op_a, fp_control);
+ default:
+ throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
+ }
+ }()};
+ const bool is_signed{f2i.is_signed != 0};
+ const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
+
+ IR::F16F32F64 intermediate;
+ switch (f2i.src_format) {
+ case SrcFormat::F16: {
+ const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
+ const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F32: {
+ const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
+ const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F64: {
+ const IR::F64 max_val{v.ir.Imm64(max_bound)};
+ const IR::F64 min_val{v.ir.Imm64(min_bound)};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
+ }
+
+ const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
+ IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
+
+ bool handled_special_case = false;
+ const bool special_nan_cases =
+ (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
+ if (special_nan_cases) {
+ if (f2i.dest_format == DestFormat::I32) {
+ handled_special_case = true;
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
+ } else if (f2i.dest_format == DestFormat::I64) {
+ handled_special_case = true;
+ result = IR::U64{
+ v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
+ }
+ }
+ if (!handled_special_case && is_signed) {
+ if (bitsize != 64) {
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
+ } else {
+ result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
+ }
+ }
+
+ if (bitsize == 64) {
+ v.L(f2i.dest_reg, result);
+ } else {
+ v.X(f2i.dest_reg, result);
+ }
+
+ if (f2i.cc != 0) {
+ throw NotImplementedException("F2I CC");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2I_reg(u64 insn) {
+ union {
+ u64 raw;
+ F2I base;
+ BitField<20, 8, IR::Reg> src_reg;
+ } const f2i{insn};
+
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.base.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
+ case SrcFormat::F32:
+ return F(f2i.src_reg);
+ case SrcFormat::F64:
+ return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
+ default:
+ throw NotImplementedException("Invalid F2I source format {}",
+ f2i.base.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_cbuf(u64 insn) {
+ const F2I f2i{insn};
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
+ case SrcFormat::F32:
+ return GetFloatCbuf(insn);
+ case SrcFormat::F64: {
+ return UnpackCbuf(*this, insn);
+ }
+ default:
+ throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_imm(u64) {
+ throw NotImplementedException("{}", Opcode::F2I_imm);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
+ bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const ffma{insn};
+
+ if (cc) {
+ throw NotImplementedException("FFMA CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, op_c, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(ffma.dest_reg, value);
+}
+
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> sat;
+ BitField<51, 2, FpRounding> fp_rounding;
+ BitField<53, 2, FmzMode> fmz_mode;
+ } const ffma{insn};
+
+ FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
+ ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FFMA_reg(u64 insn) {
+ FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_rc(u64 insn) {
+ FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FFMA_cr(u64 insn) {
+ FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_imm(u64 insn) {
+ FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz_mode;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<57, 1, u64> neg_c;
+ } const ffma32i{insn};
+
+ FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
+ ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const fmnmx{insn};
+
+ if (fmnmx.cc) {
+ throw NotImplementedException("FMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
+ IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
+
+ if (fmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMNMX_reg(u64 insn) {
+ FMNMX(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
+ FMNMX(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMNMX_imm(u64 insn) {
+ FMNMX(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Operation : u64 {
+ Cos = 0,
+ Sin = 1,
+ Ex2 = 2, // Base 2 exponent
+ Lg2 = 3, // Base 2 logarithm
+ Rcp = 4, // Reciprocal
+ Rsq = 5, // Reciprocal square root
+ Rcp64H = 6, // 64-bit reciprocal
+ Rsq64H = 7, // 64-bit reciprocal square root
+ Sqrt = 8,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MUFU(u64 insn) {
+ // MUFU is used to implement a bunch of special functions. See Operation.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 4, Operation> operation;
+ BitField<46, 1, u64> abs;
+ BitField<48, 1, u64> neg;
+ BitField<50, 1, u64> sat;
+ } const mufu{insn};
+
+ const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+ IR::F32 value{[&]() -> IR::F32 {
+ switch (mufu.operation) {
+ case Operation::Cos:
+ return ir.FPCos(op_a);
+ case Operation::Sin:
+ return ir.FPSin(op_a);
+ case Operation::Ex2:
+ return ir.FPExp2(op_a);
+ case Operation::Lg2:
+ return ir.FPLog2(op_a);
+ case Operation::Rcp:
+ return ir.FPRecip(op_a);
+ case Operation::Rsq:
+ return ir.FPRecipSqrt(op_a);
+ case Operation::Rcp64H:
+ throw NotImplementedException("MUFU.RCP64H");
+ case Operation::Rsq64H:
+ throw NotImplementedException("MUFU.RSQ64H");
+ case Operation::Sqrt:
+ return ir.FPSqrt(op_a);
+ default:
+ throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
+ }
+ }()};
+
+ if (mufu.sat) {
+ value = ir.FPSaturate(value);
+ }
+
+ F(mufu.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Scale : u64 {
+ None,
+ D2,
+ D4,
+ D8,
+ M8,
+ M4,
+ M2,
+ INVALIDSCALE37,
+};
+
+float ScaleFactor(Scale scale) {
+ switch (scale) {
+ case Scale::None:
+ return 1.0f;
+ case Scale::D2:
+ return 1.0f / 2.0f;
+ case Scale::D4:
+ return 1.0f / 4.0f;
+ case Scale::D8:
+ return 1.0f / 8.0f;
+ case Scale::M8:
+ return 8.0f;
+ case Scale::M4:
+ return 4.0f;
+ case Scale::M2:
+ return 2.0f;
+ case Scale::INVALIDSCALE37:
+ break;
+ }
+ throw NotImplementedException("Invalid FMUL scale {}", scale);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
+ FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fmul{insn};
+
+ if (cc) {
+ throw NotImplementedException("FMUL CC");
+ }
+ IR::F32 op_a{v.F(fmul.src_a)};
+ if (scale != Scale::None) {
+ if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
+ throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
+ }
+ op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
+ }
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, zero, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fmul.dest_reg, value);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 3, Scale> scale;
+ BitField<44, 2, FmzMode> fmz;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<50, 1, u64> sat;
+ } const fmul{insn};
+
+ FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
+ fmul.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMUL_reg(u64 insn) {
+ return FMUL(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMUL_cbuf(u64 insn) {
+ return FMUL(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMUL_imm(u64 insn) {
+ return FMUL(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FMUL32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz;
+ BitField<55, 1, u64> sat;
+ } const fmul32i{insn};
+
+ FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
+ fmul32i.sat != 0, fmul32i.cc != 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ SINCOS,
+ EX2,
+};
+
+void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 1, Mode> mode;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+ } const rro{insn};
+
+ v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::RRO_reg(u64 insn) {
+ RRO(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::RRO_cbuf(u64 insn) {
+ RRO(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::RRO_imm(u64) {
+ throw NotImplementedException("RRO (imm)");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fsetp{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
+ const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const BooleanOp bop{fsetp.bop};
+ const FPCompareOp compare_op{fsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
+ const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(fsetp.dest_pred_a, result_a);
+ v.ir.SetPred(fsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSETP_reg(u64 insn) {
+ FSETP(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSETP_cbuf(u64 insn) {
+ FSETP(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSETP_imm(u64 insn) {
+ FSETP(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::FSWZADD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<28, 8, u64> swizzle;
+ BitField<38, 1, u64> ndv;
+ BitField<39, 2, FpRounding> round;
+ BitField<44, 1, u64> ftz;
+ BitField<47, 1, u64> cc;
+ } const fswzadd{insn};
+
+ if (fswzadd.ndv != 0) {
+ throw NotImplementedException("FSWZADD NDV");
+ }
+
+ const IR::F32 src_a{GetFloatReg8(insn)};
+ const IR::F32 src_b{GetFloatReg20(insn)};
+ const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
+
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(fswzadd.round),
+ .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
+ F(fswzadd.dest_reg, result);
+
+ if (fswzadd.cc != 0) {
+ throw NotImplementedException("FSWZADD CC");
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hadd2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
+}
+
+void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
+ const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<39, 1, u64> ftz;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hadd2{insn};
+
+ HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
+ hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HADD2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+ GetReg20(insn));
+}
+
+void TranslatorVisitor::HADD2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> abs_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+ GetCbuf(insn));
+}
+
+void TranslatorVisitor::HADD2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hadd2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+ HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HADD2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hadd2{insn};
+
+ const u32 imm{static_cast<u32>(hadd2.imm32)};
+ HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
+ hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
+ Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
+ bool sat, HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hfma2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ if (lhs_c.Type() == IR::Type::F16) {
+ lhs_c = v.ir.FPConvert(32, lhs_c);
+ rhs_c = v.ir.FPConvert(32, rhs_c);
+ }
+ }
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
+
+ lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
+ rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
+}
+
+void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
+ Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<49, 2, Merge> merge;
+ } const hfma2{insn};
+
+ HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
+ sat, precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HFMA2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<28, 2, Swizzle> swizzle_b;
+ BitField<32, 1, u64> saturate;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> neg_c;
+ BitField<35, 2, Swizzle> swizzle_c;
+ BitField<37, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
+ GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
+ GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
+ GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
+ GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c;
+ BitField<20, 32, u64> imm32;
+ BitField<52, 1, u64> neg_c;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<55, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{static_cast<u32>(hfma2.imm32)};
+ HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
+ Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
+ switch (precision) {
+ case HalfPrecision::None:
+ return IR::FmzMode::None;
+ case HalfPrecision::FTZ:
+ return IR::FmzMode::FTZ;
+ case HalfPrecision::FMZ:
+ return IR::FmzMode::FMZ;
+ default:
+ return IR::FmzMode::DontCare;
+ }
+}
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
+ switch (swizzle) {
+ case Swizzle::H1_H0: {
+ const IR::Value vector{ir.UnpackFloat2x16(value)};
+ return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
+ }
+ case Swizzle::H0_H0: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
+ return {scalar, scalar};
+ }
+ case Swizzle::H1_H1: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
+ return {scalar, scalar};
+ }
+ case Swizzle::F32: {
+ const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
+ return {scalar, scalar};
+ }
+ }
+ throw InvalidArgument("Invalid swizzle {}", swizzle);
+}
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge) {
+ switch (merge) {
+ case Merge::H1_H0:
+ return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
+ case Merge::F32:
+ return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
+ case Merge::MRG_H0:
+ case Merge::MRG_H1: {
+ const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
+ const bool is_h0{merge == Merge::MRG_H0};
+ const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
+ return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
+ }
+ }
+ throw InvalidArgument("Invalid merge {}", merge);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class Merge : u64 {
+ H1_H0,
+ F32,
+ MRG_H0,
+ MRG_H1,
+};
+
+enum class Swizzle : u64 {
+ H1_H0,
+ F32,
+ H0_H0,
+ H1_H1,
+};
+
+enum class HalfPrecision : u64 {
+ None = 0,
+ FTZ = 1,
+ FMZ = 2,
+};
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hmul2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
+}
+
+void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
+ Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<39, 2, HalfPrecision> precision;
+ } const hmul2{insn};
+
+ HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
+ hmul2.precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HMUL2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<44, 1, u64> abs_a;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
+ hmul2.swizzle_b, GetReg20(insn));
+}
+
+void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<54, 1, u64> abs_b;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
+ Swizzle::F32, GetCbuf(insn));
+}
+
+void TranslatorVisitor::HMUL2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HMUL2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 2, HalfPrecision> precision;
+ BitField<52, 1, u64> sat;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hmul2{insn};
+
+ const u32 imm{static_cast<u32>(hmul2.imm32)};
+ HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
+ bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hset2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hset2.pred)};
+ if (hset2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
+
+ const u32 true_value = bf ? 0x3c00 : 0xffff;
+ const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
+ const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
+ const IR::U32 fail_result{v.ir.Imm32(0)};
+ const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
+ const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
+
+ v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSET2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> bf;
+ BitField<31, 1, u64> neg_b;
+ BitField<50, 1, u64> ftz;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
+ hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
+}
+
+void TranslatorVisitor::HSET2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
+ hset2.compare_op, Swizzle::F32);
+}
+
+void TranslatorVisitor::HSET2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hset2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
+ Swizzle::H1_H0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
+ Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
+ union {
+ u64 insn;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<6, 1, u64> ftz;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hsetp2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
+ if (hsetp2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
+
+ if (h_and) {
+ auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
+ v.ir.SetPred(hsetp2.dest_pred_a, result);
+ v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
+ } else {
+ v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
+ v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSETP2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> h_and;
+ BitField<31, 1, u64> neg_b;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hsetp2{insn};
+ HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> abs_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hsetp2{insn};
+
+ HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hsetp2{insn};
+
+ const u32 imm{static_cast<u32>(hsetp2.low << 6) |
+ static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hsetp2.high << 22) |
+ static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
+ hsetp2.h_and != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
+ u32 offset) {
+ if (unaligned) {
+ return ir.Imm32(0);
+ }
+ return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
+}
+} // Anonymous namespace
+
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+ return ir.GetReg(reg);
+}
+
+IR::U64 TranslatorVisitor::L(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+IR::F32 TranslatorVisitor::F(IR::Reg reg) {
+ return ir.BitCast<IR::F32>(X(reg));
+}
+
+IR::F64 TranslatorVisitor::D(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+ ir.SetReg(dest_reg, value);
+}
+
+void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackUint2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
+ X(dest_reg, ir.BitCast<IR::U32>(value));
+}
+
+void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg8(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg20(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg39(insn));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, u64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x10'000) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+ }
+ const IR::Value binding{static_cast<u32>(cbuf.binding)};
+ const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
+ return {IR::U32{binding}, IR::U32{byte_offset}};
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetCbuf(binding, byte_offset);
+}
+
+IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetFloatCbuf(binding, byte_offset);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ const auto [binding, offset_value]{CbufAddr(insn)};
+ const bool unaligned{cbuf.unaligned != 0};
+ const u32 offset{offset_value.U32()};
+ const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
+
+ const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
+ const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
+ return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
+}
+
+IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ if (cbuf.unaligned != 0) {
+ throw NotImplementedException("Unaligned packed constant buffer read");
+ }
+ const auto [binding, lower_offset]{CbufAddr(insn)};
+ const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
+ const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
+ const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
+ return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
+}
+
+IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+
+ if (imm.is_negative != 0) {
+ const s64 raw{static_cast<s64>(imm.value)};
+ return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
+ } else {
+ return ir.Imm32(static_cast<u32>(imm.value));
+ }
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+ const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
+ const u32 value{static_cast<u32>(imm.value) << 12};
+ return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+ const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
+ const u64 value{imm.value << 44};
+ return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
+}
+
+IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
+ const s64 value{GetImm20(insn).U32()};
+ return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
+}
+
+IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(static_cast<u32>(imm.value));
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+ ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+ ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+ ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+ ir.SetOFlag(value);
+}
+
+void TranslatorVisitor::ResetZero() {
+ SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+ SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+ SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+ SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+
+namespace Shader::Maxwell {
+
+enum class CompareOp : u64 {
+ False,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+enum class BooleanOp : u64 {
+ AND,
+ OR,
+ XOR,
+};
+
+enum class PredicateOp : u64 {
+ False,
+ True,
+ Zero,
+ NonZero,
+};
+
+enum class FPCompareOp : u64 {
+ F,
+ LT,
+ EQ,
+ LE,
+ GT,
+ NE,
+ GE,
+ NUM,
+ Nan,
+ LTU,
+ EQU,
+ LEU,
+ GTU,
+ NEU,
+ GEU,
+ T,
+};
+
+class TranslatorVisitor {
+public:
+ explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
+
+ Environment& env;
+ IR::IREmitter ir;
+
+ void AL2P(u64 insn);
+ void ALD(u64 insn);
+ void AST(u64 insn);
+ void ATOM_cas(u64 insn);
+ void ATOM(u64 insn);
+ void ATOMS_cas(u64 insn);
+ void ATOMS(u64 insn);
+ void B2R(u64 insn);
+ void BAR(u64 insn);
+ void BFE_reg(u64 insn);
+ void BFE_cbuf(u64 insn);
+ void BFE_imm(u64 insn);
+ void BFI_reg(u64 insn);
+ void BFI_rc(u64 insn);
+ void BFI_cr(u64 insn);
+ void BFI_imm(u64 insn);
+ void BPT(u64 insn);
+ void BRA(u64 insn);
+ void BRK(u64 insn);
+ void BRX(u64 insn);
+ void CAL();
+ void CCTL(u64 insn);
+ void CCTLL(u64 insn);
+ void CONT(u64 insn);
+ void CS2R(u64 insn);
+ void CSET(u64 insn);
+ void CSETP(u64 insn);
+ void DADD_reg(u64 insn);
+ void DADD_cbuf(u64 insn);
+ void DADD_imm(u64 insn);
+ void DEPBAR();
+ void DFMA_reg(u64 insn);
+ void DFMA_rc(u64 insn);
+ void DFMA_cr(u64 insn);
+ void DFMA_imm(u64 insn);
+ void DMNMX_reg(u64 insn);
+ void DMNMX_cbuf(u64 insn);
+ void DMNMX_imm(u64 insn);
+ void DMUL_reg(u64 insn);
+ void DMUL_cbuf(u64 insn);
+ void DMUL_imm(u64 insn);
+ void DSET_reg(u64 insn);
+ void DSET_cbuf(u64 insn);
+ void DSET_imm(u64 insn);
+ void DSETP_reg(u64 insn);
+ void DSETP_cbuf(u64 insn);
+ void DSETP_imm(u64 insn);
+ void EXIT();
+ void F2F_reg(u64 insn);
+ void F2F_cbuf(u64 insn);
+ void F2F_imm(u64 insn);
+ void F2I_reg(u64 insn);
+ void F2I_cbuf(u64 insn);
+ void F2I_imm(u64 insn);
+ void FADD_reg(u64 insn);
+ void FADD_cbuf(u64 insn);
+ void FADD_imm(u64 insn);
+ void FADD32I(u64 insn);
+ void FCHK_reg(u64 insn);
+ void FCHK_cbuf(u64 insn);
+ void FCHK_imm(u64 insn);
+ void FCMP_reg(u64 insn);
+ void FCMP_rc(u64 insn);
+ void FCMP_cr(u64 insn);
+ void FCMP_imm(u64 insn);
+ void FFMA_reg(u64 insn);
+ void FFMA_rc(u64 insn);
+ void FFMA_cr(u64 insn);
+ void FFMA_imm(u64 insn);
+ void FFMA32I(u64 insn);
+ void FLO_reg(u64 insn);
+ void FLO_cbuf(u64 insn);
+ void FLO_imm(u64 insn);
+ void FMNMX_reg(u64 insn);
+ void FMNMX_cbuf(u64 insn);
+ void FMNMX_imm(u64 insn);
+ void FMUL_reg(u64 insn);
+ void FMUL_cbuf(u64 insn);
+ void FMUL_imm(u64 insn);
+ void FMUL32I(u64 insn);
+ void FSET_reg(u64 insn);
+ void FSET_cbuf(u64 insn);
+ void FSET_imm(u64 insn);
+ void FSETP_reg(u64 insn);
+ void FSETP_cbuf(u64 insn);
+ void FSETP_imm(u64 insn);
+ void FSWZADD(u64 insn);
+ void GETCRSPTR(u64 insn);
+ void GETLMEMBASE(u64 insn);
+ void HADD2_reg(u64 insn);
+ void HADD2_cbuf(u64 insn);
+ void HADD2_imm(u64 insn);
+ void HADD2_32I(u64 insn);
+ void HFMA2_reg(u64 insn);
+ void HFMA2_rc(u64 insn);
+ void HFMA2_cr(u64 insn);
+ void HFMA2_imm(u64 insn);
+ void HFMA2_32I(u64 insn);
+ void HMUL2_reg(u64 insn);
+ void HMUL2_cbuf(u64 insn);
+ void HMUL2_imm(u64 insn);
+ void HMUL2_32I(u64 insn);
+ void HSET2_reg(u64 insn);
+ void HSET2_cbuf(u64 insn);
+ void HSET2_imm(u64 insn);
+ void HSETP2_reg(u64 insn);
+ void HSETP2_cbuf(u64 insn);
+ void HSETP2_imm(u64 insn);
+ void I2F_reg(u64 insn);
+ void I2F_cbuf(u64 insn);
+ void I2F_imm(u64 insn);
+ void I2I_reg(u64 insn);
+ void I2I_cbuf(u64 insn);
+ void I2I_imm(u64 insn);
+ void IADD_reg(u64 insn);
+ void IADD_cbuf(u64 insn);
+ void IADD_imm(u64 insn);
+ void IADD3_reg(u64 insn);
+ void IADD3_cbuf(u64 insn);
+ void IADD3_imm(u64 insn);
+ void IADD32I(u64 insn);
+ void ICMP_reg(u64 insn);
+ void ICMP_rc(u64 insn);
+ void ICMP_cr(u64 insn);
+ void ICMP_imm(u64 insn);
+ void IDE(u64 insn);
+ void IDP_reg(u64 insn);
+ void IDP_imm(u64 insn);
+ void IMAD_reg(u64 insn);
+ void IMAD_rc(u64 insn);
+ void IMAD_cr(u64 insn);
+ void IMAD_imm(u64 insn);
+ void IMAD32I(u64 insn);
+ void IMADSP_reg(u64 insn);
+ void IMADSP_rc(u64 insn);
+ void IMADSP_cr(u64 insn);
+ void IMADSP_imm(u64 insn);
+ void IMNMX_reg(u64 insn);
+ void IMNMX_cbuf(u64 insn);
+ void IMNMX_imm(u64 insn);
+ void IMUL_reg(u64 insn);
+ void IMUL_cbuf(u64 insn);
+ void IMUL_imm(u64 insn);
+ void IMUL32I(u64 insn);
+ void IPA(u64 insn);
+ void ISBERD(u64 insn);
+ void ISCADD_reg(u64 insn);
+ void ISCADD_cbuf(u64 insn);
+ void ISCADD_imm(u64 insn);
+ void ISCADD32I(u64 insn);
+ void ISET_reg(u64 insn);
+ void ISET_cbuf(u64 insn);
+ void ISET_imm(u64 insn);
+ void ISETP_reg(u64 insn);
+ void ISETP_cbuf(u64 insn);
+ void ISETP_imm(u64 insn);
+ void JCAL(u64 insn);
+ void JMP(u64 insn);
+ void JMX(u64 insn);
+ void KIL();
+ void LD(u64 insn);
+ void LDC(u64 insn);
+ void LDG(u64 insn);
+ void LDL(u64 insn);
+ void LDS(u64 insn);
+ void LEA_hi_reg(u64 insn);
+ void LEA_hi_cbuf(u64 insn);
+ void LEA_lo_reg(u64 insn);
+ void LEA_lo_cbuf(u64 insn);
+ void LEA_lo_imm(u64 insn);
+ void LEPC(u64 insn);
+ void LONGJMP(u64 insn);
+ void LOP_reg(u64 insn);
+ void LOP_cbuf(u64 insn);
+ void LOP_imm(u64 insn);
+ void LOP3_reg(u64 insn);
+ void LOP3_cbuf(u64 insn);
+ void LOP3_imm(u64 insn);
+ void LOP32I(u64 insn);
+ void MEMBAR(u64 insn);
+ void MOV_reg(u64 insn);
+ void MOV_cbuf(u64 insn);
+ void MOV_imm(u64 insn);
+ void MOV32I(u64 insn);
+ void MUFU(u64 insn);
+ void NOP(u64 insn);
+ void OUT_reg(u64 insn);
+ void OUT_cbuf(u64 insn);
+ void OUT_imm(u64 insn);
+ void P2R_reg(u64 insn);
+ void P2R_cbuf(u64 insn);
+ void P2R_imm(u64 insn);
+ void PBK();
+ void PCNT();
+ void PEXIT(u64 insn);
+ void PIXLD(u64 insn);
+ void PLONGJMP(u64 insn);
+ void POPC_reg(u64 insn);
+ void POPC_cbuf(u64 insn);
+ void POPC_imm(u64 insn);
+ void PRET(u64 insn);
+ void PRMT_reg(u64 insn);
+ void PRMT_rc(u64 insn);
+ void PRMT_cr(u64 insn);
+ void PRMT_imm(u64 insn);
+ void PSET(u64 insn);
+ void PSETP(u64 insn);
+ void R2B(u64 insn);
+ void R2P_reg(u64 insn);
+ void R2P_cbuf(u64 insn);
+ void R2P_imm(u64 insn);
+ void RAM(u64 insn);
+ void RED(u64 insn);
+ void RET(u64 insn);
+ void RRO_reg(u64 insn);
+ void RRO_cbuf(u64 insn);
+ void RRO_imm(u64 insn);
+ void RTT(u64 insn);
+ void S2R(u64 insn);
+ void SAM(u64 insn);
+ void SEL_reg(u64 insn);
+ void SEL_cbuf(u64 insn);
+ void SEL_imm(u64 insn);
+ void SETCRSPTR(u64 insn);
+ void SETLMEMBASE(u64 insn);
+ void SHF_l_reg(u64 insn);
+ void SHF_l_imm(u64 insn);
+ void SHF_r_reg(u64 insn);
+ void SHF_r_imm(u64 insn);
+ void SHFL(u64 insn);
+ void SHL_reg(u64 insn);
+ void SHL_cbuf(u64 insn);
+ void SHL_imm(u64 insn);
+ void SHR_reg(u64 insn);
+ void SHR_cbuf(u64 insn);
+ void SHR_imm(u64 insn);
+ void SSY();
+ void ST(u64 insn);
+ void STG(u64 insn);
+ void STL(u64 insn);
+ void STP(u64 insn);
+ void STS(u64 insn);
+ void SUATOM(u64 insn);
+ void SUATOM_cas(u64 insn);
+ void SULD(u64 insn);
+ void SURED(u64 insn);
+ void SUST(u64 insn);
+ void SYNC(u64 insn);
+ void TEX(u64 insn);
+ void TEX_b(u64 insn);
+ void TEXS(u64 insn);
+ void TLD(u64 insn);
+ void TLD_b(u64 insn);
+ void TLD4(u64 insn);
+ void TLD4_b(u64 insn);
+ void TLD4S(u64 insn);
+ void TLDS(u64 insn);
+ void TMML(u64 insn);
+ void TMML_b(u64 insn);
+ void TXA(u64 insn);
+ void TXD(u64 insn);
+ void TXD_b(u64 insn);
+ void TXQ(u64 insn);
+ void TXQ_b(u64 insn);
+ void VABSDIFF(u64 insn);
+ void VABSDIFF4(u64 insn);
+ void VADD(u64 insn);
+ void VMAD(u64 insn);
+ void VMNMX(u64 insn);
+ void VOTE(u64 insn);
+ void VOTE_vtg(u64 insn);
+ void VSET(u64 insn);
+ void VSETP(u64 insn);
+ void VSHL(u64 insn);
+ void VSHR(u64 insn);
+ void XMAD_reg(u64 insn);
+ void XMAD_rc(u64 insn);
+ void XMAD_cr(u64 insn);
+ void XMAD_imm(u64 insn);
+
+ [[nodiscard]] IR::U32 X(IR::Reg reg);
+ [[nodiscard]] IR::U64 L(IR::Reg reg);
+ [[nodiscard]] IR::F32 F(IR::Reg reg);
+ [[nodiscard]] IR::F64 D(IR::Reg reg);
+
+ void X(IR::Reg dest_reg, const IR::U32& value);
+ void L(IR::Reg dest_reg, const IR::U64& value);
+ void F(IR::Reg dest_reg, const IR::F32& value);
+ void D(IR::Reg dest_reg, const IR::F64& value);
+
+ [[nodiscard]] IR::U32 GetReg8(u64 insn);
+ [[nodiscard]] IR::U32 GetReg20(u64 insn);
+ [[nodiscard]] IR::U32 GetReg39(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
+
+ [[nodiscard]] IR::U32 GetCbuf(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm32(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
+
+ void SetZFlag(const IR::U1& value);
+ void SetSFlag(const IR::U1& value);
+ void SetCFlag(const IR::U1& value);
+ void SetOFlag(const IR::U1& value);
+
+ void ResetZero();
+ void ResetSFlag();
+ void ResetCFlag();
+ void ResetOFlag();
+};
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
+ bool cc) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const iadd{insn};
+
+ if (sat) {
+ throw NotImplementedException("IADD SAT");
+ }
+ if (x && po) {
+ throw NotImplementedException("IADD X+PO");
+ }
+ // Operand A is always read from here, negated if needed
+ IR::U32 op_a{v.X(iadd.src_a)};
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ // Add both operands
+ IR::U32 result{v.ir.IAdd(op_a, op_b)};
+ if (x) {
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ result = v.ir.IAdd(result, carry);
+ }
+ if (po) {
+ // .PO adds one to the result
+ result = v.ir.IAdd(result, v.ir.Imm32(1));
+ }
+ if (cc) {
+ // Store flags
+ // TODO: Does this grab the result pre-PO or after?
+ if (po) {
+ throw NotImplementedException("IADD CC+PO");
+ }
+ // TODO: How does CC behave when X is set?
+ if (x) {
+ throw NotImplementedException("IADD X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ v.SetOFlag(v.ir.GetOverflowFromOp(result));
+ }
+ // Store result
+ v.X(iadd.dest_reg, result);
+}
+
+void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 insn;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> three_for_po;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<50, 1, u64> sat;
+ } const iadd{insn};
+
+ const bool po{iadd.three_for_po == 3};
+ if (!po && iadd.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+ IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD_reg(u64 insn) {
+ IADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IADD_cbuf(u64 insn) {
+ IADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IADD_imm(u64 insn) {
+ IADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::IADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> x;
+ BitField<54, 1, u64> sat;
+ BitField<55, 2, u64> three_for_po;
+ BitField<56, 1, u64> neg_a;
+ } const iadd32i{insn};
+
+ const bool po{iadd32i.three_for_po == 3};
+ const bool neg_a{!po && iadd32i.neg_a != 0};
+ IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Shift : u64 {
+ None,
+ Right,
+ Left,
+};
+enum class Half : u64 {
+ All,
+ Lower,
+ Upper,
+};
+
+[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
+ constexpr bool is_signed{false};
+ switch (half) {
+ case Half::All:
+ return value;
+ case Half::Lower:
+ return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
+ case Half::Upper:
+ return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
+ }
+ throw NotImplementedException("Invalid half");
+}
+
+[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
+ switch (shift) {
+ case Shift::None:
+ return value;
+ case Shift::Right: {
+ // 33-bit RS IADD3 edge case
+ const IR::U1 edge_case{ir.GetCarryFromOp(value)};
+ const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
+ return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
+ }
+ case Shift::Left:
+ return ir.ShiftLeftLogical(value, ir.Imm32(16));
+ }
+ throw NotImplementedException("Invalid shift");
+}
+
+void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
+ Shift shift = Shift::None) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> x;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> neg_b;
+ BitField<51, 1, u64> neg_a;
+ } iadd3{insn};
+
+ if (iadd3.neg_a != 0) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (iadd3.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+ if (iadd3.neg_c != 0) {
+ op_c = v.ir.INeg(op_c);
+ }
+ IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
+ if (iadd3.x != 0) {
+ // TODO: How does RS behave when X is set?
+ if (shift == Shift::Right) {
+ throw NotImplementedException("IADD3 X+RS");
+ }
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ lhs_1 = v.ir.IAdd(lhs_1, carry);
+ }
+ const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
+ const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
+
+ v.X(iadd3.dest_reg, result);
+ if (iadd3.cc != 0) {
+ // TODO: How does CC behave when X is set?
+ if (iadd3.x != 0) {
+ throw NotImplementedException("IADD3 X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
+ v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<37, 2, Shift> shift;
+ BitField<35, 2, Half> half_a;
+ BitField<33, 2, Half> half_b;
+ BitField<31, 2, Half> half_c;
+ } const iadd3{insn};
+
+ const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
+ const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
+ const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
+ IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
+}
+
+void TranslatorVisitor::IADD3_cbuf(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::IADD3_imm(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const icmp{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool is_signed{icmp.is_signed != 0};
+ const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
+
+ const IR::U32 src_reg{v.X(icmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(icmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ICMP_reg(u64 insn) {
+ ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_rc(u64 insn) {
+ ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::ICMP_cr(u64 insn) {
+ ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_imm(u64 insn) {
+ ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> x;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const iset{insn};
+
+ const IR::U32 src_a{v.X(iset.src_reg)};
+ const bool is_signed{iset.is_signed != 0};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool x{iset.x != 0};
+ const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
+
+ IR::U1 pred{v.ir.GetPred(iset.pred)};
+ if (iset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(iset.dest_reg, result);
+ if (iset.cc != 0) {
+ if (x) {
+ throw NotImplementedException("ISET.CC + X");
+ }
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (iset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISET_reg(u64 insn) {
+ ISET(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISET_cbuf(u64 insn) {
+ ISET(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISET_imm(u64 insn) {
+ ISET(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class IntFormat : u64 {
+ U8 = 0,
+ U16 = 1,
+ U32 = 2,
+ U64 = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, FloatFormat> float_format;
+ BitField<10, 2, IntFormat> int_format;
+ BitField<13, 1, u64> is_signed;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 2, u64> selector;
+ BitField<47, 1, u64> cc;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+};
+
+bool Is64(u64 insn) {
+ return Encoding{insn}.int_format == IntFormat::U64;
+}
+
+int BitSize(FloatFormat format) {
+ switch (format) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ }
+ throw NotImplementedException("Invalid float format {}", format);
+}
+
+IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
+ const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
+ const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
+ const IR::U1 is_least{v.ir.IEqual(value, least_value)};
+ return IR::U32{v.ir.Select(is_least, value, absolute)};
+}
+
+void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
+ const Encoding i2f{insn};
+ if (i2f.cc != 0) {
+ throw NotImplementedException("I2F CC");
+ }
+ const bool is_signed{i2f.is_signed != 0};
+ int src_bitsize{};
+ switch (i2f.int_format) {
+ case IntFormat::U8:
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(8), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 8);
+ }
+ src_bitsize = 8;
+ break;
+ case IntFormat::U16:
+ if (i2f.selector == 1 || i2f.selector == 3) {
+ throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
+ }
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(16), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 16);
+ }
+ src_bitsize = 16;
+ break;
+ case IntFormat::U32:
+ case IntFormat::U64:
+ if (i2f.selector != 0) {
+ throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
+ }
+ if (i2f.abs != 0 && is_signed) {
+ src = v.ir.IAbs(src);
+ }
+ src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
+ break;
+ }
+ const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
+ const int dst_bitsize{BitSize(i2f.float_format)};
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(i2f.fp_rounding),
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
+ static_cast<size_t>(conversion_src_bitsize), is_signed, src,
+ fp_control)};
+ if (i2f.neg != 0) {
+ if (i2f.abs != 0 || !is_signed) {
+ // We know the value is positive
+ value = v.ir.FPNeg(value);
+ } else {
+ // Only negate if the input isn't the lowest value
+ IR::U1 is_least;
+ if (src_bitsize == 64) {
+ is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
+ } else if (src_bitsize == 32) {
+ is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
+ } else {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
+ is_least = v.ir.IEqual(src, least_value);
+ }
+ value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
+ }
+ }
+ switch (i2f.float_format) {
+ case FloatFormat::F16: {
+ const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(i2f.dest_reg, value);
+ break;
+ case FloatFormat::F64: {
+ if (!IR::IsAligned(i2f.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
+ }
+ const IR::Value vector{v.ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; ++i) {
+ v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2F_reg(u64 insn) {
+ if (Is64(insn)) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> reg;
+ } const value{insn};
+ const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
+ I2F(*this, insn, ir.PackUint2x32(regs));
+ } else {
+ I2F(*this, insn, GetReg20(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_cbuf(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedCbuf(insn));
+ } else {
+ I2F(*this, insn, GetCbuf(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_imm(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedImm20(insn));
+ } else {
+ I2F(*this, insn, GetImm20(insn));
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class MaxShift : u64 {
+ U32,
+ Undefined,
+ U64,
+ S64,
+};
+
+IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
+ bool right_shift, bool is_signed) {
+ if (!right_shift) {
+ return ir.ShiftLeftLogical(packed_int, safe_shift);
+ }
+ if (is_signed) {
+ return ir.ShiftRightArithmetic(packed_int, safe_shift);
+ }
+ return ir.ShiftRightLogical(packed_int, safe_shift);
+}
+
+void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
+ bool right_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<0, 8, IR::Reg> lo_bits_reg;
+ BitField<37, 2, MaxShift> max_shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> x_mode;
+ BitField<50, 1, u64> wrap;
+ } const shf{insn};
+
+ if (shf.cc != 0) {
+ throw NotImplementedException("SHF CC");
+ }
+ if (shf.x_mode != 0) {
+ throw NotImplementedException("SHF X Mode");
+ }
+ if (shf.max_shift == MaxShift::Undefined) {
+ throw NotImplementedException("SHF Use of undefined MaxShift value");
+ }
+ const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
+ const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
+ const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
+ const IR::U32 safe_shift{shf.wrap != 0
+ ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
+ : v.ir.UMin(shift, max_shift)};
+
+ const bool is_signed{shf.max_shift == MaxShift::S64};
+ const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
+ const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
+
+ const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
+ v.X(shf.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHF_l_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_l_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_r_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
+}
+
+void TranslatorVisitor::SHF_r_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 2, u64> mode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const imnmx{insn};
+
+ if (imnmx.cc != 0) {
+ throw NotImplementedException("IMNMX CC");
+ }
+
+ if (imnmx.mode != 0) {
+ throw NotImplementedException("IMNMX.MODE");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
+ const IR::U32 op_a{v.X(imnmx.src_reg)};
+ IR::U32 min;
+ IR::U32 max;
+
+ if (imnmx.is_signed != 0) {
+ min = IR::U32{v.ir.SMin(op_a, op_b)};
+ max = IR::U32{v.ir.SMax(op_a, op_b)};
+ } else {
+ min = IR::U32{v.ir.UMin(op_a, op_b)};
+ max = IR::U32{v.ir.UMax(op_a, op_b)};
+ }
+ if (imnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ const IR::U32 result{v.ir.Select(pred, min, max)};
+ v.X(imnmx.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IMNMX_reg(u64 insn) {
+ IMNMX(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
+ IMNMX(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IMNMX_imm(u64 insn) {
+ IMNMX(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ } const popc{insn};
+
+ const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
+ const IR::U32 result = v.ir.BitCount(operand);
+ v.X(popc.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::POPC_reg(u64 insn) {
+ POPC(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::POPC_cbuf(u64 insn) {
+ POPC(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::POPC_imm(u64 insn) {
+ POPC(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
+ u64 scale_imm) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> op_a;
+ } const iscadd{insn};
+
+ const bool po{neg_a && neg_b};
+ IR::U32 op_a{v.X(iscadd.op_a)};
+ if (po) {
+ // When PO is present, add one
+ op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
+ } else {
+ // When PO is not present, the bits are interpreted as negation
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (neg_b) {
+ op_b = v.ir.INeg(op_b);
+ }
+ }
+ // With the operands already processed, scale A
+ const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
+ const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
+
+ const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
+ v.X(iscadd.dest_reg, result);
+
+ if (cc) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ const IR::U1 carry{v.ir.GetCarryFromOp(result)};
+ const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
+ v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
+ v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
+ }
+}
+
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<39, 5, u64> scale;
+ } const iscadd{insn};
+
+ ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISCADD_reg(u64 insn) {
+ ISCADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
+ ISCADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISCADD_imm(u64 insn) {
+ ISCADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::ISCADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 5, u64> scale;
+ } const iscadd{insn};
+
+ return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> x;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const isetp{insn};
+
+ const bool is_signed{isetp.is_signed != 0};
+ const bool x{isetp.x != 0};
+ const BooleanOp bop{isetp.bop};
+ const CompareOp compare_op{isetp.compare_op};
+ const IR::U32 op_a{v.X(isetp.src_reg_a)};
+ const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
+ const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(isetp.dest_pred_a, result_a);
+ v.ir.SetPred(isetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISETP_reg(u64 insn) {
+ ISETP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISETP_cbuf(u64 insn) {
+ ISETP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISETP_imm(u64 insn) {
+ ISETP(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> w;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ } const shl{insn};
+
+ if (shl.x != 0) {
+ throw NotImplementedException("SHL.X");
+ }
+ if (shl.cc != 0) {
+ throw NotImplementedException("SHL.CC");
+ }
+ const IR::U32 base{v.X(shl.src_reg_a)};
+ IR::U32 result;
+ if (shl.w != 0) {
+ // When .W is set, the shift value is wrapped
+ // To emulate this we just have to wrap it ourselves.
+ const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
+ result = v.ir.ShiftLeftLogical(base, shift);
+ } else {
+ // When .W is not set, the shift value is clamped between 0 and 32.
+ // To emulate this we have to have in mind the special shift of 32, that evaluates as 0.
+ // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
+ //
+ // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
+ // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
+ // or equal to the bit width of the components of Base."
+ //
+ // And on the GLASM specification it is also safe to evaluate out of bounds:
+ //
+ // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
+ // "The results of a shift operation ("<<") are undefined if the value of the second operand
+ // is negative, or greater than or equal to the number of bits in the first operand."
+ //
+ // Emphasis on undefined results in contrast to undefined behavior.
+ //
+ const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
+ const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
+ result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
+ }
+ v.X(shl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHL_reg(u64 insn) {
+ SHL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHL_cbuf(u64 insn) {
+ SHL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHL_imm(u64 insn) {
+ SHL(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> is_wrapped;
+ BitField<40, 1, u64> brev;
+ BitField<43, 1, u64> xmode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const shr{insn};
+
+ if (shr.xmode != 0) {
+ throw NotImplementedException("SHR.XMODE");
+ }
+ if (shr.cc != 0) {
+ throw NotImplementedException("SHR.CC");
+ }
+
+ IR::U32 base{v.X(shr.src_reg_a)};
+ if (shr.brev == 1) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result;
+ const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
+ if (shr.is_signed == 1) {
+ result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
+ } else {
+ result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
+ }
+
+ if (shr.is_wrapped == 0) {
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 safe_bits{v.ir.Imm32(32)};
+
+ const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
+ const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
+ const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
+ }
+ v.X(shr.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHR_reg(u64 insn) {
+ SHR(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHR_cbuf(u64 insn) {
+ SHR(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHR_imm(u64 insn) {
+ SHR(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SelectMode : u64 {
+ Default,
+ CLO,
+ CHI,
+ CSFU,
+ CBCC,
+};
+
+enum class Half : u64 {
+ H0, // Least-significant bits (15:0)
+ H1, // Most-significant bits (31:16)
+};
+
+IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
+ const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
+ return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
+}
+
+void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
+ SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_a_signed;
+ BitField<49, 1, u64> is_b_signed;
+ BitField<53, 1, Half> half_a;
+ } const xmad{insn};
+
+ if (x) {
+ throw NotImplementedException("XMAD X");
+ }
+ const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
+ const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
+
+ IR::U32 product{v.ir.IMul(op_a, op_b)};
+ if (psl) {
+ // .PSL shifts the product 16 bits
+ product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
+ }
+ const IR::U32 op_c{[&]() -> IR::U32 {
+ switch (select_mode) {
+ case SelectMode::Default:
+ return src_c;
+ case SelectMode::CLO:
+ return ExtractHalf(v, src_c, Half::H0, false);
+ case SelectMode::CHI:
+ return ExtractHalf(v, src_c, Half::H1, false);
+ case SelectMode::CBCC:
+ return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
+ case SelectMode::CSFU:
+ throw NotImplementedException("XMAD CSFU");
+ }
+ throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
+ }()};
+ IR::U32 result{v.ir.IAdd(product, op_c)};
+ if (mrg) {
+ // .MRG inserts src_b [15:0] into result's [31:16].
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
+ result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
+ }
+ if (xmad.cc) {
+ throw NotImplementedException("XMAD CC");
+ }
+ // Store result
+ v.X(xmad.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::XMAD_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<35, 1, Half> half_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
+ xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ BitField<55, 1, u64> psl;
+ BitField<56, 1, u64> mrg;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 16, u64> src_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
+ Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class IntegerWidth : u64 {
+ Byte,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
+ switch (width) {
+ case IntegerWidth::Byte:
+ return ir.Imm32(8);
+ case IntegerWidth::Short:
+ return ir.Imm32(16);
+ case IntegerWidth::Word:
+ return ir.Imm32(32);
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
+ IntegerWidth dst_width) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 count{WidthSize(ir, dst_width)};
+ return ir.BitFieldExtract(src, zero, count, false);
+}
+
+[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
+ bool dst_signed, bool src_signed) {
+ IR::U32 min{};
+ IR::U32 max{};
+ const IR::U32 zero{ir.Imm32(0)};
+ switch (dst_width) {
+ case IntegerWidth::Byte:
+ min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
+ max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
+ break;
+ case IntegerWidth::Short:
+ min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
+ break;
+ case IntegerWidth::Word:
+ min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
+ break;
+ default:
+ throw NotImplementedException("Invalid width {}", dst_width);
+ }
+ const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
+ return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
+}
+
+void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, IntegerWidth> dst_fmt;
+ BitField<12, 1, u64> dst_fmt_sign;
+ BitField<10, 2, IntegerWidth> src_fmt;
+ BitField<13, 1, u64> src_fmt_sign;
+ BitField<41, 3, u64> selector;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> abs;
+ BitField<50, 1, u64> sat;
+ } const i2i{insn};
+
+ if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
+ throw NotImplementedException("16-bit source format incompatible with selector {}",
+ i2i.selector);
+ }
+ if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
+ throw NotImplementedException("32-bit source format incompatible with selector {}",
+ i2i.selector);
+ }
+
+ const s32 selector{static_cast<s32>(i2i.selector)};
+ const IR::U32 offset{v.ir.Imm32(selector * 8)};
+ const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
+ const bool src_signed{i2i.src_fmt_sign != 0};
+ const bool dst_signed{i2i.dst_fmt_sign != 0};
+ const bool sat{i2i.sat != 0};
+
+ IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
+ if (i2i.abs != 0) {
+ src_values = v.ir.IAbs(src_values);
+ }
+ if (i2i.neg != 0) {
+ src_values = v.ir.INeg(src_values);
+ }
+ const IR::U32 result{
+ sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
+ : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
+
+ v.X(i2i.dest_reg, result);
+ if (i2i.cc != 0) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2I_reg(u64 insn) {
+ I2I(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::I2I_cbuf(u64 insn) {
+ I2I(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::I2I_imm(u64 insn) {
+ I2I(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ Patch,
+ Prim,
+ Attr,
+};
+
+enum class Shift : u64 {
+ Default,
+ U16,
+ B32,
+};
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISBERD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<31, 1, u64> skew;
+ BitField<32, 1, u64> o;
+ BitField<33, 2, Mode> mode;
+ BitField<47, 2, Shift> shift;
+ } const isberd{insn};
+
+ if (isberd.skew != 0) {
+ throw NotImplementedException("SKEW");
+ }
+ if (isberd.o != 0) {
+ throw NotImplementedException("O");
+ }
+ if (isberd.mode != Mode::Default) {
+ throw NotImplementedException("Mode {}", isberd.mode.Value());
+ }
+ if (isberd.shift != Shift::Default) {
+ throw NotImplementedException("Shift {}", isberd.shift.Value());
+ }
+ LOG_WARNING(Shader, "(STUBBED) called");
+ X(isberd.dest_reg, X(isberd.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+using namespace LDC;
+namespace {
+std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
+ const IR::U32& reg, const IR::U32& imm) {
+ switch (mode) {
+ case Mode::Default:
+ return {imm_index, ir.IAdd(reg, imm)};
+ default:
+ break;
+ }
+ throw NotImplementedException("Mode {}", mode);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDC(u64 insn) {
+ const Encoding ldc{insn};
+ const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
+ const IR::U32 reg{X(ldc.src_reg)};
+ const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
+ const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
+ switch (ldc.size) {
+ case Size::U8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
+ break;
+ case Size::S8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
+ break;
+ case Size::U16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
+ break;
+ case Size::S16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
+ break;
+ case Size::B32:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
+ break;
+ case Size::B64: {
+ if (!IR::IsAligned(ldc.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
+ for (int i = 0; i < 2; ++i) {
+ X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid size {}", ldc.size.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell::LDC {
+
+enum class Mode : u64 {
+ Default,
+ IL,
+ IS,
+ ISL,
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 16, s64> offset;
+ BitField<36, 5, u64> index;
+ BitField<44, 2, Mode> mode;
+ BitField<48, 3, Size> size;
+};
+
+} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
+ bool neg, bool x) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+
+ if (x) {
+ throw NotImplementedException("LEA.HI X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.HI Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.HI CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
+ const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
+
+ const s32 hi_scale{32 - static_cast<s32>(scale)};
+ const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
+ const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
+ v.X(lea.dest_reg, result);
+}
+
+void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<39, 5, u64> scale;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+ if (lea.x != 0) {
+ throw NotImplementedException("LEA.LO X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.LO Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.LO CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const s32 scale{static_cast<s32>(lea.scale)};
+ const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
+ const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset)};
+ v.X(lea.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LEA_hi_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 5, u64> scale;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<51, 5, u64> scale;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_lo_reg(u64 insn) {
+ LEA_lo(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
+ LEA_lo(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LEA_lo_imm(u64 insn) {
+ LEA_lo(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+enum class InterpolationMode : u64 {
+ Pass,
+ Multiply,
+ Constant,
+ Sc,
+};
+
+enum class SampleMode : u64 {
+ Default,
+ Centroid,
+ Offset,
+};
+
+u32 NumElements(Size size) {
+ switch (size) {
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B96:
+ return 3;
+ case Size::B128:
+ return 4;
+ }
+ throw InvalidArgument("Invalid size {}", size);
+}
+
+template <typename F>
+void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
+ const IR::U32 index_value{v.X(index_reg)};
+ for (u32 element = 0; element < num_elements; ++element) {
+ const IR::U32 final_offset{
+ element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
+ f(element, final_offset);
+ }
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ALD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<32, 1, u64> o;
+ BitField<31, 1, u64> patch;
+ BitField<47, 2, Size> size;
+ } const ald{insn};
+
+ const u64 offset{ald.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ald.vertex_reg)};
+ const u32 num_elements{NumElements(ald.size)};
+ if (ald.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ald.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
+ }
+ }
+ return;
+ }
+ if (ald.patch != 0) {
+ throw NotImplementedException("Indirect patch read");
+ }
+ HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
+ });
+}
+
+void TranslatorVisitor::AST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<31, 1, u64> patch;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<47, 2, Size> size;
+ } const ast{insn};
+
+ if (ast.index_reg != IR::Reg::RZ) {
+ throw NotImplementedException("Indexed store");
+ }
+ const u64 offset{ast.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ast.vertex_reg)};
+ const u32 num_elements{NumElements(ast.size)};
+ if (ast.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ast.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
+ }
+ }
+ return;
+ }
+ if (ast.patch != 0) {
+ throw NotImplementedException("Indexed tessellation patch store");
+ }
+ HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
+ });
+}
+
+void TranslatorVisitor::IPA(u64 insn) {
+ // IPA is the instruction used to read varyings from a fragment shader.
+ // gl_FragCoord is mapped to the gl_Position attribute.
+ // It yields unknown results when used outside of the fragment shader stage.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 8, IR::Reg> multiplier;
+ BitField<30, 8, IR::Attribute> attribute;
+ BitField<38, 1, u64> idx;
+ BitField<51, 1, u64> sat;
+ BitField<52, 2, SampleMode> sample_mode;
+ BitField<54, 2, InterpolationMode> interpolation_mode;
+ } const ipa{insn};
+
+ // Indexed IPAs are used for indexed varyings.
+ // For example:
+ //
+ // in vec4 colors[4];
+ // uniform int idx;
+ // void main() {
+ // gl_FragColor = colors[idx];
+ // }
+ const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
+ const IR::Attribute attribute{ipa.attribute};
+ IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
+ : ir.GetAttribute(attribute)};
+ if (IR::IsGeneric(attribute)) {
+ const ProgramHeader& sph{env.SPH()};
+ const u32 attr_index{IR::GenericAttributeIndex(attribute)};
+ const u32 element{static_cast<u32>(attribute) % 4};
+ const std::array input_map{sph.ps.GenericInputMap(attr_index)};
+ const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
+ if (is_perspective) {
+ const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
+ value = ir.FPMul(value, position_w);
+ }
+ }
+ if (ipa.interpolation_mode == InterpolationMode::Multiply) {
+ value = ir.FPMul(value, F(ipa.multiplier));
+ }
+
+ // Saturated IPAs are generally generated out of clamped varyings.
+ // For example: clamp(some_varying, 0.0, 1.0)
+ const bool is_saturated{ipa.sat != 0};
+ if (is_saturated) {
+ if (attribute == IR::Attribute::FrontFace) {
+ throw NotImplementedException("IPA.SAT on FrontFace");
+ }
+ value = ir.FPSaturate(value);
+ }
+
+ F(ipa.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<20, 24, u64> absolute_offset;
+ BitField<20, 24, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
+ const IR::U32 offset{Offset(v, insn)};
+ if (offset.IsImmediate()) {
+ return {v.ir.Imm32(offset.U32() / 4), offset};
+ } else {
+ return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
+ }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 3, Size> size;
+ } const encoding{insn};
+
+ switch (encoding.size) {
+ case Size::U8:
+ return {8, false};
+ case Size::S8:
+ return {8, true};
+ case Size::U16:
+ return {16, false};
+ case Size::S16:
+ return {16, true};
+ case Size::B32:
+ return {32, false};
+ case Size::B64:
+ return {64, false};
+ case Size::B128:
+ return {128, false};
+ default:
+ throw NotImplementedException("Invalid size {}", encoding.size.Value());
+ }
+}
+
+IR::Reg Reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> reg;
+ } const encoding{insn};
+
+ return encoding.reg;
+}
+
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
+}
+
+IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
+}
+
+IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
+ const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
+ const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
+ return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ const IR::U32 word{LoadLocal(*this, word_offset, offset)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ X(dest, word);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
+ const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
+ X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::LDS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ X(dest, IR::U32{value});
+ break;
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ for (int element = 0; element < bit_size / 32; ++element) {
+ X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ if (offset.IsImmediate()) {
+ // TODO: Support storing out of bounds at runtime
+ if (offset.U32() >= env.LocalMemorySize()) {
+ LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
+ offset.U32(), env.LocalMemorySize());
+ return;
+ }
+ }
+ const IR::Reg reg{Reg(insn)};
+ const IR::U32 src{X(reg)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned source register");
+ }
+ ir.WriteLocal(word_offset, src);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg reg{Reg(insn)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ ir.WriteShared(bit_size, offset, X(reg));
+ break;
+ case 64:
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
+ break;
+ case 128: {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
+ ir.WriteShared(128, offset, vector);
+ break;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LoadSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+ U128, // ???
+};
+
+enum class StoreSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+};
+
+// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (cache in L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
+};
+
+// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory)
+};
+
+IR::U64 Address(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 24, s64> addr_offset;
+ BitField<20, 24, u64> rz_addr_offset;
+ BitField<45, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ // LDG/STG without .E uses a 32-bit pointer, zero-extend it
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ if (!IR::IsAligned(mem.addr_reg, 2)) {
+ throw NotImplementedException("Unaligned address register");
+ }
+ // Pack two registers to build the 64-bit address
+ return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ // Apply the offset
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDG(u64 insn) {
+ // LDG loads global memory into registers
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<46, 2, LoadCache> cache;
+ BitField<48, 3, LoadSize> size;
+ } const ldg{insn};
+
+ // Pointer to load data from
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg dest_reg{ldg.dest_reg};
+ switch (ldg.size) {
+ case LoadSize::U8:
+ X(dest_reg, ir.LoadGlobalU8(address));
+ break;
+ case LoadSize::S8:
+ X(dest_reg, ir.LoadGlobalS8(address));
+ break;
+ case LoadSize::U16:
+ X(dest_reg, ir.LoadGlobalU16(address));
+ break;
+ case LoadSize::S16:
+ X(dest_reg, ir.LoadGlobalS16(address));
+ break;
+ case LoadSize::B32:
+ X(dest_reg, ir.LoadGlobal32(address));
+ break;
+ case LoadSize::B64: {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal64(address)};
+ for (int i = 0; i < 2; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ case LoadSize::B128:
+ case LoadSize::U128: {
+ if (!IR::IsAligned(dest_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal128(address)};
+ for (int i = 0; i < 4; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
+ }
+}
+
+void TranslatorVisitor::STG(u64 insn) {
+ // STG stores registers into global memory.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<46, 2, StoreCache> cache;
+ BitField<48, 3, StoreSize> size;
+ } const stg{insn};
+
+ // Pointer to store data into
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg data_reg{stg.data_reg};
+ switch (stg.size) {
+ case StoreSize::U8:
+ ir.WriteGlobalU8(address, X(data_reg));
+ break;
+ case StoreSize::S8:
+ ir.WriteGlobalS8(address, X(data_reg));
+ break;
+ case StoreSize::U16:
+ ir.WriteGlobalU16(address, X(data_reg));
+ break;
+ case StoreSize::S16:
+ ir.WriteGlobalS16(address, X(data_reg));
+ break;
+ case StoreSize::B32:
+ ir.WriteGlobal32(address, X(data_reg));
+ break;
+ case StoreSize::B64: {
+ if (!IR::IsAligned(data_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
+ ir.WriteGlobal64(address, vector);
+ break;
+ }
+ case StoreSize::B128:
+ if (!IR::IsAligned(data_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{
+ ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
+ ir.WriteGlobal128(address, vector);
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LogicalOp : u64 {
+ AND,
+ OR,
+ XOR,
+ PASS_B,
+};
+
+[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, LogicalOp op) {
+ switch (op) {
+ case LogicalOp::AND:
+ return ir.BitwiseAnd(operand_1, operand_2);
+ case LogicalOp::OR:
+ return ir.BitwiseOr(operand_1, operand_2);
+ case LogicalOp::XOR:
+ return ir.BitwiseXor(operand_1, operand_2);
+ case LogicalOp::PASS_B:
+ return operand_2;
+ default:
+ throw NotImplementedException("Invalid Logical operation {}", op);
+ }
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
+ LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
+ IR::Pred dest_pred = IR::Pred::PT) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ } const lop{insn};
+
+ if (x) {
+ throw NotImplementedException("X");
+ }
+ IR::U32 op_a{v.X(lop.src_reg)};
+ if (inv_a != 0) {
+ op_a = v.ir.BitwiseNot(op_a);
+ }
+ if (inv_b != 0) {
+ op_b = v.ir.BitwiseNot(op_b);
+ }
+
+ const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
+ if (pred_op) {
+ const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
+ v.ir.SetPred(dest_pred, pred_result);
+ }
+ if (cc) {
+ if (bit_op == LogicalOp::PASS_B) {
+ v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
+ v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
+ } else {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+ v.X(lop.dest_reg, result);
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<39, 1, u64> inv_a;
+ BitField<40, 1, u64> inv_b;
+ BitField<41, 2, LogicalOp> bit_op;
+ BitField<43, 1, u64> x;
+ BitField<44, 2, PredicateOp> pred_op;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> dest_pred;
+ } const lop{insn};
+
+ LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
+ lop.pred_op, lop.dest_pred);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP_reg(u64 insn) {
+ LOP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LOP_cbuf(u64 insn) {
+ LOP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LOP_imm(u64 insn) {
+ LOP(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::LOP32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<53, 2, LogicalOp> bit_op;
+ BitField<57, 1, u64> x;
+ BitField<52, 1, u64> cc;
+ BitField<55, 1, u64> inv_a;
+ BitField<56, 1, u64> inv_b;
+ } const lop32i{insn};
+
+ LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
+ lop32i.inv_b != 0, lop32i.bit_op);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
+// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
+IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
+ u64 ttbl) {
+ IR::U32 r{ir.Imm32(0)};
+ const IR::U32 not_a{ir.BitwiseNot(a)};
+ const IR::U32 not_b{ir.BitwiseNot(b)};
+ const IR::U32 not_c{ir.BitwiseNot(c)};
+ if (ttbl & 0x01) {
+ // r |= ~a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x02) {
+ // r |= ~a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x04) {
+ // r |= ~a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x08) {
+ // r |= ~a & b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x10) {
+ // r |= a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x20) {
+ // r |= a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x40) {
+ // r |= a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x80) {
+ // r |= a & b & c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ return r;
+}
+
+IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> cc;
+ } const lop3{insn};
+
+ if (lop3.cc != 0) {
+ throw NotImplementedException("LOP3 CC");
+ }
+
+ const IR::U32 op_a{v.X(lop3.src_reg)};
+ const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
+ v.X(lop3.dest_reg, result);
+ return result;
+}
+
+u64 GetLut48(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 8, u64> lut;
+ } const lut{insn};
+ return lut.lut;
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 8, u64> lut;
+ BitField<38, 1, u64> x;
+ BitField<36, 2, PredicateOp> pred_op;
+ BitField<48, 3, IR::Pred> pred;
+ } const lop3{insn};
+
+ if (lop3.x != 0) {
+ throw NotImplementedException("LOP3 X");
+ }
+ const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
+ const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
+ ir.SetPred(lop3.pred, pred_result);
+}
+
+void TranslatorVisitor::LOP3_cbuf(u64 insn) {
+ LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
+}
+
+void TranslatorVisitor::LOP3_imm(u64 insn) {
+ LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::P2R_reg(u64) {
+ throw NotImplementedException("P2R (reg)");
+}
+
+void TranslatorVisitor::P2R_cbuf(u64) {
+ throw NotImplementedException("P2R (cbuf)");
+}
+
+void TranslatorVisitor::P2R_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const p2r{insn};
+
+ const u32 mask{GetImm20(insn).U32()};
+ const bool pr_mode{p2r.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
+ IR::U32 insert{ir.Imm32(0)};
+ for (u32 index = 0; index < num_items; ++index) {
+ if (((mask >> index) & 1) == 0) {
+ continue;
+ }
+ const IR::U1 cond{[this, index, pr_mode] {
+ if (pr_mode) {
+ return ir.GetPred(IR::Pred{index});
+ }
+ switch (index) {
+ case 0:
+ return ir.GetZFlag();
+ case 1:
+ return ir.GetSFlag();
+ case 2:
+ return ir.GetCFlag();
+ case 3:
+ return ir.GetOFlag();
+ }
+ throw LogicError("Unreachable P2R index");
+ }()};
+ const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
+ insert = ir.BitwiseOr(insert, bit);
+ }
+ const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
+ X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 4, u64> mask;
+ BitField<12, 4, u64> mov32i_mask;
+ } const mov{insn};
+
+ if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
+ throw NotImplementedException("Non-full move mask");
+ }
+ v.X(mov.dest_reg, src);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::MOV_reg(u64 insn) {
+ MOV(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::MOV_cbuf(u64 insn) {
+ MOV(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::MOV_imm(u64 insn) {
+ MOV(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::MOV32I(u64 insn) {
+ MOV(*this, insn, GetImm32(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+
+void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
+ switch (index) {
+ case 0:
+ return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
+ case 1:
+ return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
+ case 2:
+ return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
+ case 3:
+ return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
+ default:
+ throw LogicError("Unreachable R2P index");
+ }
+}
+
+void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const r2p{insn};
+ const IR::U32 src{v.X(r2p.src_reg)};
+ const IR::U32 count{v.ir.Imm32(1)};
+ const bool pr_mode{r2p.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
+ for (u32 index = 0; index < num_items; ++index) {
+ const IR::U32 offset{v.ir.Imm32(offset_base + index)};
+ const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
+ const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
+ const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
+ const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
+ if (pr_mode) {
+ const IR::Pred pred{index};
+ v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
+ } else {
+ SetFlag(v.ir, inv_mask_bit, src_bit, index);
+ }
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::R2P_reg(u64 insn) {
+ R2P(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::R2P_cbuf(u64 insn) {
+ R2P(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::R2P_imm(u64 insn) {
+ R2P(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SpecialRegister : u64 {
+ SR_LANEID = 0,
+ SR_CLOCK = 1,
+ SR_VIRTCFG = 2,
+ SR_VIRTID = 3,
+ SR_PM0 = 4,
+ SR_PM1 = 5,
+ SR_PM2 = 6,
+ SR_PM3 = 7,
+ SR_PM4 = 8,
+ SR_PM5 = 9,
+ SR_PM6 = 10,
+ SR_PM7 = 11,
+ SR12 = 12,
+ SR13 = 13,
+ SR14 = 14,
+ SR_ORDERING_TICKET = 15,
+ SR_PRIM_TYPE = 16,
+ SR_INVOCATION_ID = 17,
+ SR_Y_DIRECTION = 18,
+ SR_THREAD_KILL = 19,
+ SM_SHADER_TYPE = 20,
+ SR_DIRECTCBEWRITEADDRESSLOW = 21,
+ SR_DIRECTCBEWRITEADDRESSHIGH = 22,
+ SR_DIRECTCBEWRITEENABLE = 23,
+ SR_MACHINE_ID_0 = 24,
+ SR_MACHINE_ID_1 = 25,
+ SR_MACHINE_ID_2 = 26,
+ SR_MACHINE_ID_3 = 27,
+ SR_AFFINITY = 28,
+ SR_INVOCATION_INFO = 29,
+ SR_WSCALEFACTOR_XY = 30,
+ SR_WSCALEFACTOR_Z = 31,
+ SR_TID = 32,
+ SR_TID_X = 33,
+ SR_TID_Y = 34,
+ SR_TID_Z = 35,
+ SR_CTA_PARAM = 36,
+ SR_CTAID_X = 37,
+ SR_CTAID_Y = 38,
+ SR_CTAID_Z = 39,
+ SR_NTID = 40,
+ SR_CirQueueIncrMinusOne = 41,
+ SR_NLATC = 42,
+ SR43 = 43,
+ SR_SM_SPA_VERSION = 44,
+ SR_MULTIPASSSHADERINFO = 45,
+ SR_LWINHI = 46,
+ SR_SWINHI = 47,
+ SR_SWINLO = 48,
+ SR_SWINSZ = 49,
+ SR_SMEMSZ = 50,
+ SR_SMEMBANKS = 51,
+ SR_LWINLO = 52,
+ SR_LWINSZ = 53,
+ SR_LMEMLOSZ = 54,
+ SR_LMEMHIOFF = 55,
+ SR_EQMASK = 56,
+ SR_LTMASK = 57,
+ SR_LEMASK = 58,
+ SR_GTMASK = 59,
+ SR_GEMASK = 60,
+ SR_REGALLOC = 61,
+ SR_BARRIERALLOC = 62,
+ SR63 = 63,
+ SR_GLOBALERRORSTATUS = 64,
+ SR65 = 65,
+ SR_WARPERRORSTATUS = 66,
+ SR_WARPERRORSTATUSCLEAR = 67,
+ SR68 = 68,
+ SR69 = 69,
+ SR70 = 70,
+ SR71 = 71,
+ SR_PM_HI0 = 72,
+ SR_PM_HI1 = 73,
+ SR_PM_HI2 = 74,
+ SR_PM_HI3 = 75,
+ SR_PM_HI4 = 76,
+ SR_PM_HI5 = 77,
+ SR_PM_HI6 = 78,
+ SR_PM_HI7 = 79,
+ SR_CLOCKLO = 80,
+ SR_CLOCKHI = 81,
+ SR_GLOBALTIMERLO = 82,
+ SR_GLOBALTIMERHI = 83,
+ SR84 = 84,
+ SR85 = 85,
+ SR86 = 86,
+ SR87 = 87,
+ SR88 = 88,
+ SR89 = 89,
+ SR90 = 90,
+ SR91 = 91,
+ SR92 = 92,
+ SR93 = 93,
+ SR94 = 94,
+ SR95 = 95,
+ SR_HWTASKID = 96,
+ SR_CIRCULARQUEUEENTRYINDEX = 97,
+ SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
+ SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
+};
+
+[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
+ switch (special_register) {
+ case SpecialRegister::SR_INVOCATION_ID:
+ return ir.InvocationId();
+ case SpecialRegister::SR_THREAD_KILL:
+ return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
+ case SpecialRegister::SR_INVOCATION_INFO:
+ LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
+ return ir.Imm32(0x00ff'0000);
+ case SpecialRegister::SR_TID: {
+ const IR::Value tid{ir.LocalInvocationId()};
+ return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
+ IR::U32{ir.CompositeExtract(tid, 1)},
+ ir.Imm32(16), ir.Imm32(8)),
+ IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
+ }
+ case SpecialRegister::SR_TID_X:
+ return ir.LocalInvocationIdX();
+ case SpecialRegister::SR_TID_Y:
+ return ir.LocalInvocationIdY();
+ case SpecialRegister::SR_TID_Z:
+ return ir.LocalInvocationIdZ();
+ case SpecialRegister::SR_CTAID_X:
+ return ir.WorkgroupIdX();
+ case SpecialRegister::SR_CTAID_Y:
+ return ir.WorkgroupIdY();
+ case SpecialRegister::SR_CTAID_Z:
+ return ir.WorkgroupIdZ();
+ case SpecialRegister::SR_WSCALEFACTOR_XY:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_WSCALEFACTOR_Z:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_LANEID:
+ return ir.LaneId();
+ case SpecialRegister::SR_EQMASK:
+ return ir.SubgroupEqMask();
+ case SpecialRegister::SR_LTMASK:
+ return ir.SubgroupLtMask();
+ case SpecialRegister::SR_LEMASK:
+ return ir.SubgroupLeMask();
+ case SpecialRegister::SR_GTMASK:
+ return ir.SubgroupGtMask();
+ case SpecialRegister::SR_GEMASK:
+ return ir.SubgroupGeMask();
+ case SpecialRegister::SR_Y_DIRECTION:
+ return ir.BitCast<IR::U32>(ir.YDirection());
+ case SpecialRegister::SR_AFFINITY:
+ LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
+ return ir.Imm32(0); // This is the default value hardware returns.
+ default:
+ throw NotImplementedException("S2R special register {}", special_register);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::S2R(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, SpecialRegister> src_reg;
+ } const s2r{insn};
+
+ X(s2r.dest_reg, Read(ir, s2r.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
+ throw NotImplementedException("Instruction {} is not implemented", opcode);
+}
+
+void TranslatorVisitor::ATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOM_cas);
+}
+
+void TranslatorVisitor::ATOMS_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOMS_cas);
+}
+
+void TranslatorVisitor::B2R(u64) {
+ ThrowNotImplemented(Opcode::B2R);
+}
+
+void TranslatorVisitor::BPT(u64) {
+ ThrowNotImplemented(Opcode::BPT);
+}
+
+void TranslatorVisitor::BRA(u64) {
+ ThrowNotImplemented(Opcode::BRA);
+}
+
+void TranslatorVisitor::BRK(u64) {
+ ThrowNotImplemented(Opcode::BRK);
+}
+
+void TranslatorVisitor::CAL() {
+ // CAL is a no-op
+}
+
+void TranslatorVisitor::CCTL(u64) {
+ ThrowNotImplemented(Opcode::CCTL);
+}
+
+void TranslatorVisitor::CCTLL(u64) {
+ ThrowNotImplemented(Opcode::CCTLL);
+}
+
+void TranslatorVisitor::CONT(u64) {
+ ThrowNotImplemented(Opcode::CONT);
+}
+
+void TranslatorVisitor::CS2R(u64) {
+ ThrowNotImplemented(Opcode::CS2R);
+}
+
+void TranslatorVisitor::FCHK_reg(u64) {
+ ThrowNotImplemented(Opcode::FCHK_reg);
+}
+
+void TranslatorVisitor::FCHK_cbuf(u64) {
+ ThrowNotImplemented(Opcode::FCHK_cbuf);
+}
+
+void TranslatorVisitor::FCHK_imm(u64) {
+ ThrowNotImplemented(Opcode::FCHK_imm);
+}
+
+void TranslatorVisitor::GETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::GETCRSPTR);
+}
+
+void TranslatorVisitor::GETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::GETLMEMBASE);
+}
+
+void TranslatorVisitor::IDE(u64) {
+ ThrowNotImplemented(Opcode::IDE);
+}
+
+void TranslatorVisitor::IDP_reg(u64) {
+ ThrowNotImplemented(Opcode::IDP_reg);
+}
+
+void TranslatorVisitor::IDP_imm(u64) {
+ ThrowNotImplemented(Opcode::IDP_imm);
+}
+
+void TranslatorVisitor::IMAD_reg(u64) {
+ ThrowNotImplemented(Opcode::IMAD_reg);
+}
+
+void TranslatorVisitor::IMAD_rc(u64) {
+ ThrowNotImplemented(Opcode::IMAD_rc);
+}
+
+void TranslatorVisitor::IMAD_cr(u64) {
+ ThrowNotImplemented(Opcode::IMAD_cr);
+}
+
+void TranslatorVisitor::IMAD_imm(u64) {
+ ThrowNotImplemented(Opcode::IMAD_imm);
+}
+
+void TranslatorVisitor::IMAD32I(u64) {
+ ThrowNotImplemented(Opcode::IMAD32I);
+}
+
+void TranslatorVisitor::IMADSP_reg(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_reg);
+}
+
+void TranslatorVisitor::IMADSP_rc(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_rc);
+}
+
+void TranslatorVisitor::IMADSP_cr(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_cr);
+}
+
+void TranslatorVisitor::IMADSP_imm(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_imm);
+}
+
+void TranslatorVisitor::IMUL_reg(u64) {
+ ThrowNotImplemented(Opcode::IMUL_reg);
+}
+
+void TranslatorVisitor::IMUL_cbuf(u64) {
+ ThrowNotImplemented(Opcode::IMUL_cbuf);
+}
+
+void TranslatorVisitor::IMUL_imm(u64) {
+ ThrowNotImplemented(Opcode::IMUL_imm);
+}
+
+void TranslatorVisitor::IMUL32I(u64) {
+ ThrowNotImplemented(Opcode::IMUL32I);
+}
+
+void TranslatorVisitor::JCAL(u64) {
+ ThrowNotImplemented(Opcode::JCAL);
+}
+
+void TranslatorVisitor::JMP(u64) {
+ ThrowNotImplemented(Opcode::JMP);
+}
+
+void TranslatorVisitor::KIL() {
+ // KIL is a no-op
+}
+
+void TranslatorVisitor::LD(u64) {
+ ThrowNotImplemented(Opcode::LD);
+}
+
+void TranslatorVisitor::LEPC(u64) {
+ ThrowNotImplemented(Opcode::LEPC);
+}
+
+void TranslatorVisitor::LONGJMP(u64) {
+ ThrowNotImplemented(Opcode::LONGJMP);
+}
+
+void TranslatorVisitor::NOP(u64) {
+ // NOP is No-Op.
+}
+
+void TranslatorVisitor::PBK() {
+ // PBK is a no-op
+}
+
+void TranslatorVisitor::PCNT() {
+ // PCNT is a no-op
+}
+
+void TranslatorVisitor::PEXIT(u64) {
+ ThrowNotImplemented(Opcode::PEXIT);
+}
+
+void TranslatorVisitor::PLONGJMP(u64) {
+ ThrowNotImplemented(Opcode::PLONGJMP);
+}
+
+void TranslatorVisitor::PRET(u64) {
+ ThrowNotImplemented(Opcode::PRET);
+}
+
+void TranslatorVisitor::PRMT_reg(u64) {
+ ThrowNotImplemented(Opcode::PRMT_reg);
+}
+
+void TranslatorVisitor::PRMT_rc(u64) {
+ ThrowNotImplemented(Opcode::PRMT_rc);
+}
+
+void TranslatorVisitor::PRMT_cr(u64) {
+ ThrowNotImplemented(Opcode::PRMT_cr);
+}
+
+void TranslatorVisitor::PRMT_imm(u64) {
+ ThrowNotImplemented(Opcode::PRMT_imm);
+}
+
+void TranslatorVisitor::R2B(u64) {
+ ThrowNotImplemented(Opcode::R2B);
+}
+
+void TranslatorVisitor::RAM(u64) {
+ ThrowNotImplemented(Opcode::RAM);
+}
+
+void TranslatorVisitor::RET(u64) {
+ ThrowNotImplemented(Opcode::RET);
+}
+
+void TranslatorVisitor::RTT(u64) {
+ ThrowNotImplemented(Opcode::RTT);
+}
+
+void TranslatorVisitor::SAM(u64) {
+ ThrowNotImplemented(Opcode::SAM);
+}
+
+void TranslatorVisitor::SETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::SETCRSPTR);
+}
+
+void TranslatorVisitor::SETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::SETLMEMBASE);
+}
+
+void TranslatorVisitor::SSY() {
+ // SSY is a no-op
+}
+
+void TranslatorVisitor::ST(u64) {
+ ThrowNotImplemented(Opcode::ST);
+}
+
+void TranslatorVisitor::STP(u64) {
+ ThrowNotImplemented(Opcode::STP);
+}
+
+void TranslatorVisitor::SUATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::SUATOM_cas);
+}
+
+void TranslatorVisitor::SYNC(u64) {
+ ThrowNotImplemented(Opcode::SYNC);
+}
+
+void TranslatorVisitor::TXA(u64) {
+ ThrowNotImplemented(Opcode::TXA);
+}
+
+void TranslatorVisitor::VABSDIFF(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF);
+}
+
+void TranslatorVisitor::VABSDIFF4(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF4);
+}
+
+void TranslatorVisitor::VADD(u64) {
+ ThrowNotImplemented(Opcode::VADD);
+}
+
+void TranslatorVisitor::VSET(u64) {
+ ThrowNotImplemented(Opcode::VSET);
+}
+void TranslatorVisitor::VSHL(u64) {
+ ThrowNotImplemented(Opcode::VSHL);
+}
+
+void TranslatorVisitor::VSHR(u64) {
+ ThrowNotImplemented(Opcode::VSHR);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> output_reg; // Not needed on host
+ BitField<39, 1, u64> emit;
+ BitField<40, 1, u64> cut;
+ } const out{insn};
+
+ stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
+
+ if (out.emit != 0) {
+ v.ir.EmitVertex(stream_index);
+ }
+ if (out.cut != 0) {
+ v.ir.EndPrimitive(stream_index);
+ }
+ // Host doesn't need the output register, but we can write to it to avoid undefined reads
+ v.X(out.dest_reg, v.ir.Imm32(0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::OUT_reg(u64 insn) {
+ OUT(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::OUT_cbuf(u64 insn) {
+ OUT(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::OUT_imm(u64 insn) {
+ OUT(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ CovMask,
+ Covered,
+ Offset,
+ CentroidOffset,
+ MyIndex,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::PIXLD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<31, 3, Mode> mode;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, s64> addr_offset;
+ BitField<45, 3, IR::Pred> dest_pred;
+ } const pixld{insn};
+
+ if (pixld.dest_pred != IR::Pred::PT) {
+ throw NotImplementedException("Destination predicate");
+ }
+ if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
+ throw NotImplementedException("Non-zero source register");
+ }
+ switch (pixld.mode) {
+ case Mode::MyIndex:
+ X(pixld.dest_reg, ir.SampleId());
+ break;
+ default:
+ throw NotImplementedException("Mode {}", pixld.mode.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<45, 2, BooleanOp> bop_2;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
+ const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
+ const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
+ const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
+
+ ir.SetPred(pset.dest_pred_a, result_a);
+ ir.SetPred(pset.dest_pred_b, result_b);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop_2;
+ BitField<47, 1, u64> cc;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
+ const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
+
+ const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
+ const IR::U32 zero{ir.Imm32(0)};
+
+ const IR::U32 result{ir.Select(res_2, true_result, zero)};
+
+ X(pset.dest_reg, result);
+ if (pset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (pset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ } const sel{insn};
+
+ const IR::U1 pred = v.ir.GetPred(sel.pred);
+ IR::U32 op_a{v.X(sel.src_reg)};
+ IR::U32 op_b{src};
+ if (sel.neg_pred != 0) {
+ std::swap(op_a, op_b);
+ }
+ const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
+
+ v.X(sel.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SEL_reg(u64 insn) {
+ SEL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SEL_cbuf(u64 insn) {
+ SEL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SEL_imm(u64 insn) {
+ SEL(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+enum class Size : u64 {
+ U32,
+ S32,
+ U64,
+ S64,
+ F32FTZRN,
+ F16x2FTZRN,
+ SD32,
+ SD64,
+};
+
+enum class AtomicOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
+ const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomicOp::ADD:
+ return ir.ImageAtomicIAdd(handle, coords, op_b, info);
+ case AtomicOp::MIN:
+ return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
+ case AtomicOp::MAX:
+ return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
+ case AtomicOp::INC:
+ return ir.ImageAtomicInc(handle, coords, op_b, info);
+ case AtomicOp::DEC:
+ return ir.ImageAtomicDec(handle, coords, op_b, info);
+ case AtomicOp::AND:
+ return ir.ImageAtomicAnd(handle, coords, op_b, info);
+ case AtomicOp::OR:
+ return ir.ImageAtomicOr(handle, coords, op_b, info);
+ case AtomicOp::XOR:
+ return ir.ImageAtomicXor(handle, coords, op_b, info);
+ case AtomicOp::EXCH:
+ return ir.ImageAtomicExchange(handle, coords, op_b, info);
+ default:
+ throw NotImplementedException("Atomic Operation {}", op);
+ }
+}
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return ImageFormat::R32_UINT;
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+bool IsSizeInt32(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
+ IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
+ u64 bound_offset, bool is_bindless, bool write_result) {
+ if (clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", clamp);
+ }
+ if (!IsSizeInt32(size)) {
+ throw NotImplementedException("Size {}", size);
+ }
+ const bool is_signed{size == Size::S32};
+ const ImageFormat format{Format(size)};
+ const TextureType tex_type{GetType(type)};
+ const IR::Value coords{MakeCoords(v, coord_reg, type)};
+
+ const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg)
+ : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
+ IR::TextureInstInfo info{};
+ info.type.Assign(tex_type);
+ info.image_format.Assign(format);
+
+ // TODO: float/64-bit operand
+ const IR::Value op_b{v.X(operand_reg)};
+ const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
+
+ if (write_result) {
+ v.X(dest_reg, IR::U32{color});
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SUATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> is_bindless;
+ BitField<29, 4, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<51, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<36, 13, u64> bound_offset; // !is_bindless
+ BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
+ } const suatom{insn};
+
+ ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
+ suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
+ suatom.is_bindless != 0, true);
+}
+
+void TranslatorVisitor::SURED(u64 insn) {
+ // TODO: confirm offsets
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<21, 3, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<20, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sured{insn};
+ ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
+ sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
+ sured.is_bound == 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+constexpr unsigned R = 1 << 0;
+constexpr unsigned G = 1 << 1;
+constexpr unsigned B = 1 << 2;
+constexpr unsigned A = 1 << 3;
+
+constexpr std::array MASK{
+ 0U, //
+ R, //
+ G, //
+ R | G, //
+ B, //
+ R | B, //
+ G | B, //
+ R | G | B, //
+ A, //
+ R | A, //
+ G | A, //
+ R | G | A, //
+ B | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (volatile)
+};
+
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level (L2 and below, not L1)
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory, volatile?)
+};
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U8:
+ return ImageFormat::R8_UINT;
+ case Size::S8:
+ return ImageFormat::R8_SINT;
+ case Size::U16:
+ return ImageFormat::R16_UINT;
+ case Size::S16:
+ return ImageFormat::R16_SINT;
+ case Size::B32:
+ return ImageFormat::R32_UINT;
+ case Size::B64:
+ return ImageFormat::R32G32_UINT;
+ case Size::B128:
+ return ImageFormat::R32G32B32A32_UINT;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+int SizeInRegs(Size size) {
+ switch (size) {
+ case Size::U8:
+ case Size::S8:
+ case Size::U16:
+ case Size::S16:
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B128:
+ return 4;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ const auto array{[&](int index) {
+ return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
+ }};
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg), array(1));
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+unsigned SwizzleMask(u64 swizzle) {
+ if (swizzle == 0 || swizzle >= MASK.size()) {
+ throw NotImplementedException("Invalid swizzle {}", swizzle);
+ }
+ return MASK[swizzle];
+}
+
+IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
+ std::array<IR::U32, 4> colors;
+ for (int i = 0; i < num_regs; ++i) {
+ colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
+ }
+ for (int i = num_regs; i < 4; ++i) {
+ colors[static_cast<size_t>(i)] = ir.Imm32(0);
+ }
+ return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SULD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, LoadCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const suld{insn};
+
+ if (suld.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", suld.clamp.Value());
+ }
+ if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
+ throw NotImplementedException("Cache {}", suld.cache.Value());
+ }
+ const bool is_typed{suld.d != 0};
+ if (is_typed && suld.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+
+ const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(suld.type)};
+ const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
+ const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
+ : X(suld.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ const IR::Value result{ir.ImageRead(handle, coords, info)};
+ IR::Reg dest_reg{suld.dest_reg};
+ if (is_typed) {
+ const int num_regs{SizeInRegs(suld.size)};
+ for (int i = 0; i < num_regs; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+ } else {
+ const unsigned mask{SwizzleMask(suld.swizzle)};
+ const int bits{std::popcount(mask)};
+ if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((mask >> component) & 1) == 0) {
+ continue;
+ }
+ X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
+ ++dest_reg;
+ }
+ }
+}
+
+void TranslatorVisitor::SUST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, StoreCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sust{insn};
+
+ if (sust.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", sust.clamp.Value());
+ }
+ if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
+ throw NotImplementedException("Cache {}", sust.cache.Value());
+ }
+ const bool is_typed{sust.d != 0};
+ if (is_typed && sust.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+ const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(sust.type)};
+ const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
+ const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
+ : X(sust.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ IR::Value color;
+ if (is_typed) {
+ color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
+ } else {
+ const unsigned mask{SwizzleMask(sust.swizzle)};
+ if (mask != 0xf) {
+ throw NotImplementedException("Non-full mask");
+ }
+ color = MakeColor(ir, sust.data_reg, 4);
+ }
+ ir.ImageWrite(handle, coords, color, info);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Blod : u64 {
+ None,
+ LZ,
+ LB,
+ LL,
+ INVALIDBLOD4,
+ INVALIDBLOD5,
+ LBA,
+ LLA,
+};
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
+ switch (blod) {
+ case Blod::None:
+ return v.ir.Imm32(0.0f);
+ case Blod::LZ:
+ return v.ir.Imm32(0.0f);
+ case Blod::LB:
+ case Blod::LL:
+ case Blod::LBA:
+ case Blod::LLA:
+ return v.F(reg++);
+ case Blod::INVALIDBLOD4:
+ case Blod::INVALIDBLOD5:
+ break;
+ }
+ throw NotImplementedException("Invalid blod {}", blod);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+bool HasExplicitLod(Blod blod) {
+ switch (blod) {
+ case Blod::LL:
+ case Blod::LLA:
+ case Blod::LZ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
+ std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ } const tex{insn};
+
+ if (lc) {
+ throw NotImplementedException("LC");
+ }
+ const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
+
+ IR::Reg meta_reg{tex.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::F32 dref;
+ IR::F32 lod_clamp;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(meta_reg++);
+ }
+ const IR::F32 lod{MakeLod(v, meta_reg, blod)};
+ if (aoffi) {
+ offset = MakeOffset(v, meta_reg, tex.type);
+ }
+ if (tex.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tex.type));
+ info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
+ info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
+ info.has_lod_clamp.Assign(lc ? 1 : 0);
+
+ const IR::Value sample{[&]() -> IR::Value {
+ if (tex.dc == 0) {
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
+ }
+ }
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+ info);
+ }
+ }()};
+
+ IR::Reg dest_reg{tex.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((tex.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value;
+ if (tex.dc != 0) {
+ value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
+ } else {
+ value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
+ }
+ v.F(dest_reg, value);
+ ++dest_reg;
+ }
+ if (tex.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> aoffi;
+ BitField<55, 3, Blod> blod;
+ BitField<58, 1, u64> lc;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TEX_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 1, u64> aoffi;
+ BitField<37, 3, Blod> blod;
+ BitField<40, 1, u64> lc;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<53, 4, u64> encoding;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+template <typename... Args>
+IR::Value Composite(TranslatorVisitor& v, Args... regs) {
+ return v.ir.CompositeConstruct(v.F(regs)...);
+}
+
+IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
+ return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding texs{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::Reg reg_a{texs.src_reg_a};
+ const IR::Reg reg_b{texs.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (texs.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ switch (texs.encoding) {
+ case 0: // 1D.LZ
+ info.type.Assign(TextureType::Color1D);
+ return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
+ case 1: // 2D
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
+ case 2: // 2D.LZ
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
+ case 3: // 2D.LL
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
+ info);
+ case 4: // 2D.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ {}, {}, {}, info);
+ case 5: // 2D.LL.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
+ v.F(reg_b + 1), v.F(reg_b), {}, info);
+ case 6: // 2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ zero, {}, info);
+ case 7: // ARRAY_2D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleImplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ {}, {}, {}, info);
+ case 8: // ARRAY_2D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ zero, {}, info);
+ case 9: // ARRAY_2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ v.F(reg_b + 1), zero, {}, info);
+ case 10: // 3D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 11: // 3D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
+ info);
+ case 12: // CUBE
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 13: // CUBE.LL
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
+ v.F(reg_b + 1), {}, info);
+ default:
+ throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
+ }
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding texs{insn};
+ const size_t encoding{texs.swizzle};
+ if (texs.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ const bool is_shadow{sample.Type() == IR::Type::F32};
+ if (is_shadow) {
+ const bool is_alpha{component == 3};
+ return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
+ } else {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding texs{insn};
+ switch (index) {
+ case 0:
+ return texs.dest_reg_a;
+ case 1:
+ CheckAlignment(texs.dest_reg_a, 2);
+ return texs.dest_reg_a + 1;
+ case 2:
+ return texs.dest_reg_b;
+ case 3:
+ CheckAlignment(texs.dest_reg_b, 2);
+ return texs.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding texs{insn};
+ switch (store_index) {
+ case 1:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEXS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+enum class OffsetType : u64 {
+ None = 0,
+ AOFFI,
+ PTP,
+ Invalid,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
+ const IR::U32 value1{v.X(reg++)};
+ const IR::U32 value2{v.X(reg++)};
+ const IR::U32 bitsize{v.ir.Imm32(6)};
+ const auto make_vector{[&v, &bitsize](const IR::U32& value) {
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
+ }};
+ return {make_vector(value1), make_vector(value2)};
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
+ bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld4{insn};
+
+ const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
+
+ IR::Reg meta_reg{tld4.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::Value offset2;
+ IR::F32 dref;
+ if (!is_bindless) {
+ handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
+ } else {
+ handle = v.X(meta_reg++);
+ }
+ switch (offset_type) {
+ case OffsetType::None:
+ break;
+ case OffsetType::AOFFI:
+ offset = MakeOffset(v, meta_reg, tld4.type);
+ break;
+ case OffsetType::PTP:
+ std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
+ break;
+ default:
+ throw NotImplementedException("Invalid offset type {}", offset_type);
+ }
+ if (tld4.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld4.type));
+ info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
+ info.gather_component.Assign(static_cast<u32>(component_type));
+ const IR::Value sample{[&] {
+ if (tld4.dc == 0) {
+ return v.ir.ImageGather(handle, coords, offset, offset2, info);
+ }
+ return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
+ }()};
+
+ IR::Reg dest_reg{tld4.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld4.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld4.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4(u64 insn) {
+ union {
+ u64 raw;
+ BitField<56, 2, ComponentType> component;
+ BitField<54, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, false);
+}
+
+void TranslatorVisitor::TLD4_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<38, 2, ComponentType> component;
+ BitField<36, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F32,
+ F16,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<55, 1, Precision> precision;
+ BitField<52, 2, ComponentType> component_type;
+ BitField<51, 1, u64> aoffi;
+ BitField<50, 1, u64> dc;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tld4s{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
+ const IR::Reg reg_a{tld4s.src_reg_a};
+ const IR::Reg reg_b{tld4s.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (tld4s.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
+ info.type.Assign(Shader::TextureType::Color2D);
+ info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
+ IR::Value coords;
+ if (tld4s.aoffi != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::Value offset = MakeOffset(v, reg_b);
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_b, 2);
+ IR::F32 dref = v.F(reg_b + 1);
+ return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
+ }
+ return v.ir.ImageGather(handle, coords, offset, {}, info);
+ }
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::F32 dref = v.F(reg_b);
+ return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
+ }
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
+ return v.ir.ImageGather(handle, coords, {}, {}, info);
+}
+
+IR::Reg RegStoreComponent32(u64 insn, size_t index) {
+ const Encoding tlds4{insn};
+ switch (index) {
+ case 0:
+ return tlds4.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds4.dest_reg_a, 2);
+ return tlds4.dest_reg_a + 1;
+ case 2:
+ return tlds4.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds4.dest_reg_b, 2);
+ return tlds4.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ for (size_t component = 0; component < 4; ++component) {
+ const IR::Reg dest{RegStoreComponent32(insn, component)};
+ v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ std::array<IR::F32, 4> swizzled;
+ for (size_t component = 0; component < 4; ++component) {
+ swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+ const Encoding tld4s{insn};
+ v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4S(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
+ const IR::U32 value{v.X(reg)};
+ const u32 base{has_lod_clamp ? 12U : 16U};
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> aoffi;
+ BitField<50, 1, u64> lc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> derivate_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txd{insn};
+
+ const bool has_lod_clamp = txd.lc != 0;
+ if (has_lod_clamp) {
+ throw NotImplementedException("TXD.LC - CLAMP is not implemented");
+ }
+
+ IR::Value coords;
+ u32 num_derivates{};
+ IR::Reg base_reg{txd.coord_reg};
+ IR::Reg last_reg;
+ IR::Value handle;
+ if (is_bindless) {
+ handle = v.X(base_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
+ }
+
+ const auto read_array{[&]() -> IR::F32 {
+ const IR::U32 base{v.ir.Imm32(0)};
+ const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
+ const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
+ return v.ir.ConvertUToF(32, 16, array_index);
+ }};
+ switch (txd.type) {
+ case TextureType::_1D: {
+ coords = v.F(base_reg);
+ num_derivates = 1;
+ last_reg = base_reg + 1;
+ break;
+ }
+ case TextureType::ARRAY_1D: {
+ last_reg = base_reg + 1;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
+ num_derivates = 1;
+ break;
+ }
+ case TextureType::_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
+ num_derivates = 2;
+ break;
+ }
+ case TextureType::ARRAY_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
+ num_derivates = 2;
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid texture type");
+ }
+
+ const IR::Reg derivate_reg{txd.derivate_reg};
+ IR::Value derivates;
+ switch (num_derivates) {
+ case 1: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+ break;
+ }
+ case 2: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
+ v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid texture type");
+ }
+
+ IR::Value offset;
+ if (txd.aoffi != 0) {
+ offset = MakeOffset(v, last_reg, has_lod_clamp);
+ }
+
+ IR::F32 lod_clamp;
+ if (has_lod_clamp) {
+ // Lod Clamp is a Fixed Point 4.8, we need to transform it to float.
+ // to convert a fixed point, float(value) / float(1 << fixed_point)
+ // in this case the fixed_point is 8.
+ const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
+ const IR::F32 fixp_lc{v.ir.ConvertUToF(
+ 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
+ lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
+ }
+
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(txd.type));
+ info.num_derivates.Assign(num_derivates);
+ info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
+ const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+
+ IR::Reg dest_reg{txd.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((txd.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (txd.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TXD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{
+ [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.X(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<55, 1, u64> lod;
+ BitField<50, 1, u64> multisample;
+ BitField<35, 1, u64> aoffi;
+ BitField<54, 1, u64> clamp;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld{insn};
+
+ const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
+
+ IR::Reg meta_reg{tld.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::U32 lod;
+ IR::U32 multisample;
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
+ }
+ if (tld.lod != 0) {
+ lod = v.X(meta_reg++);
+ } else {
+ lod = v.ir.Imm32(0U);
+ }
+ if (tld.aoffi != 0) {
+ offset = MakeOffset(v, meta_reg, tld.type);
+ }
+ if (tld.multisample != 0) {
+ multisample = v.X(meta_reg++);
+ }
+ if (tld.clamp != 0) {
+ throw NotImplementedException("TLD.CL - CLAMP is not implmented");
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld.type));
+ const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
+
+ IR::Reg dest_reg{tld.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TLD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<54, 1, u64> aoffi;
+ BitField<53, 1, u64> lod;
+ BitField<55, 1, u64> ms;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+ BitField<53, 4, u64> encoding;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tlds{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
+ const IR::Reg reg_a{tlds.src_reg_a};
+ const IR::Reg reg_b{tlds.src_reg_b};
+ IR::Value coords;
+ IR::U32 lod{v.ir.Imm32(0U)};
+ IR::Value offsets;
+ IR::U32 multisample;
+ Shader::TextureType texture_type{};
+ switch (tlds.encoding) {
+ case 0:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ break;
+ case 1:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ lod = v.X(reg_b);
+ break;
+ case 2:
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
+ break;
+ case 4:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ offsets = MakeOffset(v, reg_b);
+ break;
+ case 5:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ break;
+ case 6:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ multisample = v.X(reg_b);
+ break;
+ case 7:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color3D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
+ break;
+ case 8: {
+ CheckAlignment(reg_b, 2);
+ const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
+ texture_type = Shader::TextureType::ColorArray2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
+ break;
+ }
+ case 12:
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ offsets = MakeOffset(v, reg_b + 1);
+ break;
+ default:
+ throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
+ }
+ IR::TextureInstInfo info{};
+ if (tlds.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.type.Assign(texture_type);
+ return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding tlds{insn};
+ const size_t encoding{tlds.swizzle};
+ if (tlds.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding tlds{insn};
+ switch (index) {
+ case 0:
+ return tlds.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds.dest_reg_a, 2);
+ return tlds.dest_reg_a + 1;
+ case 2:
+ return tlds.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds.dest_reg_b, 2);
+ return tlds.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding tlds{insn};
+ switch (store_index) {
+ case 1:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLDS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ // The ISA reads an array component here, but this is not needed on high level shading languages
+ // We are dropping this information.
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.F(reg + 1);
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> ndv;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tmml{insn};
+
+ if ((tmml.mask & 0b1100) != 0) {
+ throw NotImplementedException("TMML BA results are not implmented");
+ }
+ const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
+
+ IR::U32 handle;
+ IR::Reg meta_reg{tmml.meta_reg};
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tmml.type));
+ const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
+
+ IR::Reg dest_reg{tmml.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tmml.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value{v.ir.CompositeExtract(sample, element)};
+ if (element < 2) {
+ IR::U32 casted_value;
+ if (element == 0) {
+ casted_value = v.ir.ConvertFToU(32, value);
+ } else {
+ casted_value = v.ir.ConvertFToS(16, value);
+ }
+ v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
+ } else {
+ v.F(dest_reg, value);
+ }
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TMML(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TMML_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Dimension = 1,
+ TextureType = 2,
+ SamplePos = 5,
+};
+
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+ switch (mode) {
+ case Mode::Dimension: {
+ const IR::U32 lod{v.X(src_reg)};
+ return v.ir.ImageQueryDimension(handle, lod);
+ }
+ case Mode::TextureType:
+ case Mode::SamplePos:
+ default:
+ throw NotImplementedException("Mode {}", mode);
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<22, 3, Mode> mode;
+ BitField<31, 4, u64> mask;
+ } const txq{insn};
+
+ IR::Reg src_reg{txq.src_reg};
+ IR::U32 handle;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(src_reg);
+ ++src_reg;
+ }
+ const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+ IR::Reg dest_reg{txq.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((txq.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXQ(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txq{insn};
+
+ Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TXQ_b(u64 insn) {
+ Impl(*this, insn, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
+ u32 selector, bool is_signed) {
+ switch (width) {
+ case VideoWidth::Byte:
+ case VideoWidth::Unknown:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
+ case VideoWidth::Short:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
+ case VideoWidth::Word:
+ return value;
+ default:
+ throw NotImplementedException("Unknown VideoWidth {}", width);
+ }
+}
+
+VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
+ // immediates must be 16-bit format.
+ return is_immediate ? VideoWidth::Short : width;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+enum class VideoWidth : u64 {
+ Byte,
+ Unknown,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
+ VideoWidth width, u32 selector, bool is_signed);
+
+[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VideoMinMaxOps : u64 {
+ MRG_16H,
+ MRG_16L,
+ MRG_8B0,
+ MRG_8B2,
+ ACC,
+ MIN,
+ MAX,
+};
+
+[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
+ VideoMinMaxOps op, bool is_signed) {
+ switch (op) {
+ case VideoMinMaxOps::MIN:
+ return ir.IMin(lhs, rhs, is_signed);
+ case VideoMinMaxOps::MAX:
+ return ir.IMax(lhs, rhs, is_signed);
+ default:
+ throw NotImplementedException("VMNMX op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VMNMX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 3, VideoMinMaxOps> op;
+ BitField<54, 1, u64> dest_sign;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> mx;
+ } const vmnmx{insn};
+
+ if (vmnmx.cc != 0) {
+ throw NotImplementedException("VMNMX CC");
+ }
+ if (vmnmx.sat != 0) {
+ throw NotImplementedException("VMNMX SAT");
+ }
+ // Selectors were shown to default to 2 in unit tests
+ if (vmnmx.src_a_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
+ }
+ if (vmnmx.src_b_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
+ }
+ if (vmnmx.src_a_width != VideoWidth::Word) {
+ throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
+ }
+
+ const bool is_b_imm{vmnmx.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const VideoWidth a_width{vmnmx.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmnmx.src_a_sign != 0};
+ const bool src_b_signed{vmnmx.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+ // First operation's sign is only dependent on operand b's sign
+ const bool op_1_signed{src_b_signed};
+
+ const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+ : ir.IMin(op_a, op_b, op_1_signed)};
+ X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::VMAD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 2, u64> scale;
+ BitField<53, 1, u64> src_c_neg;
+ BitField<54, 1, u64> src_a_neg;
+ BitField<55, 1, u64> sat;
+ } const vmad{insn};
+
+ if (vmad.cc != 0) {
+ throw NotImplementedException("VMAD CC");
+ }
+ if (vmad.sat != 0) {
+ throw NotImplementedException("VMAD SAT");
+ }
+ if (vmad.scale != 0) {
+ throw NotImplementedException("VMAD SCALE");
+ }
+ if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD PO");
+ }
+ if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD NEG");
+ }
+ const bool is_b_imm{vmad.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+ // Immediate values can't have a selector
+ const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+ const VideoWidth a_width{vmad.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmad.src_a_sign != 0};
+ const bool src_b_signed{vmad.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VsetpCompareOp : u64 {
+ False = 0,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan = 16,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+ switch (op) {
+ case VsetpCompareOp::False:
+ return CompareOp::False;
+ case VsetpCompareOp::LessThan:
+ return CompareOp::LessThan;
+ case VsetpCompareOp::Equal:
+ return CompareOp::Equal;
+ case VsetpCompareOp::LessThanEqual:
+ return CompareOp::LessThanEqual;
+ case VsetpCompareOp::GreaterThan:
+ return CompareOp::GreaterThan;
+ case VsetpCompareOp::NotEqual:
+ return CompareOp::NotEqual;
+ case VsetpCompareOp::GreaterThanEqual:
+ return CompareOp::GreaterThanEqual;
+ case VsetpCompareOp::True:
+ return CompareOp::True;
+ default:
+ throw NotImplementedException("Invalid compare op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 5, VsetpCompareOp> compare_op;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ } const vsetp{insn};
+
+ const bool is_b_imm{vsetp.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+ const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+ const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
+ const VideoWidth a_width{vsetp.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vsetp.src_a_sign != 0};
+ const bool src_b_signed{vsetp.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ // Compare operation's sign is only dependent on operand b's sign
+ const bool compare_signed{src_b_signed};
+ const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+ const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+ const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+ ir.SetPred(vsetp.dest_pred_a, result_a);
+ ir.SetPred(vsetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VoteOp : u64 {
+ ALL,
+ ANY,
+ EQ,
+};
+
+[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
+ switch (vote_op) {
+ case VoteOp::ALL:
+ return ir.VoteAll(pred);
+ case VoteOp::ANY:
+ return ir.VoteAny(pred);
+ case VoteOp::EQ:
+ return ir.VoteEqual(pred);
+ default:
+ throw NotImplementedException("Invalid VOTE op {}", vote_op);
+ }
+}
+
+void Vote(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 3, IR::Pred> pred_a;
+ BitField<42, 1, u64> neg_pred_a;
+ BitField<45, 3, IR::Pred> pred_b;
+ BitField<48, 2, VoteOp> vote_op;
+ } const vote{insn};
+
+ const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
+ v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
+ v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VOTE(u64 insn) {
+ Vote(*this, insn);
+}
+
+void TranslatorVisitor::VOTE_vtg(u64) {
+ LOG_WARNING(Shader, "(STUBBED) called");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class ShuffleMode : u64 {
+ IDX,
+ UP,
+ DOWN,
+ BFLY,
+};
+
+[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
+ const IR::U32& index, const IR::U32& mask,
+ ShuffleMode shfl_op) {
+ const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
+ const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
+ switch (shfl_op) {
+ case ShuffleMode::IDX:
+ return ir.ShuffleIndex(value, index, clamp, seg_mask);
+ case ShuffleMode::UP:
+ return ir.ShuffleUp(value, index, clamp, seg_mask);
+ case ShuffleMode::DOWN:
+ return ir.ShuffleDown(value, index, clamp, seg_mask);
+ case ShuffleMode::BFLY:
+ return ir.ShuffleButterfly(value, index, clamp, seg_mask);
+ default:
+ throw NotImplementedException("Invalid SHFL op {}", shfl_op);
+ }
+}
+
+void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<30, 2, ShuffleMode> mode;
+ BitField<48, 3, IR::Pred> pred;
+ } const shfl{insn};
+
+ const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
+ v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
+ v.X(shfl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHFL(u64 insn) {
+ union {
+ u64 insn;
+ BitField<20, 5, u64> src_a_imm;
+ BitField<28, 1, u64> src_a_flag;
+ BitField<29, 1, u64> src_b_flag;
+ BitField<34, 13, u64> src_b_imm;
+ } const flags{insn};
+ const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
+ : GetReg20(insn)};
+ const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
+ : GetReg39(insn)};
+ Shuffle(*this, insn, src_a, src_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
+template <auto method>
+static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
+ using MethodType = decltype(method);
+ if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
+ (visitor.*method)(pc, insn);
+ } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
+ (visitor.*method)(insn);
+ } else {
+ (visitor.*method)();
+ }
+}
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
+ if (location_begin == location_end) {
+ return;
+ }
+ TranslatorVisitor visitor{env, *block};
+ for (Location pc = location_begin; pc != location_end; ++pc) {
+ const u64 insn{env.ReadInstruction(pc.Offset())};
+ try {
+ const Opcode opcode{Decode(insn)};
+ switch (opcode) {
+#define INST(name, cute, mask) \
+ case Opcode::name: \
+ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
+ break;
+#include "shader_recompiler/frontend/maxwell/maxwell.inc"
+#undef OPCODE
+ default:
+ throw LogicError("Invalid opcode {}", opcode);
+ }
+ } catch (Exception& exception) {
+ exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
+ throw;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::Maxwell {
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "common/settings.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/post_order.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
+ size_t num_syntax_blocks{};
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ ++num_syntax_blocks;
+ }
+ }
+ IR::BlockList blocks;
+ blocks.reserve(num_syntax_blocks);
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ blocks.push_back(node.data.block);
+ }
+ }
+ return blocks;
+}
+
+void RemoveUnreachableBlocks(IR::Program& program) {
+ // Some blocks might be unreachable if a function call exists unconditionally
+ // If this happens the number of blocks and post order blocks will mismatch
+ if (program.blocks.size() == program.post_order_blocks.size()) {
+ return;
+ }
+ const auto begin{program.blocks.begin() + 1};
+ const auto end{program.blocks.end()};
+ const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
+ program.blocks.erase(std::remove_if(begin, end, pred), end);
+}
+
+void CollectInterpolationInfo(Environment& env, IR::Program& program) {
+ if (program.stage != Stage::Fragment) {
+ return;
+ }
+ const ProgramHeader& sph{env.SPH()};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ std::optional<PixelImap> imap;
+ for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
+ if (value == PixelImap::Unused) {
+ continue;
+ }
+ if (imap && imap != value) {
+ throw NotImplementedException("Per component interpolation");
+ }
+ imap = value;
+ }
+ if (!imap) {
+ continue;
+ }
+ program.info.interpolation[index] = [&] {
+ switch (*imap) {
+ case PixelImap::Unused:
+ case PixelImap::Perspective:
+ return Interpolation::Smooth;
+ case PixelImap::Constant:
+ return Interpolation::Flat;
+ case PixelImap::ScreenLinear:
+ return Interpolation::NoPerspective;
+ }
+ throw NotImplementedException("Unknown interpolation {}", *imap);
+ }();
+ }
+}
+
+void AddNVNStorageBuffers(IR::Program& program) {
+ if (!program.info.uses_global_memory) {
+ return;
+ }
+ const u32 driver_cbuf{0};
+ const u32 descriptor_size{0x10};
+ const u32 num_buffers{16};
+ const u32 base{[&] {
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return 0x110u;
+ case Stage::TessellationControl:
+ return 0x210u;
+ case Stage::TessellationEval:
+ return 0x310u;
+ case Stage::Geometry:
+ return 0x410u;
+ case Stage::Fragment:
+ return 0x510u;
+ case Stage::Compute:
+ return 0x310u;
+ }
+ throw InvalidArgument("Invalid stage {}", program.stage);
+ }()};
+ auto& descs{program.info.storage_buffers_descriptors};
+ for (u32 index = 0; index < num_buffers; ++index) {
+ if (!program.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const u32 offset{base + index * descriptor_size};
+ const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
+ if (it != descs.end()) {
+ it->is_written |= program.info.stores_global_memory;
+ continue;
+ }
+ descs.push_back({
+ .cbuf_index = driver_cbuf,
+ .cbuf_offset = offset,
+ .count = 1,
+ .is_written = program.info.stores_global_memory,
+ });
+ }
+}
+} // Anonymous namespace
+
+IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
+ IR::Program program;
+ program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+ program.blocks = GenerateBlocks(program.syntax_list);
+ program.post_order_blocks = PostOrder(program.syntax_list.front());
+ program.stage = env.ShaderStage();
+ program.local_memory_size = env.LocalMemorySize();
+ switch (program.stage) {
+ case Stage::TessellationControl: {
+ const ProgramHeader& sph{env.SPH()};
+ program.invocations = sph.common2.threads_per_input_primitive;
+ break;
+ }
+ case Stage::Geometry: {
+ const ProgramHeader& sph{env.SPH()};
+ program.output_topology = sph.common3.output_topology;
+ program.output_vertices = sph.common4.max_output_vertices;
+ program.invocations = sph.common2.threads_per_input_primitive;
+ program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
+ if (program.is_geometry_passthrough) {
+ const auto& mask{env.GpPassthroughMask()};
+ for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
+ program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
+ }
+ }
+ break;
+ }
+ case Stage::Compute:
+ program.workgroup_size = env.WorkgroupSize();
+ program.shared_memory_size = env.SharedMemorySize();
+ break;
+ default:
+ break;
+ }
+ RemoveUnreachableBlocks(program);
+
+ // Replace instructions before the SSA rewrite
+ if (!host_info.support_float16) {
+ Optimization::LowerFp16ToFp32(program);
+ }
+ if (!host_info.support_int64) {
+ Optimization::LowerInt64ToInt32(program);
+ }
+ Optimization::SsaRewritePass(program);
+
+ Optimization::GlobalMemoryToStorageBufferPass(program);
+ Optimization::TexturePass(env, program);
+
+ Optimization::ConstantPropagationPass(program);
+ Optimization::DeadCodeEliminationPass(program);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(program);
+ }
+ Optimization::CollectShaderInfoPass(env, program);
+ CollectInterpolationInfo(env, program);
+ AddNVNStorageBuffers(program);
+ return program;
+}
+
+IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b) {
+ IR::Program result{};
+ Optimization::VertexATransformPass(vertex_a);
+ Optimization::VertexBTransformPass(vertex_b);
+ for (const auto& term : vertex_a.syntax_list) {
+ if (term.type != IR::AbstractSyntaxNode::Type::Return) {
+ result.syntax_list.push_back(term);
+ }
+ }
+ result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
+ vertex_b.syntax_list.end());
+ result.blocks = GenerateBlocks(result.syntax_list);
+ result.post_order_blocks = vertex_b.post_order_blocks;
+ for (const auto& block : vertex_a.post_order_blocks) {
+ result.post_order_blocks.push_back(block);
+ }
+ result.stage = Stage::VertexB;
+ result.info = vertex_a.info;
+ result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
+ result.info.loads.mask |= vertex_b.info.loads.mask;
+ result.info.stores.mask |= vertex_b.info.stores.mask;
+
+ Optimization::JoinTextureInfo(result.info, vertex_b.info);
+ Optimization::JoinStorageInfo(result.info, vertex_b.info);
+ Optimization::DeadCodeEliminationPass(result);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(result);
+ }
+ Optimization::CollectShaderInfoPass(env_vertex_b, result);
+ return result;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg, const HostTranslateInfo& host_info);
+
+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b);
+
+} // namespace Shader::Maxwell