Diffstat (limited to 'src/shader_recompiler/backend/glasm')
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_context.cpp  154
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_context.h  80
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm.cpp  492
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm.h  25
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp  91
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp  244
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp  346
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp  231
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp  414
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_image.cpp  850
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_instructions.h  625
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp  294
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp  568
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp  273
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_select.cpp  67
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp  58
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_special.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp  150
-rw-r--r--  src/shader_recompiler/backend/glasm/reg_alloc.cpp  186
-rw-r--r--  src/shader_recompiler/backend/glasm/reg_alloc.h  303
24 files changed, 5451 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+std::string_view InterpDecorator(Interpolation interp) {
+ switch (interp) {
+ case Interpolation::Smooth:
+ return "";
+ case Interpolation::Flat:
+ return "FLAT ";
+ case Interpolation::NoPerspective:
+ return "NOPERSPECTIVE ";
+ }
+ throw InvalidArgument("Invalid interpolation {}", interp);
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+} // Anonymous namespace
+
+EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_)
+ : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
+ // FIXME: Temporary partial implementation
+ u32 cbuf_index{};
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Constant buffer descriptor array");
+ }
+ Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
+ ++cbuf_index;
+ }
+ u32 ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Storage buffer descriptor array");
+ }
+ if (runtime_info.glasm_use_storage_buffers) {
+ Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
+ ++bindings.storage_buffer;
+ ++ssbo_index;
+ }
+ }
+ if (!runtime_info.glasm_use_storage_buffers) {
+ if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
+ Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
+ }
+ }
+ stage = program.stage;
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ stage_name = "vertex";
+ attrib_name = "vertex";
+ break;
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ stage_name = "primitive";
+ attrib_name = "primitive";
+ break;
+ case Stage::Geometry:
+ stage_name = "primitive";
+ attrib_name = "vertex";
+ break;
+ case Stage::Fragment:
+ stage_name = "fragment";
+ attrib_name = "fragment";
+ break;
+ case Stage::Compute:
+ stage_name = "invocation";
+ break;
+ }
+ const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
+ const VaryingState loads{info.loads.mask | info.passthrough.mask};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (loads.Generic(index)) {
+ Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
+ InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
+ }
+ }
+ if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
+ Add("ATTRIB vertex_position=vertex.position;");
+ }
+ if (info.uses_invocation_id) {
+ Add("ATTRIB primitive_invocation=primitive.invocation;");
+ }
+ if (info.stores_tess_level_outer) {
+ Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
+ }
+ if (info.stores_tess_level_inner) {
+ Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
+ }
+ if (info.stores.ClipDistances()) {
+ Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ if (stage == Stage::TessellationControl) {
+ Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
+ "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
+ index, index, index, index, index, index);
+ } else {
+ Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
+ index, index);
+ }
+ }
+ if (stage == Stage::Fragment) {
+ Add("OUTPUT frag_color0=result.color;");
+ for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
+ Add("OUTPUT frag_color{}=result.color[{}];", index, index);
+ }
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
+ }
+ }
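+    // Reserve contiguous binding slots for image buffers, images, texture buffers, and textures,
+    // advancing the caller's Bindings counters past the descriptors consumed here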
+ image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_buffer_bindings.push_back(bindings.image);
+ bindings.image += desc.count;
+ }
+ image_bindings.reserve(info.image_descriptors.size());
+ for (const auto& desc : info.image_descriptors) {
+ image_bindings.push_back(bindings.image);
+ bindings.image += desc.count;
+ }
+ texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ texture_buffer_bindings.push_back(bindings.texture);
+ bindings.texture += desc.count;
+ }
+ texture_bindings.reserve(info.texture_descriptors.size());
+ for (const auto& desc : info.texture_descriptors) {
+ texture_bindings.push_back(bindings.texture);
+ bindings.texture += desc.count;
+ }
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+struct Info;
+struct Profile;
+struct RuntimeInfo;
+} // namespace Shader
+
+namespace Shader::Backend {
+struct Bindings;
+}
+
+namespace Shader::IR {
+class Inst;
+struct Program;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext {
+public:
+ explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_);
+
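+    // Append a formatted GLASM statement to the generated code. The Add and LongAdd overloads
+    // taking an IR::Inst define a register or long register for the instruction and pass it as
+    // the first format argument, e.g. Add("MOV.S {}.x,{};", inst, value) expands the first
+    // placeholder to the register assigned to inst.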
+ template <typename... Args>
+ void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
+ std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
+ std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void Add(const char* format_str, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ std::string code;
+ RegAlloc reg_alloc{};
+ const Info& info;
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+
+ std::vector<u32> texture_buffer_bindings;
+ std::vector<u32> image_buffer_bindings;
+ std::vector<u32> texture_bindings;
+ std::vector<u32> image_bindings;
+
+ Stage stage{};
+ std::string_view stage_name = "invalid";
+ std::string_view attrib_name = "invalid";
+
+ u32 num_safety_loop_vars{};
+ bool uses_y_direction{};
+};
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+
+#include "common/div_ceil.h"
+#include "common/settings.h"
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
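+// FuncTraits deduces the return type and per-argument types of an Emit* function pointer;
+// Invoke uses ArgType<I> to construct the matching wrapper for each IR argument.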
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <typename T>
+struct Identity {
+ Identity(T data_) : data{data_} {}
+
+ T Extract() {
+ return data;
+ }
+
+ T data;
+};
+
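+// RegWrapper materializes an IR value as a GLASM register operand: values already held in a
+// register are reused (and unreferenced on Extract), while immediates are copied with MOV into
+// a freshly allocated register (a long register for 64-bit values) that is freed on Extract.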
+template <bool scalar>
+class RegWrapper {
+public:
+ RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
+ const Value value{reg_alloc.Peek(ir_value)};
+ if (value.type == Type::Register) {
+ inst = ir_value.InstRecursive();
+ reg = Register{value};
+ } else {
+ reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
+ }
+ switch (value.type) {
+ case Type::Register:
+ case Type::Void:
+ break;
+ case Type::U32:
+ ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
+ break;
+ case Type::U64:
+ ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
+ break;
+ }
+ }
+
+ auto Extract() {
+ if (inst) {
+ reg_alloc.Unref(*inst);
+ } else {
+ reg_alloc.FreeReg(reg);
+ }
+ return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
+ }
+
+private:
+ RegAlloc& reg_alloc;
+ IR::Inst* inst{};
+ Register reg{};
+};
+
+template <typename ArgType>
+class ValueWrapper {
+public:
+ ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
+ : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
+
+ ArgType Extract() {
+ if (!ir_value.IsImmediate()) {
+ reg_alloc.Unref(*ir_value.InstRecursive());
+ }
+ return value;
+ }
+
+private:
+ RegAlloc& reg_alloc;
+ const IR::Value& ir_value;
+ ArgType value;
+};
+
+template <typename ArgType>
+auto Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, Register>) {
+ return RegWrapper<false>{ctx, arg};
+ } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
+ return RegWrapper<true>{ctx, arg};
+ } else if constexpr (std::is_base_of_v<Value, ArgType>) {
+ return ValueWrapper<ArgType>{ctx, arg};
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return Identity<const IR::Value&>{arg};
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return Identity{arg.U32()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return Identity{arg.Attribute()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return Identity{arg.Patch()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return Identity{arg.Reg()};
+ }
+}
+
+template <auto func, bool is_first_arg_inst>
+struct InvokeCall {
+ template <typename... Args>
+ InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, *inst, args.Extract()...);
+ } else {
+ func(ctx, args.Extract()...);
+ }
+ }
+};
+
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (is_first_arg_inst) {
+ InvokeCall<func, is_first_arg_inst>{
+ ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
+ } else {
+ InvokeCall<func, is_first_arg_inst>{
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+bool IsReference(IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::Reference;
+}
+
+void PrecolorInst(IR::Inst& phi) {
+    // Insert phi moves before references to avoid overwriting other phis
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Block& phi_block{*phi.PhiBlock(i)};
+ auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
+ IR::IREmitter ir{phi_block, it};
+ const IR::Value arg{phi.Arg(i)};
+ if (arg.IsImmediate()) {
+ ir.PhiMove(phi, arg);
+ } else {
+ ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+ }
+ }
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
+ }
+}
+
+void Precolor(const IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& phi : block->Instructions()) {
+ if (!IR::IsPhi(phi)) {
+ break;
+ }
+ PrecolorInst(phi);
+ }
+ }
+}
+
+void EmitCode(EmitContext& ctx, const IR::Program& program) {
+ const auto eval{
+ [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::If:
+ ctx.Add("MOV.S.CC RC,{};"
+ "IF NE.x;",
+ eval(node.data.if_node.cond));
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ ctx.Add("ENDIF;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Loop:
+ ctx.Add("REP;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ if (!Settings::values.disable_shader_loop_safety_checks) {
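+                // Decrement this loop's safety counter and break once it goes negative, guarding
+                // against shaders that would otherwise loop forever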
+ const u32 loop_index{ctx.num_safety_loop_vars++};
+ const u32 vector_index{loop_index / 4};
+ const char component{"xyzw"[loop_index % 4]};
+ ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
+ "BRK(LT.{});",
+ vector_index, component, vector_index, component, component);
+ }
+ if (node.data.repeat.cond.IsImmediate()) {
+ if (node.data.repeat.cond.U1()) {
+ ctx.Add("ENDREP;");
+ } else {
+ ctx.Add("BRK;"
+ "ENDREP;");
+ }
+ } else {
+ ctx.Add("MOV.S.CC RC,{};"
+ "BRK(EQ.x);"
+ "ENDREP;",
+ eval(node.data.repeat.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Break:
+ if (node.data.break_node.cond.IsImmediate()) {
+ if (node.data.break_node.cond.U1()) {
+ ctx.Add("BRK;");
+ }
+ } else {
+ ctx.Add("MOV.S.CC RC,{};"
+ "BRK (NE.x);",
+ eval(node.data.break_node.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Return:
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.Add("RET;");
+ break;
+ }
+ }
+ if (!ctx.reg_alloc.IsEmpty()) {
+ LOG_WARNING(Shader_GLASM, "Register leak after generating code");
+ }
+}
+
+void SetupOptions(const IR::Program& program, const Profile& profile,
+ const RuntimeInfo& runtime_info, std::string& header) {
+ const Info& info{program.info};
+ const Stage stage{program.stage};
+
+ // TODO: Track the shared atomic ops
+ header += "OPTION NV_internal;"
+ "OPTION NV_shader_storage_buffer;"
+ "OPTION NV_gpu_program_fp64;";
+ if (info.uses_int64_bit_atomics) {
+ header += "OPTION NV_shader_atomic_int64;";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "OPTION NV_shader_atomic_float;";
+ }
+ if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+ header += "OPTION NV_shader_atomic_fp16_vector;";
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
+ info.uses_fswzadd) {
+ header += "OPTION NV_shader_thread_group;";
+ }
+ if (info.uses_subgroup_shuffles) {
+ header += "OPTION NV_shader_thread_shuffle;";
+ }
+ if (info.uses_sparse_residency) {
+ header += "OPTION EXT_sparse_texture2;";
+ }
+ const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
+ info.stores[IR::Attribute::Layer]};
+ if ((stage != Stage::Geometry && stores_viewport_layer) ||
+ info.stores[IR::Attribute::ViewportMask]) {
+ if (profile.support_viewport_index_layer_non_geometry) {
+ header += "OPTION NV_viewport_array2;";
+ }
+ }
+ if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
+ header += "OPTION NV_geometry_shader_passthrough;";
+ }
+ if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
+ header += "OPTION EXT_shader_image_load_formatted;";
+ }
+ if (profile.support_derivative_control) {
+ header += "OPTION ARB_derivative_control;";
+ }
+ if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
+ header += "OPTION NV_early_fragment_tests;";
+ }
+ if (stage == Stage::Fragment) {
+ header += "OPTION ARB_draw_buffers;";
+ }
+}
+
+std::string_view StageHeader(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return "!!NVvp5.0\n";
+ case Stage::TessellationControl:
+ return "!!NVtcp5.0\n";
+ case Stage::TessellationEval:
+ return "!!NVtep5.0\n";
+ case Stage::Geometry:
+ return "!!NVgp5.0\n";
+ case Stage::Fragment:
+ return "!!NVfp5.0\n";
+ case Stage::Compute:
+ return "!!NVcp5.0\n";
+ }
+ throw InvalidArgument("Invalid stage {}", stage);
+}
+
+std::string_view InputPrimitive(InputTopology topology) {
+ switch (topology) {
+ case InputTopology::Points:
+ return "POINTS";
+ case InputTopology::Lines:
+ return "LINES";
+ case InputTopology::LinesAdjacency:
+ return "LINESS_ADJACENCY";
+ case InputTopology::Triangles:
+ return "TRIANGLES";
+ case InputTopology::TrianglesAdjacency:
+ return "TRIANGLES_ADJACENCY";
+ }
+ throw InvalidArgument("Invalid input topology {}", topology);
+}
+
+std::string_view OutputPrimitive(OutputTopology topology) {
+ switch (topology) {
+ case OutputTopology::PointList:
+ return "POINTS";
+ case OutputTopology::LineStrip:
+ return "LINE_STRIP";
+ case OutputTopology::TriangleStrip:
+ return "TRIANGLE_STRIP";
+ }
+ throw InvalidArgument("Invalid output topology {}", topology);
+}
+
+std::string_view GetTessMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Triangles:
+ return "TRIANGLES";
+ case TessPrimitive::Quads:
+ return "QUADS";
+ case TessPrimitive::Isolines:
+ return "ISOLINES";
+ }
+ throw InvalidArgument("Invalid tessellation primitive {}", primitive);
+}
+
+std::string_view GetTessSpacing(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return "EQUAL";
+ case TessSpacing::FractionalOdd:
+ return "FRACTIONAL_ODD";
+ case TessSpacing::FractionalEven:
+ return "FRACTIONAL_EVEN";
+ }
+ throw InvalidArgument("Invalid tessellation spacing {}", spacing);
+}
+} // Anonymous namespace
+
+std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
+ Bindings& bindings) {
+ EmitContext ctx{program, bindings, profile, runtime_info};
+ Precolor(program);
+ EmitCode(ctx, program);
+ std::string header{StageHeader(program.stage)};
+ SetupOptions(program, profile, runtime_info, header);
+ switch (program.stage) {
+ case Stage::TessellationControl:
+ header += fmt::format("VERTICES_OUT {};", program.invocations);
+ break;
+ case Stage::TessellationEval:
+ header += fmt::format("TESS_MODE {};"
+ "TESS_SPACING {};"
+ "TESS_VERTEX_ORDER {};",
+ GetTessMode(runtime_info.tess_primitive),
+ GetTessSpacing(runtime_info.tess_spacing),
+ runtime_info.tess_clockwise ? "CW" : "CCW");
+ break;
+ case Stage::Geometry:
+ header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
+ if (program.is_geometry_passthrough) {
+ if (profile.support_geometry_shader_passthrough) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (program.info.passthrough.Generic(index)) {
+ header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
+ }
+ }
+ if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+ header += "PASSTHROUGH result.position;";
+ }
+ } else {
+ LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
+ }
+ } else {
+ header +=
+ fmt::format("VERTICES_OUT {};"
+ "PRIMITIVE_OUT {};",
+ program.output_vertices, OutputPrimitive(program.output_topology));
+ }
+ break;
+ case Stage::Compute:
+ header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
+ program.workgroup_size[1], program.workgroup_size[2]);
+ break;
+ default:
+ break;
+ }
+ if (program.shared_memory_size > 0) {
+ header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
+ header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
+ }
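+    // Declare every scratch register the register allocator used, plus local memory, FSWZADD
+    // constants, and the loop-safety counters, in a single TEMP statement terminated by RC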
+ header += "TEMP ";
+ for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
+ header += fmt::format("R{},", index);
+ }
+ if (program.local_memory_size > 0) {
+ header += fmt::format("lmem[{}],", program.local_memory_size);
+ }
+ if (program.info.uses_fswzadd) {
+ header += "FSWZA[4],FSWZB[4],";
+ }
+ const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
+ for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
+ header += fmt::format("loop{},", index);
+ }
+ header += "RC;"
+ "LONG TEMP ";
+ for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
+ header += fmt::format("D{},", index);
+ }
+ header += "DC;";
+ if (program.info.uses_fswzadd) {
+ header += "MOV.F FSWZA[0],-1;"
+ "MOV.F FSWZA[1],1;"
+ "MOV.F FSWZA[2],-1;"
+ "MOV.F FSWZA[3],0;"
+ "MOV.F FSWZB[0],-1;"
+ "MOV.F FSWZB[1],-1;"
+ "MOV.F FSWZB[2],1;"
+ "MOV.F FSWZB[3],-1;";
+ }
+ for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
+ header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
+ }
+ if (ctx.uses_y_direction) {
+ header += "PARAM y_direction[1]={state.material.front.ambient};";
+ }
+ ctx.code.insert(0, header);
+ ctx.code += "END";
+ return ctx.code;
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+
+[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program) {
+ Bindings binding;
+ return EmitGLASM(profile, runtime_info, program, binding);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
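+// Alias forwards a no-op conversion to the register of its source value: the usages of the
+// aliasing instruction are transferred to the source and its definition is shared, so no MOV
+// is emitted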
+static void Alias(IR::Inst& inst, const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
+ value_inst.DestructiveAddUsage(inst.UseCount());
+ value_inst.DestructiveRemoveUsage();
+ inst.SetDefinition(value_inst.Definition<Id>());
+}
+
+void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
+ // Fake one usage to get a real register out of the condition
+ inst.DestructiveAddUsage(1);
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+ if (ret != input) {
+ ctx.Add("MOV.S {},{};", ret, input);
+ }
+}
+
+void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("PK64.U {}.x,{};", inst, value);
+}
+
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
+}
+
+void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("PK2H {}.x,{};", inst, value);
+}
+
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP2H {}.xy,{}.x;", inst, value);
+}
+
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("PK64 {}.x,{};", inst, value);
+}
+
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP64 {}.xy,{}.x;", inst, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
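+// Builds a vector from the given elements: immediate elements are packed into a single vector
+// MOV, then any elements held in registers are copied into the result one component at a time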
+template <auto read_imm, char type, typename... Values>
+void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (std::ranges::any_of(std::array{elements...},
+ [](const IR::Value& value) { return value.IsImmediate(); })) {
+ using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
+ const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
+ ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
+ fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
+ }
+ size_t index{};
+ for (const IR::Value& element : {elements...}) {
+ if (!element.IsImmediate()) {
+ const ScalarU32 value{ctx.reg_alloc.Consume(element)};
+ ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
+ }
+ ++index;
+ }
+}
+
+void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ret == composite && index == 0) {
+        // Nothing to do: the source and destination are the same register
+ return;
+ }
+ ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
+}
+
+template <typename ObjectType>
+void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
+ u32 index, char type) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const char swizzle{"xyzw"[index]};
+ if (ret != composite && ret == object) {
+        // The object is aliased with the return value, so insert it through a temporary register
+ ctx.Add("MOV.{} RC,{};"
+ "MOV.{} RC.{},{};"
+ "MOV.{} {},RC;",
+ type, composite, type, swizzle, object, type, ret);
+ } else if (ret != composite) {
+        // The input composite is not aliased with the return value, so copy it beforehand.
+        // The insert object is not aliased with the return value either, so it can be written
+        // directly afterwards
+ ctx.Add("MOV.{} {},{};"
+ "MOV.{} {}.{},{};",
+ type, ret, composite, type, ret, swizzle, object);
+ } else {
+        // The return value aliases the composite, so the object can be inserted in place;
+        // whether the object itself is aliased does not matter
+ ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
+ }
+}
+} // Anonymous namespace
+
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
+}
+
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
+ [[maybe_unused]] Register e4) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
+}
+
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
new file mode 100644
index 000000000..02c9dc6d7
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -0,0 +1,346 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ std::string_view size) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Indirect constant buffer loading");
+ }
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset.type == Type::U32) {
+ // Avoid reading arrays out of bounds, matching hardware's behavior
+ if (offset.imm_u32 >= 0x10'000) {
+ ctx.Add("MOV.S {},0;", ret);
+ return;
+ }
+ }
+ ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+
+std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
+ return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
+}
+
+u32 TexCoordIndex(IR::Attribute attr) {
+ return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
+}
+} // Anonymous namespace
+
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U8");
+}
+
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "S8");
+}
+
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U16");
+}
+
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "S16");
+}
+
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U32");
+}
+
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "F32");
+}
+
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U32X2");
+}
+
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ ctx.Add("MOV.S {}.x,primitive.id;", inst);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ if (IsInputArray(ctx.stage)) {
+ ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
+ } else {
+ ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
+ }
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
+ break;
+ case IR::Attribute::PointSpriteS:
+ case IR::Attribute::PointSpriteT:
+ ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
+ break;
+ case IR::Attribute::TessellationEvaluationPointU:
+ case IR::Attribute::TessellationEvaluationPointV:
+ ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::InstanceId:
+ ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::VertexId:
+ ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::FrontFace:
+ ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
+ break;
+ default:
+ throw NotImplementedException("Get attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
+ [[maybe_unused]] ScalarU32 vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::Layer:
+ if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.layer.x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM,
+ "Layer stored outside of geometry shader not supported by device");
+ }
+ break;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.viewport.x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM,
+ "Viewport stored outside of geometry shader not supported by device");
+ }
+ break;
+ case IR::Attribute::ViewportMask:
+ // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage.
+ if (ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.viewportmask[0].x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask");
+ }
+ break;
+ case IR::Attribute::PointSize:
+ ctx.Add("MOV.F result.pointsize.x,{};", value);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ ctx.Add("MOV.F result.position.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("MOV.F result.color.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontSpecularR:
+ case IR::Attribute::ColorFrontSpecularG:
+ case IR::Attribute::ColorFrontSpecularB:
+ case IR::Attribute::ColorFrontSpecularA:
+ ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackDiffuseR:
+ case IR::Attribute::ColorBackDiffuseG:
+ case IR::Attribute::ColorBackDiffuseB:
+ case IR::Attribute::ColorBackDiffuseA:
+ ctx.Add("MOV.F result.color.back.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackSpecularR:
+ case IR::Attribute::ColorBackSpecularG:
+ case IR::Attribute::ColorBackSpecularB:
+ case IR::Attribute::ColorBackSpecularA:
+ ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::FogCoordinate:
+ ctx.Add("MOV.F result.fogcoord.x,{};", value);
+ break;
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
+ ctx.Add("MOV.F result.clip[{}].x,{};", index, value);
+ break;
+ }
+ default:
+ throw NotImplementedException("Set attribute {}", attr);
+ }
+}
+
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
+ // RC.x = base_index
+ // RC.y = masked_index
+ // RC.z = compare_index
+ ctx.Add("SHR.S RC.x,{},2;"
+ "AND.S RC.y,RC.x,3;"
+ "SHR.S RC.z,{},4;",
+ offset, offset);
+
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ u32 num_endifs{};
+ const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
+ ++num_endifs;
+ ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
+ "IF NE.w;"
+ // X
+ "SEQ.S.CC RC.w,RC.y,0;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // Y
+ "SEQ.S.CC RC.w,RC.y,1;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // Z
+ "SEQ.S.CC RC.w,RC.y,2;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // W
+ "MOV {}.x,{};"
+ "ENDIF;"
+ "ENDIF;"
+ "ENDIF;"
+ "ELSE;",
+ compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
+ }};
+ const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
+ const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
+ fmt::format("{}.z", value), fmt::format("{}.w", value)};
+ read(compare_index, values);
+ }};
+ if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
+ if (IsInputArray(ctx.stage)) {
+ read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
+ } else {
+ read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
+ }
+ }
+ for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
+ if (!ctx.info.loads.Generic(index)) {
+ continue;
+ }
+ read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
+ }
+ for (u32 i = 0; i < num_endifs; ++i) {
+ ctx.Add("ENDIF;");
+ }
+}
+
+void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
+ [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ const char swizzle{"xyzw"[element]};
+ const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""};
+ ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value);
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value);
+ break;
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value);
+ break;
+ case IR::Patch::TessellationLodInteriorV:
+ ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value);
+ break;
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) {
+ ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) {
+ ctx.Add("MOV.S result.samplemask.x,{};", value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) {
+ ctx.Add("MOV.F result.depth.z,{};", value);
+}
+
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) {
+ ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset);
+}
+
+void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) {
+ ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
new file mode 100644
index 000000000..ccdf1cbc8
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
@@ -0,0 +1,231 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+std::string_view FpRounding(IR::FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case IR::FpRounding::DontCare:
+ return "";
+ case IR::FpRounding::RN:
+ return ".ROUND";
+ case IR::FpRounding::RZ:
+ return ".TRUNC";
+ case IR::FpRounding::RM:
+ return ".FLR";
+ case IR::FpRounding::RP:
+ return ".CEIL";
+ }
+ throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
+}
+
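+// Emits CVT.<dest>.<src> with the rounding mode taken from the instruction's FP control flags;
+// 64-bit results are defined in a long register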
+template <typename InputType>
+void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
+ std::string_view src, bool is_long_result) {
+ const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
+ const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
+ ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
+}
+} // Anonymous namespace
+
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S16", "F16", false);
+}
+
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S16", "F32", false);
+}
+
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S16", "F64", false);
+}
+
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S32", "F16", false);
+}
+
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S32", "F32", false);
+}
+
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S32", "F64", false);
+}
+
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S64", "F16", true);
+}
+
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S64", "F32", true);
+}
+
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S64", "F64", true);
+}
+
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U16", "F16", false);
+}
+
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U16", "F32", false);
+}
+
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U16", "F64", false);
+}
+
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U32", "F16", false);
+}
+
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U32", "F32", false);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U32", "F64", false);
+}
+
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U64", "F16", true);
+}
+
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U64", "F32", true);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U64", "F64", true);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "U64", "U32", true);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U32", "U64", false);
+}
+
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "F16", "F32", false);
+}
+
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "F16", false);
+}
+
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "F32", "F64", false);
+}
+
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "F64", "F32", true);
+}
+
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S8", false);
+}
+
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S16", false);
+}
+
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F16", "S32", false);
+}
+
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S64", false);
+}
+
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U8", false);
+}
+
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U16", false);
+}
+
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F16", "U32", false);
+}
+
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U64", false);
+}
+
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S8", false);
+}
+
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S16", false);
+}
+
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F32", "S32", false);
+}
+
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S64", false);
+}
+
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U8", false);
+}
+
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U16", false);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F32", "U32", false);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U64", false);
+}
+
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S8", true);
+}
+
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S16", true);
+}
+
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F64", "S32", true);
+}
+
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S64", true);
+}
+
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U8", true);
+}
+
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U16", true);
+}
+
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F64", "U32", true);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U64", true);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
new file mode 100644
index 000000000..4ed58619d
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
@@ -0,0 +1,414 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+template <typename InputType>
+void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op,
+ std::string_view type, bool ordered, bool inequality = false) {
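+    // The raw comparison result is written to RC.x. Ordered not-equal additionally requires
+    // that neither operand is NaN (checked with x == x), while unordered comparisons also
+    // accept the case where either operand is NaN (checked with x != x). For example, an
+    // ordered F32 not-equal expands to:
+    //   SNE.F RC.x,a,b; SEQ.F RC.y,a,a; SEQ.F RC.z,b,b;
+    //   AND.U RC.x,RC.x,RC.y; AND.U RC.x,RC.x,RC.z; SNE.S ret.x,RC.x,0;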
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs);
+ if (ordered && inequality) {
+ ctx.Add("SEQ.{} RC.y,{},{};"
+ "SEQ.{} RC.z,{},{};"
+ "AND.U RC.x,RC.x,RC.y;"
+ "AND.U RC.x,RC.x,RC.z;"
+ "SNE.S {}.x,RC.x,0;",
+ type, lhs, lhs, type, rhs, rhs, ret);
+ } else if (ordered) {
+ ctx.Add("SNE.S {}.x,RC.x,0;", ret);
+ } else {
+ ctx.Add("SNE.{} RC.y,{},{};"
+ "SNE.{} RC.z,{},{};"
+ "OR.U RC.x,RC.x,RC.y;"
+ "OR.U RC.x,RC.x,RC.z;"
+ "SNE.S {}.x,RC.x,0;",
+ type, lhs, lhs, type, rhs, rhs, ret);
+ }
+}
+
+template <typename InputType>
+void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value,
+ InputType max_value, std::string_view type) {
+    // Call MAX first so that a NaN value is properly clamped to min_value
+ ctx.Add("MAX.{} RC.x,{},{};"
+ "MIN.{} {}.x,RC.x,{};",
+ type, min_value, value, type, ret, max_value);
+}
+
+std::string_view Precise(IR::Inst& inst) {
+ const bool precise{inst.Flags<IR::FpControl>().no_contraction};
+ return precise ? ".PREC" : "";
+}
+} // Anonymous namespace
+
+void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("MOV.F {}.x,|{}|;", inst, value);
+}
+
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value);
+}
+
+void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
+}
+
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
+}
+
+void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b,
+ [[maybe_unused]] Register c) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) {
+ ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c);
+}
+
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) {
+ ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c);
+}
+
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MAX.F {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MIN.F {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
+}
+
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
+}
+
+void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) {
+ ctx.Add("MOV.F {}.x,-{};", inst, value);
+}
+
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value);
+}
+
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("SIN {}.x,{};", inst, value);
+}
+
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("COS {}.x,{};", inst, value);
+}
+
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("EX2 {}.x,{};", inst, value);
+}
+
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("LG2 {}.x,{};", inst, value);
+}
+
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("RCP {}.x,{};", inst, value);
+}
+
+void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("RSQ {}.x,{};", inst, value);
+}
+
+void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
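+    // Emit sqrt(x) as rcp(rsqrt(x)) using the RSQ and RCP scalar instructions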
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret);
+}
+
+void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("MOV.F.SAT {}.x,{};", inst, value);
+}
+
+void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value,
+ [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
+ ScalarF32 max_value) {
+ Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F");
+}
+
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
+ ScalarF64 max_value) {
+ Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64");
+}
+
+void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("ROUND.F {}.x,{};", inst, value);
+}
+
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("FLR.F {}.x,{};", inst, value);
+}
+
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("FLR.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("CEIL.F {}.x,{};", inst, value);
+}
+
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("TRUNC.F {}.x,{};", inst, value);
+}
+
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F", true);
+}
+
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true);
+}
+
+void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F", false);
+}
+
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false);
+}
+
+void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true);
+}
+
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true);
+}
+
+void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true);
+}
+
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true);
+}
+
+void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F", true);
+}
+
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F64", true);
+}
+
+void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F", false);
+}
+
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F64", false);
+}
+
+void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F", true);
+}
+
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F64", true);
+}
+
+void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F", false);
+}
+
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F64", false);
+}
+
+void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F", true);
+}
+
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F64", true);
+}
+
+void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F", false);
+}
+
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F64", false);
+}
+
+void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F", true);
+}
+
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F64", true);
+}
+
+void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F", false);
+}
+
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F64", false);
+}
+
+void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
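+    // NaN is the only value that compares not-equal to itself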
+ Compare(ctx, inst, value, value, "SNE", "F", true, false);
+}
+
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Compare(ctx, inst, value, value, "SNE", "F64", true, false);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
new file mode 100644
index 000000000..09e3a9b82
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -0,0 +1,850 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
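+// RAII helper that owns a scratch register: it allocates one when constructed from a RegAlloc
+// and returns it to the allocator on destruction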
+struct ScopedRegister {
+ ScopedRegister() = default;
+ ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
+
+ ~ScopedRegister() {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ }
+
+ ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
+ reg = rhs.reg;
+ return *this;
+ }
+
+ ScopedRegister(ScopedRegister&& rhs) noexcept
+ : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
+
+ ScopedRegister& operator=(const ScopedRegister&) = delete;
+ ScopedRegister(const ScopedRegister&) = delete;
+
+ RegAlloc* reg_alloc{};
+ Register reg;
+};
+
+std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+    // FIXME: Indexed texture reads are not handled yet; the index operand is ignored
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+    // FIXME: Indexed image reads are not handled yet; the index operand is ignored
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string_view TextureType(IR::TextureInstInfo info) {
+ if (info.is_depth) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "SHADOW1D";
+ case TextureType::ColorArray1D:
+ return "SHADOWARRAY1D";
+ case TextureType::Color2D:
+ return "SHADOW2D";
+ case TextureType::ColorArray2D:
+ return "SHADOWARRAY2D";
+ case TextureType::Color3D:
+ return "SHADOW3D";
+ case TextureType::ColorCube:
+ return "SHADOWCUBE";
+ case TextureType::ColorArrayCube:
+ return "SHADOWARRAYCUBE";
+ case TextureType::Buffer:
+ return "SHADOWBUFFER";
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "1D";
+ case TextureType::ColorArray1D:
+ return "ARRAY1D";
+ case TextureType::Color2D:
+ return "2D";
+ case TextureType::ColorArray2D:
+ return "ARRAY2D";
+ case TextureType::Color3D:
+ return "3D";
+ case TextureType::ColorCube:
+ return "CUBE";
+ case TextureType::ColorArrayCube:
+ return "ARRAYCUBE";
+ case TextureType::Buffer:
+ return "BUFFER";
+ }
+ }
+ throw InvalidArgument("Invalid texture type {}", info.type.Value());
+}
+
+std::string Offset(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsEmpty()) {
+ return "";
+ }
+ return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
+}
+
+std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
+ const IR::Value& offset2) {
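+    // Two-offset gathers (TXGO) need two scratch registers to hold the swizzled X and Y offsets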
+ if (offset2.IsEmpty()) {
+ return {};
+ } else {
+ return {ctx.reg_alloc, ctx.reg_alloc};
+ }
+}
+
+void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
+ const IR::Value& offset2) {
+ const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
+ const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
+ // Input swizzle: [XYXY] [XYXY]
+ // Output swizzle: [XXXX] [YYYY]
+ ctx.Add("MOV {}.x,{}.x;"
+ "MOV {}.y,{}.z;"
+ "MOV {}.z,{}.x;"
+ "MOV {}.w,{}.z;"
+ "MOV {}.x,{}.y;"
+ "MOV {}.y,{}.w;"
+ "MOV {}.z,{}.y;"
+ "MOV {}.w,{}.w;",
+ off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
+ offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
+}
+
+std::string GradOffset(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
+ return "";
+ }
+ IR::Inst* const vector{offset.InstRecursive()};
+ if (!vector->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
+ return "";
+ }
+ switch (vector->NumArgs()) {
+ case 1:
+ return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
+ case 2:
+ return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
+ static_cast<s32>(vector->Arg(1).U32()));
+ default:
+ throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
+ }
+}
+
+std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
+ if (coord.IsImmediate()) {
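+        // Immediate coordinates have to be materialized into a register before sampling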
+ ScopedRegister scoped_reg(ctx.reg_alloc);
+ ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
+ return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
+ }
+ std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
+ if (coord.InstRecursive()->HasUses()) {
+ // Move non-dead coords to a separate register, although this should never happen because
+ // vectors are only assembled for immediate texture instructions
+ ctx.Add("MOV.F RC,{};", coord_vec);
+ coord_vec = "RC";
+ }
+ return {std::move(coord_vec), ScopedRegister{}};
+}
+
+void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
+ if (!sparse_inst) {
+ return;
+ }
+ const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
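+    // Residency result: -1 (all bits set) when the access was resident, 0 when it was not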
+ ctx.Add("MOV.S {},-1;"
+ "MOV.S {}(NONRESIDENT),0;",
+ sparse_ret, sparse_ret);
+}
+
+std::string_view FormatStorage(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return "U";
+ case ImageFormat::R8_UINT:
+ return "U8";
+ case ImageFormat::R8_SINT:
+ return "S8";
+ case ImageFormat::R16_UINT:
+ return "U16";
+ case ImageFormat::R16_SINT:
+ return "S16";
+ case ImageFormat::R32_UINT:
+ return "U32";
+ case ImageFormat::R32G32_UINT:
+ return "U32X2";
+ case ImageFormat::R32G32B32A32_UINT:
+ return "U32X4";
+ }
+ throw InvalidArgument("Invalid image format {}", format);
+}
+
+template <typename T>
+void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
+ std::string_view op) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
+}
+
+IR::Inst* PrepareSparse(IR::Inst& inst) {
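+    // When the texture instruction also produces a sparse residency result, take ownership of
+    // that pseudo-op here and invalidate it so it is not emitted on its own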
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
+} // Anonymous namespace
+
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
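+            // For the remaining texture types the bias is passed through the coordinate's free
+            // .w component; the LOD clamp, when present, stays a separate operand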
+ if (info.has_lod_clamp) {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ }
+ } else {
+ if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset) {
+    // Allocate the staging register early so it does not alias any register consumed below
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.has_lod_clamp) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ bias_lc_vec, texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {} with bias and lod clamp",
+ info.type.Value());
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{}.x;"
+ "TXB.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ }
+ } else {
+ if (info.has_lod_clamp) {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ }
+ } else {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F{} {},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ } else {
+ ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
+ type, offset_vec);
+ }
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset) {
+    // Allocate the staging register early so it does not alias any register consumed below
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const char comp{"xyzw"[info.gather_component]};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
+ offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
+ texture, comp, type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref) {
+ // FIXME: This instruction is not working as expected
+
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ std::string args;
+ switch (info.type) {
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ args = fmt::format("{},{}", coord_vec, dref_value);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
+ type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
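+    // Buffer fetches take no LOD; otherwise the LOD (or the multisample index for TXFMS) is
+    // passed through the coordinate's .w component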
+ if (info.type == TextureType::Buffer) {
+ ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else if (ms.type != Type::Void) {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXFMS.F{} {},{},{},{}{};",
+ coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXF.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
+}
+
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
+}
+
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister dpdx, dpdy;
+ const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ if (multi_component) {
+ // Allocate this early to avoid aliasing other registers
+ dpdx = ScopedRegister{ctx.reg_alloc};
+ dpdy = ScopedRegister{ctx.reg_alloc};
+ }
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{GradOffset(offset)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (multi_component) {
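+        // The packed derivatives arrive as [dPdx.x, dPdy.x, dPdx.y, dPdy.y]; split them into
+        // separate dPdx and dPdy registers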
+ ctx.Add("MOV.F {}.x,{}.x;"
+ "MOV.F {}.y,{}.z;"
+ "MOV.F {}.x,{}.y;"
+ "MOV.F {}.y,{}.w;",
+ dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
+ dpdy.reg, derivatives_vec);
+ if (info.has_lod_clamp) {
+ const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
+ ctx.Add("MOV.F {}.w,{};"
+ "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
+ dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ }
+ } else {
+ ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
+ derivatives_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
+}
+
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
+}
+
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
+}
+
+void EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
new file mode 100644
index 000000000..12afda43b
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -0,0 +1,625 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext;
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx, IR::Inst& inst);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitReference(EmitContext&, const IR::Value& value);
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value);
+void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value);
+void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
+void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value);
+void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value);
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value);
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value);
+void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value);
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2);
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2);
+void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3);
+void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3,
+ Register e4);
+void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
+void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
+ Register false_value);
+void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
+void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitPackFloat2x16(EmitContext& ctx, Register value);
+void EmitUnpackFloat2x16(EmitContext& ctx, Register value);
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c);
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c);
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c);
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPNeg16(EmitContext& ctx, Register value);
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value);
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecip64(EmitContext& ctx, Register value);
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecipSqrt64(EmitContext& ctx, Register value);
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPSaturate16(EmitContext& ctx, Register value);
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPSaturate64(EmitContext& ctx, Register value);
+void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value);
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
+ ScalarF32 max_value);
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
+ ScalarF64 max_value);
+void EmitFPRoundEven16(EmitContext& ctx, Register value);
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPFloor16(EmitContext& ctx, Register value);
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPCeil16(EmitContext& ctx, Register value);
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPTrunc16(EmitContext& ctx, Register value);
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPIsNan16(EmitContext& ctx, Register value);
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift);
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift);
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift);
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarS32 shift);
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
+ ScalarS32 offset, ScalarS32 count);
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
+ ScalarS32 count);
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
+ ScalarU32 count);
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max);
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max);
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value);
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value);
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ Register value);
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value);
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value);
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarF32 value);
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitGlobalAtomicIAdd32(EmitContext& ctx);
+void EmitGlobalAtomicSMin32(EmitContext& ctx);
+void EmitGlobalAtomicUMin32(EmitContext& ctx);
+void EmitGlobalAtomicSMax32(EmitContext& ctx);
+void EmitGlobalAtomicUMax32(EmitContext& ctx);
+void EmitGlobalAtomicInc32(EmitContext& ctx);
+void EmitGlobalAtomicDec32(EmitContext& ctx);
+void EmitGlobalAtomicAnd32(EmitContext& ctx);
+void EmitGlobalAtomicOr32(EmitContext& ctx);
+void EmitGlobalAtomicXor32(EmitContext& ctx);
+void EmitGlobalAtomicExchange32(EmitContext& ctx);
+void EmitGlobalAtomicIAdd64(EmitContext& ctx);
+void EmitGlobalAtomicSMin64(EmitContext& ctx);
+void EmitGlobalAtomicUMin64(EmitContext& ctx);
+void EmitGlobalAtomicSMax64(EmitContext& ctx);
+void EmitGlobalAtomicUMax64(EmitContext& ctx);
+void EmitGlobalAtomicInc64(EmitContext& ctx);
+void EmitGlobalAtomicDec64(EmitContext& ctx);
+void EmitGlobalAtomicAnd64(EmitContext& ctx);
+void EmitGlobalAtomicOr64(EmitContext& ctx);
+void EmitGlobalAtomicXor64(EmitContext& ctx);
+void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicAddF32(EmitContext& ctx);
+void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitBindlessImageSampleImplicitLod(EmitContext&);
+void EmitBindlessImageSampleExplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+void EmitBindlessImageGather(EmitContext&);
+void EmitBindlessImageGatherDref(EmitContext&);
+void EmitBindlessImageFetch(EmitContext&);
+void EmitBindlessImageQueryDimensions(EmitContext&);
+void EmitBindlessImageQueryLod(EmitContext&);
+void EmitBindlessImageGradient(EmitContext&);
+void EmitBindlessImageRead(EmitContext&);
+void EmitBindlessImageWrite(EmitContext&);
+void EmitBoundImageSampleImplicitLod(EmitContext&);
+void EmitBoundImageSampleExplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+void EmitBoundImageGather(EmitContext&);
+void EmitBoundImageGatherDref(EmitContext&);
+void EmitBoundImageFetch(EmitContext&);
+void EmitBoundImageQueryDimensions(EmitContext&);
+void EmitBoundImageQueryLod(EmitContext&);
+void EmitBoundImageGradient(EmitContext&);
+void EmitBoundImageRead(EmitContext&);
+void EmitBoundImageWrite(EmitContext&);
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset);
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset);
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2);
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref);
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod);
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp);
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color);
+void EmitBindlessImageAtomicIAdd32(EmitContext&);
+void EmitBindlessImageAtomicSMin32(EmitContext&);
+void EmitBindlessImageAtomicUMin32(EmitContext&);
+void EmitBindlessImageAtomicSMax32(EmitContext&);
+void EmitBindlessImageAtomicUMax32(EmitContext&);
+void EmitBindlessImageAtomicInc32(EmitContext&);
+void EmitBindlessImageAtomicDec32(EmitContext&);
+void EmitBindlessImageAtomicAnd32(EmitContext&);
+void EmitBindlessImageAtomicOr32(EmitContext&);
+void EmitBindlessImageAtomicXor32(EmitContext&);
+void EmitBindlessImageAtomicExchange32(EmitContext&);
+void EmitBoundImageAtomicIAdd32(EmitContext&);
+void EmitBoundImageAtomicSMin32(EmitContext&);
+void EmitBoundImageAtomicUMin32(EmitContext&);
+void EmitBoundImageAtomicSMax32(EmitContext&);
+void EmitBoundImageAtomicUMax32(EmitContext&);
+void EmitBoundImageAtomicInc32(EmitContext&);
+void EmitBoundImageAtomicDec32(EmitContext&);
+void EmitBoundImageAtomicAnd32(EmitContext&);
+void EmitBoundImageAtomicOr32(EmitContext&);
+void EmitBoundImageAtomicXor32(EmitContext&);
+void EmitBoundImageAtomicExchange32(EmitContext&);
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value);
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value);
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value);
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
+ ScalarU32 swizzle);
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
new file mode 100644
index 000000000..f55c26b76
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -0,0 +1,294 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b,
+ std::string_view lop) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
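+    // Bind the result register to the IR instruction, then materialize any consumed
+    // zero/sign pseudo-ops below as SEQ/SLT comparisons against that result.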
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b);
+ if (zero) {
+ ctx.Add("SEQ.S {},{},0;", *zero, ret);
+ }
+ if (sign) {
+ ctx.Add("SLT.S {},{},0;", *sign, ret);
+ }
+}
+} // Anonymous namespace
+
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ const std::array flags{
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+ };
+ for (IR::Inst* const flag_inst : flags) {
+ if (flag_inst) {
+ flag_inst->Invalidate();
+ }
+ }
+ const bool cc{inst.HasAssociatedPseudoOperation()};
+ const std::string_view cc_mod{cc ? ".CC" : ""};
+ if (cc) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b);
+ if (!cc) {
+ return;
+ }
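+    // The zero flag is recomputed from the result with SEQ; sign/carry/overflow are
+    // read back from the condition codes written by ADD.S.CC and expanded to -1/0.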
+ static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+ for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+ if (!flags[flag_index]) {
+ continue;
+ }
+ const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+ if (flag_index == 0) {
+ ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+ } else {
+ // We could use conditional execution here, but it's broken on Nvidia's compiler
+ ctx.Add("IF {}.x;"
+ "MOV.S {}.x,-1;"
+ "ELSE;"
+ "MOV.S {}.x,0;"
+ "ENDIF;",
+ masks[flag_index], flag_ret, flag_ret);
+ }
+ }
+}
+
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+ ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b);
+}
+
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("SUB.S {}.x,{},{};", inst, a, b);
+}
+
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+ ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b);
+}
+
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
+}
+
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
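+    // A negative immediate is negated directly instead of prefixing '-' to the operand
+    // text; registers and non-negative immediates take the MOV.S dst,-src form below.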
+ if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
+ ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
+ } else {
+ ctx.Add("MOV.S {},-{};", inst, value);
+ }
+}
+
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("MOV.S64 {},-{};", inst, value);
+}
+
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("ABS.S {},{};", inst, value);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
+ ctx.Add("SHL.U {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift) {
+ ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
+ ctx.Add("SHR.U {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift) {
+ ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) {
+ ctx.Add("SHR.S {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarS32 shift) {
+ ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "AND");
+}
+
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "OR");
+}
+
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "XOR");
+}
+
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
+ ScalarS32 offset, ScalarS32 count) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
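+    // BFI expects {count, offset} packed into its first operand: build it as an inline
+    // literal when both are immediates, otherwise stage the values through the RC
+    // scratch register.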
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
+ } else {
+ ctx.Add("MOV.S RC.x,{};"
+ "MOV.S RC.y,{};"
+ "BFI.S {},RC,{},{};",
+ count, offset, ret, insert, base);
+ }
+}
+
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
+ ScalarS32 count) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
+ } else {
+ ctx.Add("MOV.S RC.x,{};"
+ "MOV.S RC.y,{};"
+ "BFE.S {},RC,{};",
+ count, offset, ret, base);
+ }
+}
+
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
+ ScalarU32 count) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
+ } else {
+ ctx.Add("MOV.U RC.x,{};"
+ "MOV.U RC.y,{};"
+ "BFE.U {},RC,{};",
+ count, offset, ret, base);
+ }
+ if (zero) {
+ ctx.Add("SEQ.S {},{},0;", *zero, ret);
+ }
+ if (sign) {
+ ctx.Add("SLT.S {},{},0;", *sign, ret);
+ }
+}
+
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BFR {},{};", inst, value);
+}
+
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BTC {},{};", inst, value);
+}
+
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("NOT.S {},{};", inst, value);
+}
+
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BTFM.S {},{};", inst, value);
+}
+
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ ctx.Add("BTFM.U {},{};", inst, value);
+}
+
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MIN.S {},{},{};", inst, a, b);
+}
+
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+ ctx.Add("MIN.U {},{},{};", inst, a, b);
+}
+
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MAX.S {},{},{};", inst, a, b);
+}
+
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+ ctx.Add("MAX.U {},{},{};", inst, a, b);
+}
+
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("MIN.S RC.x,{},{};"
+ "MAX.S {}.x,RC.x,{};",
+ max, value, ret, min);
+}
+
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("MIN.U RC.x,{},{};"
+ "MAX.U {}.x,RC.x,{};",
+ max, value, ret, min);
+}
+
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
new file mode 100644
index 000000000..af9fac7c1
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -0,0 +1,568 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ std::string_view then_expr, std::string_view else_expr = {}) {
+    // Operate on the bindless SSBO, invoking the expression under a bounds check
+ // address = c[binding].xy
+ // length = c[binding].z
+ const u32 sb_binding{binding.U32()};
+ ctx.Add("PK64.U DC,c[{}];" // pointer = address
+ "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
+ "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
+ "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
+ sb_binding, offset, offset, sb_binding);
+ if (else_expr.empty()) {
+ ctx.Add("IF NE.x;{}ENDIF;", then_expr);
+ } else {
+ ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
+ }
+}
+
+void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
+ std::string_view else_expr = {}) {
+ const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
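+    // Linearly test the 64-bit address against each tracked SSBO's [addr, addr + size)
+    // range, nesting one IF per candidate buffer; a matching ENDIF per used buffer is
+    // emitted at the end of this function.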
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!ctx.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
+ ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
+ "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
+ "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
+ "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
+ "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0
+ "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0
+ "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
+ "IF NE.x;" // if cond
+ "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
+ ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
+ address, address);
+ if (pointer_based) {
+ ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
+ "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
+ "{}"
+ "ELSE;",
+ index, expr);
+ } else {
+ ctx.Add("CVT.U32.U64 RC.x,DC.x;"
+ "{},ssbo{}[RC.x];"
+ "ELSE;",
+ expr, index);
+ }
+ }
+ if (!else_expr.empty()) {
+ ctx.Add("{}", else_expr);
+ }
+ const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()};
+ for (size_t index = 0; index < num_used_buffers; ++index) {
+ ctx.Add("ENDIF;");
+ }
+}
+
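+// Storage access helpers: when the driver exposes native storage buffers
+// (glasm_use_storage_buffers) a direct LDB/STB on an ssbo binding is emitted;
+// otherwise the access goes through the bounds-checked bindless pointer path in
+// StorageOp.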
+template <typename ValueType>
+void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
+ std::string_view size) {
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
+ } else {
+ StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+ }
+}
+
+void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
+ } else {
+ StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
+ fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+ }
+}
+
+template <typename ValueType>
+void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
+ } else {
+ GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
+ }
+}
+
+void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
+ } else {
+ GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
+ fmt::format("MOV.S {},0;", ret));
+ }
+}
+
+template <typename ValueType>
+void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ ValueType value, std::string_view operation, std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
+ offset);
+ } else {
+ StorageOp(ctx, binding, offset,
+ fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
+ }
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U8");
+}
+
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "S8");
+}
+
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U16");
+}
+
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "S16");
+}
+
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32");
+}
+
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32X2");
+}
+
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32X4");
+}
+
+void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U8");
+}
+
+void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "S8");
+}
+
+void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U16");
+}
+
+void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "S16");
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) {
+ GlobalWrite(ctx, address, value, "U32");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U32X2");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U32X4");
+}
+
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U8");
+}
+
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "S8");
+}
+
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U16");
+}
+
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "S16");
+}
+
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32");
+}
+
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32X2");
+}
+
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32X4");
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U8");
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value) {
+ Write(ctx, binding, offset, value, "S8");
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U16");
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value) {
+ Write(ctx, binding, offset, value, "S16");
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U32");
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value) {
+ Write(ctx, binding, offset, value, "U32X2");
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value) {
+ Write(ctx, binding, offset, value, "U32X4");
+}
+
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value) {
+ ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value) {
+ ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ Register value) {
+ ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "U32");
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "S32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "U32");
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "S32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "U32");
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "AND", "U32");
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "OR", "U32");
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "XOR", "U32");
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "U64");
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "S64");
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "U64");
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "S64");
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "U64");
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "AND", "U64");
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "OR", "U64");
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "XOR", "U64");
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarF32 value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "F32");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
+}
+
+void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
+}
+
+void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
+}
+
+void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
new file mode 100644
index 000000000..ff64c6924
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -0,0 +1,273 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
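+// C4100 (unreferenced formal parameter) is expected here: most of these stubs ignore their arguments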
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+namespace Shader::Backend::GLASM {
+
+#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
+
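+// Allocate a register for a phi node according to the type of its first argument;
+// 64-bit types are given a register from the long (64-bit) register file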
+static void DefinePhi(EmitContext& ctx, IR::Inst& phi) {
+ switch (phi.Arg(0).Type()) {
+ case IR::Type::U1:
+ case IR::Type::U32:
+ case IR::Type::F32:
+ ctx.reg_alloc.Define(phi);
+ break;
+ case IR::Type::U64:
+ case IR::Type::F64:
+ ctx.reg_alloc.LongDefine(phi);
+ break;
+ default:
+ throw NotImplementedException("Phi node type {}", phi.Type());
+ }
+}
+
+void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ ctx.reg_alloc.Consume(phi.Arg(i));
+ }
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ DefinePhi(ctx, phi);
+ }
+}
+
+void EmitVoid(EmitContext&) {}
+
+void EmitReference(EmitContext& ctx, const IR::Value& value) {
+ ctx.reg_alloc.Consume(value);
+}
+
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
+ IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())};
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ DefinePhi(ctx, phi);
+ }
+ const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})};
+ const Value eval_value{ctx.reg_alloc.Consume(value)};
+
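+ // Nothing to move when the phi already lives in the same register as the incoming value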
+ if (phi_reg == eval_value) {
+ return;
+ }
+ switch (phi.Flags<IR::Type>()) {
+ case IR::Type::U1:
+ case IR::Type::U32:
+ case IR::Type::F32:
+ ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
+ break;
+ case IR::Type::U64:
+ case IR::Type::F64:
+ ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
+ break;
+ default:
+ throw NotImplementedException("Phi node type {}", phi.Type());
+ }
+}
+
+void EmitJoin(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
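+ // Demotion is approximated with an unconditional fragment kill (TR is the always-true condition)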
+ ctx.Add("KIL TR.x;");
+}
+
+void EmitBarrier(EmitContext& ctx) {
+ ctx.Add("BAR;");
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("MEMBAR.CTA;");
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("MEMBAR;");
+}
+
+void EmitPrologue(EmitContext& ctx) {
+ // TODO
+}
+
+void EmitEpilogue(EmitContext& ctx) {
+ // TODO
+}
+
+void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
+ if (stream.type == Type::U32 && stream.imm_u32 == 0) {
+ ctx.Add("EMIT;");
+ } else {
+ ctx.Add("EMITS {};", stream);
+ }
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ if (!stream.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Stream is not immediate");
+ }
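+ // The stream value is consumed only to keep the register allocator's reference counts balanced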
+ ctx.reg_alloc.Consume(stream);
+ ctx.Add("ENDPRIM;");
+}
+
+void EmitGetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {},invocation.groupid;", inst);
+}
+
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {},invocation.localid;", inst);
+}
+
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst);
+}
+
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst);
+}
+
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst);
+}
+
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
+ ctx.uses_y_direction = true;
+ ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
+}
+
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
+ ctx.LongAdd("MOV.S64 {}.x,0;", inst);
+}
+
+void EmitGetZeroFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSignFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCarryFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOverflowFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSparseFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetInBoundsFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("OR.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("AND.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("XOR.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("SEQ.S {},{},0;", inst, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
new file mode 100644
index 000000000..68fff613c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
@@ -0,0 +1,67 @@
+
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
+ Register false_value) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ const Register ret{ctx.reg_alloc.LongDefine(inst)};
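+ // Write the condition to RC and predicate the 64-bit moves so that a result register that
+ // aliases one of the inputs is never clobbered before it is read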
+ if (ret == true_value) {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x(EQ.x),{};",
+ cond, ret, false_value);
+ } else if (ret == false_value) {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x(NE.x),{};",
+ cond, ret, true_value);
+ } else {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x,{};"
+ "MOV.U64 {}.x(NE.x),{};",
+ cond, ret, false_value, ret, true_value);
+ }
+}
+
+void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
new file mode 100644
index 000000000..c1498f449
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
@@ -0,0 +1,58 @@
+
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
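+// Shared memory is accessed with LDS/STS against the shared_mem array declared elsewhere by the backend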
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U8 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U16 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
+ ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) {
+ ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset);
+}
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
new file mode 100644
index 000000000..544d475b4
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -0,0 +1,150 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name);
+}
+
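+// TGALL/TGANY/TGEQ are thread-group vote instructions evaluated over the active invocations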
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGALL.S {}.x,{};", inst, pred);
+}
+
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGANY.S {}.x,{};", inst, pred);
+}
+
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGEQ.S {}.x,{};", inst, pred);
+}
+
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGBALLOT {}.x,{};", inst, pred);
+}
+
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name);
+}
+
+static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask,
+ std::string_view op) {
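+ // When the IR also requests the in-bounds flag, fold it into the shuffle instead of emitting it separately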
+ IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (in_bounds) {
+ in_bounds->Invalidate();
+ }
+ std::string mask;
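+ // Pack the clamp value (low bits) and the segmentation mask (bits 8..12) into the single
+ // mask operand expected by the shuffle instruction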
+ if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
+ mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
+ } else {
+ mask = "RC";
+ ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};",
+ ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)},
+ ScalarU32{ctx.reg_alloc.Consume(clamp)});
+ }
+ const Register value_ret{ctx.reg_alloc.Define(inst)};
+ if (in_bounds) {
+ const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
+ ctx.Add("SHF{}.U {},{},{},{};"
+ "MOV.U {}.x,{}.y;",
+ op, bounds_ret, value, index, mask, value_ret, bounds_ret);
+ } else {
+ ctx.Add("SHF{}.U {},{},{},{};"
+ "MOV.U {}.x,{}.y;",
+ op, value_ret, value, index, mask, value_ret, value_ret);
+ }
+}
+
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX");
+}
+
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP");
+}
+
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN");
+}
+
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
+}
+
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
+ ScalarU32 swizzle) {
+ const auto ret{ctx.reg_alloc.Define(inst)};
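+ // The invocation's two swizzle bits (selected by its thread id) index the FSWZA/FSWZB sign
+ // tables, declared elsewhere by the backend, to pick the multipliers for both operands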
+ ctx.Add("AND.U RC.z,{}.threadid,3;"
+ "SHL.U RC.z,RC.z,1;"
+ "SHR.U RC.z,{},RC.z;"
+ "AND.U RC.z,RC.z,3;"
+ "MUL.F RC.x,{},FSWZA[RC.z];"
+ "MUL.F RC.y,{},FSWZB[RC.z];"
+ "ADD.F {}.x,RC.x,RC.y;",
+ ctx.stage_name, swizzle, op_a, op_b, ret);
+}
+
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDX.FINE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
+ ctx.Add("DDX {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDY.FINE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
+ ctx.Add("DDY {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDX.COARSE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
+ ctx.Add("DDX {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDY.COARSE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
+ ctx.Add("DDY {}.x,{};", inst, p);
+ }
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
new file mode 100644
index 000000000..4c046db6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -0,0 +1,186 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
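+// Define allocates a 32-bit register (R#) for an instruction's result; LongDefine allocates a
+// 64-bit register (D#) instead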
+Register RegAlloc::Define(IR::Inst& inst) {
+ return Define(inst, false);
+}
+
+Register RegAlloc::LongDefine(IR::Inst& inst) {
+ return Define(inst, true);
+}
+
+Value RegAlloc::Peek(const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return PeekInst(*value.Inst());
+ }
+}
+
+Value RegAlloc::Consume(const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return ConsumeInst(*value.Inst());
+ }
+}
+
+void RegAlloc::Unref(IR::Inst& inst) {
+ IR::Inst& value_inst{AliasInst(inst)};
+ value_inst.DestructiveRemoveUsage();
+ if (!value_inst.HasUses()) {
+ Free(value_inst.Definition<Id>());
+ }
+}
+
+Register RegAlloc::AllocReg() {
+ Register ret;
+ ret.type = Type::Register;
+ ret.id = Alloc(false);
+ return ret;
+}
+
+Register RegAlloc::AllocLongReg() {
+ Register ret;
+ ret.type = Type::Register;
+ ret.id = Alloc(true);
+ return ret;
+}
+
+void RegAlloc::FreeReg(Register reg) {
+ Free(reg.id);
+}
+
+Value RegAlloc::MakeImm(const IR::Value& value) {
+ Value ret;
+ switch (value.Type()) {
+ case IR::Type::Void:
+ ret.type = Type::Void;
+ break;
+ case IR::Type::U1:
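+ // Booleans are widened to 0xffffffff/0 so they can be consumed directly as signed comparison results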
+ ret.type = Type::U32;
+ ret.imm_u32 = value.U1() ? 0xffffffff : 0;
+ break;
+ case IR::Type::U32:
+ ret.type = Type::U32;
+ ret.imm_u32 = value.U32();
+ break;
+ case IR::Type::F32:
+ ret.type = Type::U32;
+ ret.imm_u32 = Common::BitCast<u32>(value.F32());
+ break;
+ case IR::Type::U64:
+ ret.type = Type::U64;
+ ret.imm_u64 = value.U64();
+ break;
+ case IR::Type::F64:
+ ret.type = Type::U64;
+ ret.imm_u64 = Common::BitCast<u64>(value.F64());
+ break;
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+ return ret;
+}
+
+Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(is_long));
+ } else {
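+ // Results that are never read get a "null" id, which formats as the RC/DC scratch register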
+ Id id{};
+ id.is_long.Assign(is_long ? 1 : 0);
+ id.is_null.Assign(1);
+ inst.SetDefinition<Id>(id);
+ }
+ return Register{PeekInst(inst)};
+}
+
+Value RegAlloc::PeekInst(IR::Inst& inst) {
+ Value ret;
+ ret.type = Type::Register;
+ ret.id = inst.Definition<Id>();
+ return ret;
+}
+
+Value RegAlloc::ConsumeInst(IR::Inst& inst) {
+ Unref(inst);
+ return PeekInst(inst);
+}
+
+Id RegAlloc::Alloc(bool is_long) {
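+ // Linear scan over the per-width bitset; spilling is not implemented, so exhausting the
+ // register file is a hard error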
+ size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
+ std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
+ if (num_used_registers + num_used_long_registers < NUM_REGS) {
+ for (size_t reg = 0; reg < NUM_REGS; ++reg) {
+ if (use[reg]) {
+ continue;
+ }
+ num_regs = std::max(num_regs, reg + 1);
+ use[reg] = true;
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.is_long.Assign(is_long ? 1 : 0);
+ ret.is_spill.Assign(0);
+ ret.is_condition_code.Assign(0);
+ ret.is_null.Assign(0);
+ ret.index.Assign(static_cast<u32>(reg));
+ return ret;
+ }
+ }
+ throw NotImplementedException("Register spilling");
+}
+
+void RegAlloc::Free(Id id) {
+ if (id.is_valid == 0) {
+ throw LogicError("Freeing invalid register");
+ }
+ if (id.is_spill != 0) {
+ throw NotImplementedException("Free spill");
+ }
+ if (id.is_long != 0) {
+ long_register_use[id.index] = false;
+ } else {
+ register_use[id.index] = false;
+ }
+}
+
+/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::Identity:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastU32F32:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::BitCastF32U32:
+ case IR::Opcode::BitCastF64U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) {
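+ // Walk identity and bitcast chains until reaching the instruction that owns the register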
+ IR::Inst* it{&inst};
+ while (IsAliased(*it)) {
+ const IR::Value arg{it->Arg(0)};
+ if (arg.IsImmediate()) {
+ break;
+ }
+ it = arg.InstRecursive();
+ }
+ return *it;
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
new file mode 100644
index 000000000..82aec66c6
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -0,0 +1,303 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+
+#include <fmt/format.h>
+
+#include "common/bit_cast.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext;
+
+enum class Type : u32 {
+ Void,
+ Register,
+ U32,
+ U64,
+};
+
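+/// Register identifier with allocation metadata packed into a single 32-bit word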
+struct Id {
+ union {
+ u32 raw;
+ BitField<0, 1, u32> is_valid;
+ BitField<1, 1, u32> is_long;
+ BitField<2, 1, u32> is_spill;
+ BitField<3, 1, u32> is_condition_code;
+ BitField<4, 1, u32> is_null;
+ BitField<5, 27, u32> index;
+ };
+
+ bool operator==(Id rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+ bool operator!=(Id rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(sizeof(Id) == sizeof(u32));
+
+struct Value {
+ Type type;
+ union {
+ Id id;
+ u32 imm_u32;
+ u64 imm_u64;
+ };
+
+ bool operator==(const Value& rhs) const noexcept {
+ if (type != rhs.type) {
+ return false;
+ }
+ switch (type) {
+ case Type::Void:
+ return true;
+ case Type::Register:
+ return id == rhs.id;
+ case Type::U32:
+ return imm_u32 == rhs.imm_u32;
+ case Type::U64:
+ return imm_u64 == rhs.imm_u64;
+ }
+ return false;
+ }
+ bool operator!=(const Value& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+struct Register : Value {};
+struct ScalarRegister : Value {};
+struct ScalarU32 : Value {};
+struct ScalarS32 : Value {};
+struct ScalarF32 : Value {};
+struct ScalarF64 : Value {};
+
+class RegAlloc {
+public:
+ RegAlloc() = default;
+
+ Register Define(IR::Inst& inst);
+
+ Register LongDefine(IR::Inst& inst);
+
+ [[nodiscard]] Value Peek(const IR::Value& value);
+
+ Value Consume(const IR::Value& value);
+
+ void Unref(IR::Inst& inst);
+
+ [[nodiscard]] Register AllocReg();
+
+ [[nodiscard]] Register AllocLongReg();
+
+ void FreeReg(Register reg);
+
+ void InvalidateConditionCodes() {
+ // This does nothing for now
+ }
+
+ [[nodiscard]] size_t NumUsedRegisters() const noexcept {
+ return num_used_registers;
+ }
+
+ [[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
+ return num_used_long_registers;
+ }
+
+ [[nodiscard]] bool IsEmpty() const noexcept {
+ return register_use.none() && long_register_use.none();
+ }
+
+ /// Returns true if the instruction is expected to be aliased to another
+ static bool IsAliased(const IR::Inst& inst);
+
+ /// Returns the underlying value out of an alias sequence
+ static IR::Inst& AliasInst(IR::Inst& inst);
+
+private:
+ static constexpr size_t NUM_REGS = 4096;
+ static constexpr size_t NUM_ELEMENTS = 4;
+
+ Value MakeImm(const IR::Value& value);
+
+ Register Define(IR::Inst& inst, bool is_long);
+
+ Value PeekInst(IR::Inst& inst);
+
+ Value ConsumeInst(IR::Inst& inst);
+
+ Id Alloc(bool is_long);
+
+ void Free(Id id);
+
+ size_t num_used_registers{};
+ size_t num_used_long_registers{};
+ std::bitset<NUM_REGS> register_use{};
+ std::bitset<NUM_REGS> long_register_use{};
+};
+
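+/// Formats a register id; when scalar is true only the .x component is referenced.
+/// Null ids are emitted as the RC/DC scratch registers.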
+template <bool scalar, typename FormatContext>
+auto FormatTo(FormatContext& ctx, Id id) {
+ if (id.is_condition_code != 0) {
+ throw NotImplementedException("Condition code emission");
+ }
+ if (id.is_spill != 0) {
+ throw NotImplementedException("Spill emission");
+ }
+ if constexpr (scalar) {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+ }
+ if (id.is_long != 0) {
+ return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
+ } else {
+ return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
+ }
+ } else {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+ }
+ if (id.is_long != 0) {
+ return fmt::format_to(ctx.out(), "D{}", id.index.Value());
+ } else {
+ return fmt::format_to(ctx.out(), "R{}", id.index.Value());
+ }
+ }
+}
+
+} // namespace Shader::Backend::GLASM
+
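+// fmt formatters for the value wrappers: registers are printed by name, immediates are printed
+// inline reinterpreted as the wrapper's type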
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::Id> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) {
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::Register> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) {
+ if (value.type != Shader::Backend::GLASM::Type::Register) {
+ throw Shader::InvalidArgument("Register value type is not register");
+ }
+ return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) {
+ if (value.type != Shader::Backend::GLASM::Type::Register) {
+ throw Shader::InvalidArgument("Register value type is not register");
+ }
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", value.imm_u32);
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32));
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ break;
+ case Shader::Backend::GLASM::Type::U64:
+ return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};