diff options
Diffstat (limited to '')
73 files changed, 18090 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp new file mode 100644 index 000000000..069c019ad --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -0,0 +1,154 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "FLAT "; + case Interpolation::NoPerspective: + return "NOPERSPECTIVE "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + // FIXME: Temporary partial implementation + u32 cbuf_index{}; + for (const auto& desc : info.constant_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Constant buffer descriptor array"); + } + Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); + ++cbuf_index; + } + u32 ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Storage buffer descriptor array"); + } + if (runtime_info.glasm_use_storage_buffers) { + Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); + ++bindings.storage_buffer; + ++ssbo_index; + } + } + if (!runtime_info.glasm_use_storage_buffers) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } + } + stage = program.stage; + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + attrib_name = "vertex"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; + case Stage::Geometry: + stage_name = "primitive"; + attrib_name = "vertex"; + break; + case Stage::Fragment: + stage_name = "fragment"; + attrib_name = "fragment"; + break; + case Stage::Compute: + stage_name = "invocation"; + break; + } + const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (loads.Generic(index)) { + Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); + } + } + if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { + Add("ATTRIB vertex_position=vertex.position;"); + } + if (info.uses_invocation_id) { + Add("ATTRIB primitive_invocation=primitive.invocation;"); + } + if (info.stores_tess_level_outer) { + Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); + } + if (info.stores_tess_level_inner) { + Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); + } + if (info.stores.ClipDistances()) { + Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + if (stage == Stage::TessellationControl) { + Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};" + "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};", + index, index, index, index, index, index); + } else { + Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, + index, index); + } + } + if (stage == Stage::Fragment) { + Add("OUTPUT frag_color0=result.color;"); + for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { + Add("OUTPUT frag_color{}=result.color[{}];", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); + } + } + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffer_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + image_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffer_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + texture_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h new file mode 100644 index 000000000..8433e5c00 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template <typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst), + std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst), + std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string code; + RegAlloc reg_alloc{}; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + std::vector<u32> texture_buffer_bindings; + std::vector<u32> image_buffer_bindings; + std::vector<u32> texture_bindings; + std::vector<u32> image_bindings; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; + + u32 num_safety_loop_vars{}; + bool uses_y_direction{}; +}; + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp new file mode 100644 index 000000000..4ce1c4f54 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -0,0 +1,492 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <tuple> + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <typename T> +struct Identity { + Identity(T data_) : data{data_} {} + + T Extract() { + return data; + } + + T data; +}; + +template <bool scalar> +class RegWrapper { +public: + RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { + const Value value{reg_alloc.Peek(ir_value)}; + if (value.type == Type::Register) { + inst = ir_value.InstRecursive(); + reg = Register{value}; + } else { + reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); + } + switch (value.type) { + case Type::Register: + case Type::Void: + break; + case Type::U32: + ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); + break; + case Type::U64: + ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); + break; + } + } + + auto Extract() { + if (inst) { + reg_alloc.Unref(*inst); + } else { + reg_alloc.FreeReg(reg); + } + return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; + } + +private: + RegAlloc& reg_alloc; + IR::Inst* inst{}; + Register reg{}; +}; + +template <typename ArgType> +class ValueWrapper { +public: + ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) + : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} + + ArgType Extract() { + if (!ir_value.IsImmediate()) { + reg_alloc.Unref(*ir_value.InstRecursive()); + } + return value; + } + +private: + RegAlloc& reg_alloc; + const IR::Value& ir_value; + ArgType value; +}; + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, Register>) { + return RegWrapper<false>{ctx, arg}; + } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { + return RegWrapper<true>{ctx, arg}; + } else if constexpr (std::is_base_of_v<Value, ArgType>) { + return ValueWrapper<ArgType>{ctx, arg}; + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return Identity<const IR::Value&>{arg}; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return Identity{arg.U32()}; + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return Identity{arg.Attribute()}; + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return Identity{arg.Patch()}; + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return Identity{arg.Reg()}; + } +} + +template <auto func, bool is_first_arg_inst> +struct InvokeCall { + template <typename... Args> + InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, args.Extract()...); + } else { + func(ctx, args.Extract()...); + } + } +}; + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (is_first_arg_inst) { + InvokeCall<func, is_first_arg_inst>{ + ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; + } else { + InvokeCall<func, is_first_arg_inst>{ + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + const auto eval{ + [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("MOV.S.CC RC,{};" + "IF NE.x;", + eval(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("ENDIF;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("REP;"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (!Settings::values.disable_shader_loop_safety_checks) { + const u32 loop_index{ctx.num_safety_loop_vars++}; + const u32 vector_index{loop_index / 4}; + const char component{"xyzw"[loop_index % 4]}; + ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" + "BRK(LT.{});", + vector_index, component, vector_index, component, component); + } + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + "BRK(EQ.x);" + "ENDREP;", + eval(node.data.repeat.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("BRK;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + "BRK (NE.x);", + eval(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("RET;"); + break; + } + } + if (!ctx.reg_alloc.IsEmpty()) { + LOG_WARNING(Shader_GLASM, "Register leak after generating code"); + } +} + +void SetupOptions(const IR::Program& program, const Profile& profile, + const RuntimeInfo& runtime_info, std::string& header) { + const Info& info{program.info}; + const Stage stage{program.stage}; + + // TODO: Track the shared atomic ops + header += "OPTION NV_internal;" + "OPTION NV_shader_storage_buffer;" + "OPTION NV_gpu_program_fp64;"; + if (info.uses_int64_bit_atomics) { + header += "OPTION NV_shader_atomic_int64;"; + } + if (info.uses_atomic_f32_add) { + header += "OPTION NV_shader_atomic_float;"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "OPTION NV_shader_atomic_fp16_vector;"; + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_fswzadd) { + header += "OPTION NV_shader_thread_group;"; + } + if (info.uses_subgroup_shuffles) { + header += "OPTION NV_shader_thread_shuffle;"; + } + if (info.uses_sparse_residency) { + header += "OPTION EXT_sparse_texture2;"; + } + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { + if (profile.support_viewport_index_layer_non_geometry) { + header += "OPTION NV_viewport_array2;"; + } + } + if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { + header += "OPTION NV_geometry_shader_passthrough;"; + } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + header += "OPTION EXT_shader_image_load_formatted;"; + } + if (profile.support_derivative_control) { + header += "OPTION ARB_derivative_control;"; + } + if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { + header += "OPTION NV_early_fragment_tests;"; + } + if (stage == Stage::Fragment) { + header += "OPTION ARB_draw_buffers;"; + } +} + +std::string_view StageHeader(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + return "!!NVvp5.0\n"; + case Stage::TessellationControl: + return "!!NVtcp5.0\n"; + case Stage::TessellationEval: + return "!!NVtep5.0\n"; + case Stage::Geometry: + return "!!NVgp5.0\n"; + case Stage::Fragment: + return "!!NVfp5.0\n"; + case Stage::Compute: + return "!!NVcp5.0\n"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "POINTS"; + case InputTopology::Lines: + return "LINES"; + case InputTopology::LinesAdjacency: + return "LINES_ADJACENCY"; + case InputTopology::Triangles: + return "TRIANGLES"; + case InputTopology::TrianglesAdjacency: + return "TRIANGLES_ADJACENCY"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "POINTS"; + case OutputTopology::LineStrip: + return "LINE_STRIP"; + case OutputTopology::TriangleStrip: + return "TRIANGLE_STRIP"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "TRIANGLES"; + case TessPrimitive::Quads: + return "QUADS"; + case TessPrimitive::Isolines: + return "ISOLINES"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "EQUAL"; + case TessSpacing::FractionalOdd: + return "FRACTIONAL_ODD"; + case TessSpacing::FractionalEven: + return "FRACTIONAL_EVEN"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} +} // Anonymous namespace + +std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + std::string header{StageHeader(program.stage)}; + SetupOptions(program, profile, runtime_info, header); + switch (program.stage) { + case Stage::TessellationControl: + header += fmt::format("VERTICES_OUT {};", program.invocations); + break; + case Stage::TessellationEval: + header += fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "CW" : "CCW"); + break; + case Stage::Geometry: + header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); + if (program.is_geometry_passthrough) { + if (profile.support_geometry_shader_passthrough) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (program.info.passthrough.Generic(index)) { + header += fmt::format("PASSTHROUGH result.attrib[{}];", index); + } + } + if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + header += "PASSTHROUGH result.position;"; + } + } else { + LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); + } + } else { + header += + fmt::format("VERTICES_OUT {};" + "PRIMITIVE_OUT {};", + program.output_vertices, OutputPrimitive(program.output_topology)); + } + break; + case Stage::Compute: + header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], + program.workgroup_size[1], program.workgroup_size[2]); + break; + default: + break; + } + if (program.shared_memory_size > 0) { + header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); + header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); + } + header += "TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { + header += fmt::format("R{},", index); + } + if (program.local_memory_size > 0) { + header += fmt::format("lmem[{}],", program.local_memory_size); + } + if (program.info.uses_fswzadd) { + header += "FSWZA[4],FSWZB[4],"; + } + const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("loop{},", index); + } + header += "RC;" + "LONG TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { + header += fmt::format("D{},", index); + } + header += "DC;"; + if (program.info.uses_fswzadd) { + header += "MOV.F FSWZA[0],-1;" + "MOV.F FSWZA[1],1;" + "MOV.F FSWZA[2],-1;" + "MOV.F FSWZA[3],0;" + "MOV.F FSWZB[0],-1;" + "MOV.F FSWZB[1],-1;" + "MOV.F FSWZB[2],1;" + "MOV.F FSWZB[3],-1;"; + } + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); + } + if (ctx.uses_y_direction) { + header += "PARAM y_direction[1]={state.material.front.ambient};"; + } + ctx.code.insert(0, header); + ctx.code += "END"; + return ctx.code; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h new file mode 100644 index 000000000..bcb55f062 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { + +[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { + Bindings binding; + return EmitGLASM(profile, runtime_info, program, binding); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp new file mode 100644 index 000000000..9201ccd39 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -0,0 +1,91 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +static void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real register out of the condition + inst.DestructiveAddUsage(1); + const Register ret{ctx.reg_alloc.Define(inst)}; + const ScalarS32 input{ctx.reg_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("MOV.S {},{};", ret, input); + } +} + +void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64.U {}.x,{};", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64.U {}.xy,{}.x;", inst, value); +} + +void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("PK2H {}.x,{};", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP2H {}.xy,{}.x;", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64 {}.x,{};", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64 {}.xy,{}.x;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp new file mode 100644 index 000000000..bff0b7c1c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -0,0 +1,244 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <auto read_imm, char type, typename... Values> +void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (std::ranges::any_of(std::array{elements...}, + [](const IR::Value& value) { return value.IsImmediate(); })) { + using Type = std::invoke_result_t<decltype(read_imm), IR::Value>; + const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...}; + ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]), + fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); + } + size_t index{}; + for (const IR::Value& element : {elements...}) { + if (!element.IsImmediate()) { + const ScalarU32 value{ctx.reg_alloc.Consume(element)}; + ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value); + } + ++index; + } +} + +void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ret == composite && index == 0) { + // No need to do anything here, the source and destination are the same register + return; + } + ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]); +} + +template <typename ObjectType> +void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, + u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + const char swizzle{"xyzw"[index]}; + if (ret != composite && ret == object) { + // The object is aliased with the return value, so we have to use a temporary to insert + ctx.Add("MOV.{} RC,{};" + "MOV.{} RC.{},{};" + "MOV.{} {},RC;", + type, composite, type, swizzle, object, type, ret); + } else if (ret != composite) { + // The input composite is not aliased with the return value so we have to copy it before + // hand. But the insert object is not aliased with the return value, so we don't have to + // worry about that + ctx.Add("MOV.{} {},{};" + "MOV.{} {}.{},{};", + type, ret, composite, type, ret, swizzle, object); + } else { + // The return value is alised so we can just insert the object, it doesn't matter if it's + // aliased + ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3, + [[maybe_unused]] Register e4) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..02c9dc6d7 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -0,0 +1,346 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::GLASM { +namespace { +void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + std::string_view size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Indirect constant buffer loading"); + } + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset.type == Type::U32) { + // Avoid reading arrays out of bounds, matching hardware's behavior + if (offset.imm_u32 >= 0x10'000) { + ctx.Add("MOV.S {},0;", ret); + return; + } + } + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { + return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; +} + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U8"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "S8"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U16"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "S16"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U32"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "F32"); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U32X2"); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.Add("MOV.S {}.x,primitive.id;", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + if (IsInputArray(ctx.stage)) { + ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle); + } else { + ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); + } + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle); + break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); + break; + case IR::Attribute::VertexId: + ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); + break; + case IR::Attribute::FrontFace: + ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, + [[maybe_unused]] ScalarU32 vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.layer.x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, + "Layer stored outside of geometry shader not supported by device"); + } + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewport.x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, + "Viewport stored outside of geometry shader not supported by device"); + } + break; + case IR::Attribute::ViewportMask: + // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage. + if (ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewportmask[0].x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); + } + break; + case IR::Attribute::PointSize: + ctx.Add("MOV.F result.pointsize.x,{};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("MOV.F result.position.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F result.color.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("MOV.F result.fogcoord.x,{};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("MOV.F result.clip[{}].x,{};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) { + // RC.x = base_index + // RC.y = masked_index + // RC.z = compare_index + ctx.Add("SHR.S RC.x,{},2;" + "AND.S RC.y,RC.x,3;" + "SHR.S RC.z,{},4;", + offset, offset); + + const Register ret{ctx.reg_alloc.Define(inst)}; + u32 num_endifs{}; + const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) { + ++num_endifs; + ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index + "IF NE.w;" + // X + "SEQ.S.CC RC.w,RC.y,0;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // Y + "SEQ.S.CC RC.w,RC.y,1;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // Z + "SEQ.S.CC RC.w,RC.y,2;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // W + "MOV {}.x,{};" + "ENDIF;" + "ENDIF;" + "ENDIF;" + "ELSE;", + compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]); + }}; + const auto read_swizzled{[&](u32 compare_index, std::string_view value) { + const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value), + fmt::format("{}.z", value), fmt::format("{}.w", value)}; + read(compare_index, values); + }}; + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { + const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; + if (IsInputArray(ctx.stage)) { + read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); + } else { + read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); + } + } + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { + continue; + } + read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); + } + for (u32 i = 0; i < num_endifs; ++i) { + ctx.Add("ENDIF;"); + } +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""}; + ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { + ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); +} + +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { + ctx.Add("MOV.S result.samplemask.x,{};", value); +} + +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { + ctx.Add("MOV.F result.depth.z,{};", value); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { + ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { + ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..ccdf1cbc8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp @@ -0,0 +1,231 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +std::string_view FpRounding(IR::FpRounding fp_rounding) { + switch (fp_rounding) { + case IR::FpRounding::DontCare: + return ""; + case IR::FpRounding::RN: + return ".ROUND"; + case IR::FpRounding::RZ: + return ".TRUNC"; + case IR::FpRounding::RM: + return ".FLR"; + case IR::FpRounding::RP: + return ".CEIL"; + } + throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding); +} + +template <typename InputType> +void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest, + std::string_view src, bool is_long_result) { + const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)}; + const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)}; + ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value); +} +} // Anonymous namespace + +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S16", "F16", false); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S16", "F32", false); +} + +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S16", "F64", false); +} + +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S32", "F16", false); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S32", "F32", false); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S32", "F64", false); +} + +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S64", "F16", true); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S64", "F32", true); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S64", "F64", true); +} + +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U16", "F16", false); +} + +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U16", "F32", false); +} + +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U16", "F64", false); +} + +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "F16", false); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U32", "F32", false); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U32", "F64", false); +} + +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U64", "F16", true); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U64", "F32", true); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U64", "F64", true); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "U64", "U32", true); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "U64", false); +} + +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F16", "F32", false); +} + +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "F16", false); +} + +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "F32", "F64", false); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F64", "F32", true); +} + +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S8", false); +} + +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S16", false); +} + +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F16", "S32", false); +} + +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S64", false); +} + +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U8", false); +} + +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U16", false); +} + +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F16", "U32", false); +} + +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U64", false); +} + +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S8", false); +} + +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S16", false); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F32", "S32", false); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S64", false); +} + +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U8", false); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U16", false); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F32", "U32", false); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U64", false); +} + +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S8", true); +} + +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S16", true); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F64", "S32", true); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S64", true); +} + +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U8", true); +} + +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U16", true); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F64", "U32", true); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U64", true); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..4ed58619d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -0,0 +1,414 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <typename InputType> +void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, + std::string_view type, bool ordered, bool inequality = false) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); + if (ordered && inequality) { + ctx.Add("SEQ.{} RC.y,{},{};" + "SEQ.{} RC.z,{},{};" + "AND.U RC.x,RC.x,RC.y;" + "AND.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } else if (ordered) { + ctx.Add("SNE.S {}.x,RC.x,0;", ret); + } else { + ctx.Add("SNE.{} RC.y,{},{};" + "SNE.{} RC.z,{},{};" + "OR.U RC.x,RC.x,RC.y;" + "OR.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } +} + +template <typename InputType> +void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, + InputType max_value, std::string_view type) { + // Call MAX first to properly clamp nan to min_value instead + ctx.Add("MAX.{} RC.x,{},{};" + "MIN.{} {}.x,RC.x,{};", + type, min_value, value, type, ret, max_value); +} + +std::string_view Precise(IR::Inst& inst) { + const bool precise{inst.Flags<IR::FpControl>().no_contraction}; + return precise ? ".PREC" : ""; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F {}.x,|{}|;", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b, + [[maybe_unused]] Register c) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { + ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { + ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MAX.F {}.x,{},{};", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MIN.F {}.x,{},{};", inst, a, b); +} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { + ctx.Add("MOV.F {}.x,-{};", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("SIN {}.x,{};", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("COS {}.x,{};", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("EX2 {}.x,{};", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("LG2 {}.x,{};", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RCP {}.x,{};", inst, value); +} + +void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RSQ {}.x,{};", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F.SAT {}.x,{};", inst, value); +} + +void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, + [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value) { + Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value) { + Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("ROUND.F {}.x,{};", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("FLR.F {}.x,{};", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("CEIL.F {}.x,{};", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("TRUNC.F {}.x,{};", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Compare(ctx, inst, value, value, "SNE", "F", true, false); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Compare(ctx, inst, value, value, "SNE", "F64", true, false); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -0,0 +1,850 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <utility> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +struct ScopedRegister { + ScopedRegister() = default; + ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} + + ~ScopedRegister() { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + } + + ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + reg_alloc = std::exchange(rhs.reg_alloc, nullptr); + reg = rhs.reg; + return *this; + } + + ScopedRegister(ScopedRegister&& rhs) noexcept + : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} + + ScopedRegister& operator=(const ScopedRegister&) = delete; + ScopedRegister(const ScopedRegister&) = delete; + + RegAlloc* reg_alloc{}; + Register reg; +}; + +std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); + } +} + +std::string Image(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); + } +} + +std::string_view TextureType(IR::TextureInstInfo info) { + if (info.is_depth) { + switch (info.type) { + case TextureType::Color1D: + return "SHADOW1D"; + case TextureType::ColorArray1D: + return "SHADOWARRAY1D"; + case TextureType::Color2D: + return "SHADOW2D"; + case TextureType::ColorArray2D: + return "SHADOWARRAY2D"; + case TextureType::Color3D: + return "SHADOW3D"; + case TextureType::ColorCube: + return "SHADOWCUBE"; + case TextureType::ColorArrayCube: + return "SHADOWARRAYCUBE"; + case TextureType::Buffer: + return "SHADOWBUFFER"; + } + } else { + switch (info.type) { + case TextureType::Color1D: + return "1D"; + case TextureType::ColorArray1D: + return "ARRAY1D"; + case TextureType::Color2D: + return "2D"; + case TextureType::ColorArray2D: + return "ARRAY2D"; + case TextureType::Color3D: + return "3D"; + case TextureType::ColorCube: + return "CUBE"; + case TextureType::ColorArrayCube: + return "ARRAYCUBE"; + case TextureType::Buffer: + return "BUFFER"; + } + } + throw InvalidArgument("Invalid texture type {}", info.type.Value()); +} + +std::string Offset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return ""; + } + return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); +} + +std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, + const IR::Value& offset2) { + if (offset2.IsEmpty()) { + return {}; + } else { + return {ctx.reg_alloc, ctx.reg_alloc}; + } +} + +void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, + const IR::Value& offset2) { + const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; + const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; + // Input swizzle: [XYXY] [XYXY] + // Output swizzle: [XXXX] [YYYY] + ctx.Add("MOV {}.x,{}.x;" + "MOV {}.y,{}.z;" + "MOV {}.z,{}.x;" + "MOV {}.w,{}.z;" + "MOV {}.x,{}.y;" + "MOV {}.y,{}.w;" + "MOV {}.z,{}.y;" + "MOV {}.w,{}.w;", + off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, + offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); +} + +std::string GradOffset(const IR::Value& offset) { + if (offset.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); + return ""; + } + IR::Inst* const vector{offset.InstRecursive()}; + if (!vector->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); + return ""; + } + switch (vector->NumArgs()) { + case 1: + return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); + case 2: + return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), + static_cast<s32>(vector->Arg(1).U32())); + default: + throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); + } +} + +std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { + if (coord.IsImmediate()) { + ScopedRegister scoped_reg(ctx.reg_alloc); + ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); + return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; + } + std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; + if (coord.InstRecursive()->HasUses()) { + // Move non-dead coords to a separate register, although this should never happen because + // vectors are only assembled for immediate texture instructions + ctx.Add("MOV.F RC,{};", coord_vec); + coord_vec = "RC"; + } + return {std::move(coord_vec), ScopedRegister{}}; +} + +void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { + if (!sparse_inst) { + return; + } + const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; + ctx.Add("MOV.S {},-1;" + "MOV.S {}(NONRESIDENT),0;", + sparse_ret, sparse_ret); +} + +std::string_view FormatStorage(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return "U"; + case ImageFormat::R8_UINT: + return "U8"; + case ImageFormat::R8_SINT: + return "S8"; + case ImageFormat::R16_UINT: + return "U16"; + case ImageFormat::R16_SINT: + return "S16"; + case ImageFormat::R32_UINT: + return "U32"; + case ImageFormat::R32G32_UINT: + return "U32X2"; + case ImageFormat::R32G32B32A32_UINT: + return "U32X4"; + } + throw InvalidArgument("Invalid image format {}", format); +} + +template <typename T> +void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, + std::string_view op) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + if (info.has_lod_clamp) { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + } + } else { + if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { + ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.has_lod_clamp) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + bias_lc_vec, texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + default: + throw NotImplementedException("Invalid type {} with bias and lod clamp", + info.type.Value()); + } + } else { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{}.x;" + "TXB.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + } + } else { + if (info.has_lod_clamp) { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, + texture, type, offset_vec); + } else { + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + } + } else { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F{} {},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } else { + ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, + type, offset_vec); + } + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TXL.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const char comp{"xyzw"[info.gather_component]}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, + offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, + texture, comp, type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref) { + // FIXME: This instruction is not working as expected + + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + std::string args; + switch (info.type) { + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArrayCube: + args = fmt::format("{},{}", coord_vec, dref_value); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, + type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::Buffer) { + ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else if (ms.type != Type::Void) { + ctx.Add("MOV.S {}.w,{};" + "TXFMS.F{} {},{},{},{}{};", + coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else { + ctx.Add("MOV.S {}.w,{};" + "TXF.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister dpdx, dpdy; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + if (multi_component) { + // Allocate this early to avoid aliasing other registers + dpdx = ScopedRegister{ctx.reg_alloc}; + dpdy = ScopedRegister{ctx.reg_alloc}; + } + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{GradOffset(offset)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (multi_component) { + ctx.Add("MOV.F {}.x,{}.x;" + "MOV.F {}.y,{}.z;" + "MOV.F {}.x,{}.y;" + "MOV.F {}.y,{}.w;", + dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, + dpdy.reg, derivatives_vec); + if (info.has_lod_clamp) { + const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; + ctx.Add("MOV.F {}.w,{};" + "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", + dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } else { + ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } + } else { + ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, + derivatives_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); + StoreSparse(ctx, sparse_inst); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..12afda43b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -0,0 +1,625 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/backend/glasm/reg_alloc.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, + Register e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value); +void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitPackFloat2x16(EmitContext& ctx, Register value); +void EmitUnpackFloat2x16(EmitContext& ctx, Register value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPNeg16(EmitContext& ctx, Register value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip64(EmitContext& ctx, Register value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecipSqrt64(EmitContext& ctx, Register value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate16(EmitContext& ctx, Register value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate64(EmitContext& ctx, Register value); +void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value); +void EmitFPRoundEven16(EmitContext& ctx, Register value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPFloor16(EmitContext& ctx, Register value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPCeil16(EmitContext& ctx, Register value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPTrunc16(EmitContext& ctx, Register value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPIsNan16(EmitContext& ctx, Register value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..f55c26b76 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -0,0 +1,294 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, + std::string_view lop) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + const std::array flags{ + inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), + }; + for (IR::Inst* const flag_inst : flags) { + if (flag_inst) { + flag_inst->Invalidate(); + } + } + const bool cc{inst.HasAssociatedPseudoOperation()}; + const std::string_view cc_mod{cc ? ".CC" : ""}; + if (cc) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); + if (!cc) { + return; + } + static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; + for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { + if (!flags[flag_index]) { + continue; + } + const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; + if (flag_index == 0) { + ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); + } else { + // We could use conditional execution here, but it's broken on Nvidia's compiler + ctx.Add("IF {}.x;" + "MOV.S {}.x,-1;" + "ELSE;" + "MOV.S {}.x,0;" + "ENDIF;", + masks[flag_index], flag_ret, flag_ret); + } + } +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("SUB.S {}.x,{},{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MUL.S {}.x,{},{};", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { + ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); + } else { + ctx.Add("MOV.S {},-{};", inst, value); + } +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.S64 {},-{};", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("ABS.S {},{};", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { + ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift) { + ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "AND"); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "OR"); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "XOR"); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); + } else { + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" + "BFI.S {},RC,{},{};", + count, offset, ret, insert, base); + } +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); + } else { + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" + "BFE.S {},RC,{};", + count, offset, ret, base); + } +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); + } else { + ctx.Add("MOV.U RC.x,{};" + "MOV.U RC.y,{};" + "BFE.U {},RC,{};", + count, offset, ret, base); + } + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BFR {},{};", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTC {},{};", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("NOT.S {},{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTFM.S {},{};", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + ctx.Add("BTFM.U {},{};", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MIN.S {},{},{};", inst, a, b); +} + +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MIN.U {},{},{};", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MAX.S {},{},{};", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MAX.U {},{},{};", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.S RC.x,{},{};" + "MAX.S {}.x,RC.x,{};", + max, value, ret, min); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.U RC.x,{},{};" + "MAX.U {}.x,RC.x,{};", + max, value, ret, min); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..af9fac7c1 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -0,0 +1,568 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + std::string_view then_expr, std::string_view else_expr = {}) { + // Operate on bindless SSBO, call the expression with bounds checking + // address = c[binding].xy + // length = c[binding].z + const u32 sb_binding{binding.U32()}; + ctx.Add("PK64.U DC,c[{}];" // pointer = address + "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) + "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset + "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length + sb_binding, offset, offset, sb_binding); + if (else_expr.empty()) { + ctx.Add("IF NE.x;{}ENDIF;", then_expr); + } else { + ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); + } +} + +void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, + std::string_view else_expr = {}) { + const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!ctx.info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; + ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr + "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 + "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 + "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size + "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 + "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0 + "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b + "IF NE.x;" // if cond + "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr + ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, + address, address); + if (pointer_based) { + ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf + "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset + "{}" + "ELSE;", + index, expr); + } else { + ctx.Add("CVT.U32.U64 RC.x,DC.x;" + "{},ssbo{}[RC.x];" + "ELSE;", + expr, index); + } + } + if (!else_expr.empty()) { + ctx.Add("{}", else_expr); + } + const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; + for (size_t index = 0; index < num_used_buffers; ++index) { + ctx.Add("ENDIF;"); + } +} + +template <typename ValueType> +void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, + std::string_view size) { + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + } +} + +void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + } +} + +template <typename ValueType> +void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); + } +} + +void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.S {},0;", ret)); + } +} + +template <typename ValueType> +void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + ValueType value, std::string_view operation, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), + offset); + } else { + StorageOp(ctx, binding, offset, + fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); + } +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U8"); +} + +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "S8"); +} + +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U16"); +} + +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "S16"); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32"); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32X2"); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32X4"); +} + +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U8"); +} + +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S8"); +} + +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U16"); +} + +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S16"); +} + +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { + GlobalWrite(ctx, address, value, "U32"); +} + +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X2"); +} + +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X4"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U8"); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "S8"); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U16"); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "S16"); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32"); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32X2"); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32X4"); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U8"); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { + Write(ctx, binding, offset, value, "S8"); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U16"); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { + Write(ctx, binding, offset, value, "S16"); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U32"); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { + Write(ctx, binding, offset, value, "U32X2"); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { + Write(ctx, binding, offset, value, "U32X4"); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U32"); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U32"); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U32"); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "AND", "U32"); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..ff64c6924 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -0,0 +1,273 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLASM { + +#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) + +static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { + switch (phi.Arg(0).Type()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.reg_alloc.Define(phi); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.reg_alloc.LongDefine(phi); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.reg_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.reg_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } + const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; + const Value eval_value{ctx.reg_alloc.Consume(value)}; + + if (phi_reg == eval_value) { + return; + } + switch (phi.Flags<IR::Type>()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("KIL TR.x;"); +} + +void EmitBarrier(EmitContext& ctx) { + ctx.Add("BAR;"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("MEMBAR.CTA;"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("MEMBAR;"); +} + +void EmitPrologue(EmitContext& ctx) { + // TODO +} + +void EmitEpilogue(EmitContext& ctx) { + // TODO +} + +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { + if (stream.type == Type::U32 && stream.imm_u32 == 0) { + ctx.Add("EMIT;"); + } else { + ctx.Add("EMITS {};", stream); + } +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (!stream.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Stream is not immediate"); + } + ctx.reg_alloc.Consume(stream); + ctx.Add("ENDPRIM;"); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.groupid;", inst); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.localid;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); +} + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.LongAdd("MOV.S64 {}.x,0;", inst); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("OR.S {},{},{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("AND.S {},{},{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("XOR.S {},{},{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("SEQ.S {},{},0;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..68fff613c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -0,0 +1,67 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value) { + ctx.reg_alloc.InvalidateConditionCodes(); + const Register ret{ctx.reg_alloc.LongDefine(inst)}; + if (ret == true_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(EQ.x),{};", + cond, ret, false_value); + } else if (ret == false_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, true_value); + } else { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, false_value, ret, true_value); + } +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..c1498f449 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp @@ -0,0 +1,58 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); +} +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..544d475b4 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -0,0 +1,150 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLASM { + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGALL.S {}.x,{};", inst, pred); +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGANY.S {}.x,{};", inst, pred); +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGEQ.S {}.x,{};", inst, pred); +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGBALLOT {}.x,{};", inst, pred); +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); +} + +static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask, + std::string_view op) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (in_bounds) { + in_bounds->Invalidate(); + } + std::string mask; + if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { + mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); + } else { + mask = "RC"; + ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", + ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, + ScalarU32{ctx.reg_alloc.Consume(clamp)}); + } + const Register value_ret{ctx.reg_alloc.Define(inst)}; + if (in_bounds) { + const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, bounds_ret, value, index, mask, value_ret, bounds_ret); + } else { + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, value_ret, value, index, mask, value_ret, value_ret); + } +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle) { + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("AND.U RC.z,{}.threadid,3;" + "SHL.U RC.z,RC.z,1;" + "SHR.U RC.z,{},RC.z;" + "AND.U RC.z,RC.z,3;" + "MUL.F RC.x,{},FSWZA[RC.z];" + "MUL.F RC.y,{},FSWZB[RC.z];" + "ADD.F {}.x,RC.x,RC.y;", + ctx.stage_name, swizzle, op_a, op_b, ret); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); + ctx.Add("DDX {}.x,{};", inst, p); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); + ctx.Add("DDY {}.x,{};", inst, p); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); + ctx.Add("DDX {}.x,{};", inst, p); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); + ctx.Add("DDY {}.x,{};", inst, p); + } +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..4c046db6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -0,0 +1,186 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +Register RegAlloc::Define(IR::Inst& inst) { + return Define(inst, false); +} + +Register RegAlloc::LongDefine(IR::Inst& inst) { + return Define(inst, true); +} + +Value RegAlloc::Peek(const IR::Value& value) { + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return PeekInst(*value.Inst()); + } +} + +Value RegAlloc::Consume(const IR::Value& value) { + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return ConsumeInst(*value.Inst()); + } +} + +void RegAlloc::Unref(IR::Inst& inst) { + IR::Inst& value_inst{AliasInst(inst)}; + value_inst.DestructiveRemoveUsage(); + if (!value_inst.HasUses()) { + Free(value_inst.Definition<Id>()); + } +} + +Register RegAlloc::AllocReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(false); + return ret; +} + +Register RegAlloc::AllocLongReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(true); + return ret; +} + +void RegAlloc::FreeReg(Register reg) { + Free(reg.id); +} + +Value RegAlloc::MakeImm(const IR::Value& value) { + Value ret; + switch (value.Type()) { + case IR::Type::Void: + ret.type = Type::Void; + break; + case IR::Type::U1: + ret.type = Type::U32; + ret.imm_u32 = value.U1() ? 0xffffffff : 0; + break; + case IR::Type::U32: + ret.type = Type::U32; + ret.imm_u32 = value.U32(); + break; + case IR::Type::F32: + ret.type = Type::U32; + ret.imm_u32 = Common::BitCast<u32>(value.F32()); + break; + case IR::Type::U64: + ret.type = Type::U64; + ret.imm_u64 = value.U64(); + break; + case IR::Type::F64: + ret.type = Type::U64; + ret.imm_u64 = Common::BitCast<u64>(value.F64()); + break; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } + return ret; +} + +Register RegAlloc::Define(IR::Inst& inst, bool is_long) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(is_long)); + } else { + Id id{}; + id.is_long.Assign(is_long ? 1 : 0); + id.is_null.Assign(1); + inst.SetDefinition<Id>(id); + } + return Register{PeekInst(inst)}; +} + +Value RegAlloc::PeekInst(IR::Inst& inst) { + Value ret; + ret.type = Type::Register; + ret.id = inst.Definition<Id>(); + return ret; +} + +Value RegAlloc::ConsumeInst(IR::Inst& inst) { + Unref(inst); + return PeekInst(inst); +} + +Id RegAlloc::Alloc(bool is_long) { + size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; + std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; + if (num_used_registers + num_used_long_registers < NUM_REGS) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (use[reg]) { + continue; + } + num_regs = std::max(num_regs, reg + 1); + use[reg] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.is_long.Assign(is_long ? 1 : 0); + ret.is_spill.Assign(0); + ret.is_condition_code.Assign(0); + ret.is_null.Assign(0); + ret.index.Assign(static_cast<u32>(reg)); + return ret; + } + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid register"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + if (id.is_long != 0) { + long_register_use[id.index] = false; + } else { + register_use[id.index] = false; + } +} + +/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::Identity: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastU32F32: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::BitCastF32U32: + case IR::Opcode::BitCastF64U64: + return true; + default: + return false; + } +} + +/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { + IR::Inst* it{&inst}; + while (IsAliased(*it)) { + const IR::Value arg{it->Arg(0)}; + if (arg.IsImmediate()) { + break; + } + it = arg.InstRecursive(); + } + return *it; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..82aec66c6 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -0,0 +1,303 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <bitset> + +#include <fmt/format.h> + +#include "common/bit_cast.h" +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +enum class Type : u32 { + Void, + Register, + U32, + U64, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 1, u32> is_long; + BitField<2, 1, u32> is_spill; + BitField<3, 1, u32> is_condition_code; + BitField<4, 1, u32> is_null; + BitField<5, 27, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +struct Value { + Type type; + union { + Id id; + u32 imm_u32; + u64 imm_u64; + }; + + bool operator==(const Value& rhs) const noexcept { + if (type != rhs.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Register: + return id == rhs.id; + case Type::U32: + return imm_u32 == rhs.imm_u32; + case Type::U64: + return imm_u64 == rhs.imm_u64; + } + return false; + } + bool operator!=(const Value& rhs) const noexcept { + return !operator==(rhs); + } +}; +struct Register : Value {}; +struct ScalarRegister : Value {}; +struct ScalarU32 : Value {}; +struct ScalarS32 : Value {}; +struct ScalarF32 : Value {}; +struct ScalarF64 : Value {}; + +class RegAlloc { +public: + RegAlloc() = default; + + Register Define(IR::Inst& inst); + + Register LongDefine(IR::Inst& inst); + + [[nodiscard]] Value Peek(const IR::Value& value); + + Value Consume(const IR::Value& value); + + void Unref(IR::Inst& inst); + + [[nodiscard]] Register AllocReg(); + + [[nodiscard]] Register AllocLongReg(); + + void FreeReg(Register reg); + + void InvalidateConditionCodes() { + // This does nothing for now + } + + [[nodiscard]] size_t NumUsedRegisters() const noexcept { + return num_used_registers; + } + + [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { + return num_used_long_registers; + } + + [[nodiscard]] bool IsEmpty() const noexcept { + return register_use.none() && long_register_use.none(); + } + + /// Returns true if the instruction is expected to be aliased to another + static bool IsAliased(const IR::Inst& inst); + + /// Returns the underlying value out of an alias sequence + static IR::Inst& AliasInst(IR::Inst& inst); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + Value MakeImm(const IR::Value& value); + + Register Define(IR::Inst& inst, bool is_long); + + Value PeekInst(IR::Inst& inst); + + Value ConsumeInst(IR::Inst& inst); + + Id Alloc(bool is_long); + + void Free(Id id); + + size_t num_used_registers{}; + size_t num_used_long_registers{}; + std::bitset<NUM_REGS> register_use{}; + std::bitset<NUM_REGS> long_register_use{}; +}; + +template <bool scalar, typename FormatContext> +auto FormatTo(FormatContext& ctx, Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code emission"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spill emission"); + } + if constexpr (scalar) { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); + } + if (id.is_long != 0) { + return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); + } + } else { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); + } + if (id.is_long != 0) { + return fmt::format_to(ctx.out(), "D{}", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}", id.index.Value()); + } + } +} + +} // namespace Shader::Backend::GLASM + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Id> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { + return Shader::Backend::GLASM::FormatTo<true>(ctx, id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Register> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", value.imm_u32); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + break; + case Shader::Backend::GLASM::Type::U64: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..4e6f2c0fe --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -0,0 +1,715 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { +namespace { +u32 CbufIndex(size_t offset) { + return (offset / 4) % 4; +} + +char Swizzle(size_t offset) { + return "xyzw"[CbufIndex(offset)]; +} + +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "flat "; + case Interpolation::NoPerspective: + return "noperspective "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +std::string_view InputArrayDecorator(Stage stage) { + switch (stage) { + case Stage::Geometry: + case Stage::TessellationControl: + case Stage::TessellationEval: + return "[]"; + default: + return ""; + } +} + +bool StoresPerVertexAttributes(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + case Stage::TessellationEval: + return true; + default: + return false; + } +} + +std::string OutputDecorator(Stage stage, u32 size) { + switch (stage) { + case Stage::TessellationControl: + return fmt::format("[{}]", size); + default: + return ""; + } +} + +std::string_view SamplerType(TextureType type, bool is_depth) { + if (is_depth) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } + } + switch (type) { + case TextureType::Color1D: + return "sampler1D"; + case TextureType::ColorArray1D: + return "sampler1DArray"; + case TextureType::Color2D: + return "sampler2D"; + case TextureType::ColorArray2D: + return "sampler2DArray"; + case TextureType::Color3D: + return "sampler3D"; + case TextureType::ColorCube: + return "samplerCube"; + case TextureType::ColorArrayCube: + return "samplerCubeArray"; + case TextureType::Buffer: + return "samplerBuffer"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ImageType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "uimage1D"; + case TextureType::ColorArray1D: + return "uimage1DArray"; + case TextureType::Color2D: + return "uimage2D"; + case TextureType::ColorArray2D: + return "uimage2DArray"; + case TextureType::Color3D: + return "uimage3D"; + case TextureType::ColorCube: + return "uimageCube"; + case TextureType::ColorArrayCube: + return "uimageCubeArray"; + case TextureType::Buffer: + return "uimageBuffer"; + default: + throw NotImplementedException("Image type: {}", type); + } +} + +std::string_view ImageFormatString(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return ""; + case ImageFormat::R8_UINT: + return ",r8ui"; + case ImageFormat::R8_SINT: + return ",r8i"; + case ImageFormat::R16_UINT: + return ",r16ui"; + case ImageFormat::R16_SINT: + return ",r16i"; + case ImageFormat::R32_UINT: + return ",r32ui"; + case ImageFormat::R32G32_UINT: + return ",rg32ui"; + case ImageFormat::R32G32B32A32_UINT: + return ",rgba32ui"; + default: + throw NotImplementedException("Image format: {}", format); + } +} + +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "triangles"; + case TessPrimitive::Quads: + return "quads"; + case TessPrimitive::Isolines: + return "isolines"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "equal_spacing"; + case TessSpacing::FractionalOdd: + return "fractional_odd_spacing"; + case TessSpacing::FractionalEven: + return "fractional_even_spacing"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "points"; + case InputTopology::Lines: + return "lines"; + case InputTopology::LinesAdjacency: + return "lines_adjacency"; + case InputTopology::Triangles: + return "triangles"; + case InputTopology::TrianglesAdjacency: + return "triangles_adjacency"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "points"; + case OutputTopology::LineStrip: + return "line_strip"; + case OutputTopology::TriangleStrip: + return "triangle_strip"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { + if (!ctx.info.stores.Legacy()) { + return; + } + if (ctx.info.stores.FixedFunctionTexture()) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + header += "vec4 gl_FrontColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { + header += "vec4 gl_FrontSecondaryColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { + header += "vec4 gl_BackColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { + header += "vec4 gl_BackSecondaryColor;"; + } +} + +void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { + return; + } + if (ctx.uses_geometry_passthrough) { + return; + } + header += "out gl_PerVertex{vec4 gl_Position;"; + if (ctx.info.stores[IR::Attribute::PointSize]) { + header += "float gl_PointSize;"; + } + if (ctx.info.stores.ClipDistances()) { + header += "float gl_ClipDistance[];"; + } + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + header += "int gl_ViewportIndex;"; + } + SetupLegacyOutPerVertex(ctx, header); + header += "};"; + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { + header += "out int gl_ViewportIndex;"; + } +} + +void SetupInPerVertex(EmitContext& ctx, std::string& header) { + // Currently only required for TessellationControl to adhere to + // ARB_separate_shader_objects requirements + if (ctx.stage != Stage::TessellationControl) { + return; + } + const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; + const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; + const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; + const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; + if (!loads_per_vertex) { + return; + } + header += "in gl_PerVertex{"; + if (loads_position) { + header += "vec4 gl_Position;"; + } + if (loads_point_size) { + header += "float gl_PointSize;"; + } + if (loads_clip_distance) { + header += "float gl_ClipDistance[];"; + } + header += "}gl_in[gl_MaxPatchVertices];"; +} + +void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { + if (!ctx.info.loads.Legacy()) { + return; + } + header += "in gl_PerFragment{"; + if (ctx.info.loads.FixedFunctionTexture()) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + header += "vec4 gl_Color;"; + } + header += "};"; +} + +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, + uses_geometry_passthrough{program.is_geometry_passthrough && + profile.support_geometry_shader_passthrough} { + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } + SetupExtensions(); + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vs"; + break; + case Stage::TessellationControl: + stage_name = "tcs"; + header += fmt::format("layout(vertices={})out;", program.invocations); + break; + case Stage::TessellationEval: + stage_name = "tes"; + header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "cw" : "ccw"); + break; + case Stage::Geometry: + stage_name = "gs"; + header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); + if (uses_geometry_passthrough) { + header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; + break; + } else if (program.is_geometry_passthrough && + !profile.support_geometry_shader_passthrough) { + LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); + } + header += fmt::format( + "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", + OutputPrimitive(program.output_topology), program.output_vertices); + break; + case Stage::Fragment: + stage_name = "fs"; + position_name = "gl_FragCoord"; + if (runtime_info.force_early_z) { + header += "layout(early_fragment_tests)in;"; + } + if (info.uses_sample_id) { + header += "in int gl_SampleID;"; + } + if (info.stores_sample_mask) { + header += "out int gl_SampleMask[];"; + } + break; + case Stage::Compute: + stage_name = "cs"; + const u32 local_x{std::max(program.workgroup_size[0], 1u)}; + const u32 local_y{std::max(program.workgroup_size[1], 1u)}; + const u32 local_z{std::max(program.workgroup_size[2], 1u)}; + header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", + local_x, local_y, local_z); + break; + } + SetupOutPerVertex(*this, header); + SetupInPerVertex(*this, header); + SetupLegacyInPerFragment(*this, header); + + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + const auto qualifier{uses_geometry_passthrough ? "passthrough" + : fmt::format("location={}", index)}; + header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; + header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); + } + if (stage == Stage::Fragment) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); + } + } + DefineConstantBuffers(bindings); + DefineStorageBuffers(bindings); + SetupImages(bindings); + SetupTextures(bindings); + DefineHelperFunctions(); + DefineConstants(); +} + +void EmitContext::SetupExtensions() { + header += "#extension GL_ARB_separate_shader_objects : enable\n"; + if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } + if (info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (info.uses_int64_bit_atomics) { + header += "#extension GL_NV_shader_atomic_int64 : enable\n"; + } + if (info.uses_atomic_f32_add) { + header += "#extension GL_NV_shader_atomic_float : enable\n"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; + } + if (info.uses_fp16) { + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_subgroup_shuffles || info.uses_fswzadd) { + header += "#extension GL_ARB_shader_ballot : enable\n" + "#extension GL_ARB_shader_group_vote : enable\n"; + if (!info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } + } + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && + profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { + header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + } + if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + header += "#extension GL_NV_viewport_array2 : enable\n"; + } + if (info.uses_typeless_image_reads) { + header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; + } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } + if (uses_geometry_passthrough) { + header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; + } +} + +void EmitContext::DefineConstantBuffers(Bindings& bindings) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + header += fmt::format( + "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", + bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); + bindings.uniform_buffer += desc.count; + } +} + +void EmitContext::DefineStorageBuffers(Bindings& bindings) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + u32 index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", + bindings.storage_buffer, stage_name, bindings.storage_buffer, + stage_name, index); + bindings.storage_buffer += desc.count; + index += desc.count; + } +} + +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!runtime_info.xfb_varyings.empty()) { + xfb_varying = &runtime_info.xfb_varyings[base_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } +} + +void EmitContext::DefineHelperFunctions() { + header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; + if (info.uses_global_increment || info.uses_shared_increment) { + header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; + } + if (info.uses_global_decrement || info.uses_shared_decrement) { + header += "uint CasDecrement(uint op_a,uint op_b){" + "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; + } + if (info.uses_atomic_f32_add) { + header += "uint CasFloatAdd(uint op_a,float op_b){" + "return ftou(utof(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_add) { + header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" + "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_min) { + header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f32x2_max) { + header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_add) { + header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f16x2_min) { + header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_max) { + header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_s32_min) { + header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + } + if (info.uses_atomic_s32_max) { + header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + } + if (info.uses_global_memory && profile.support_int64) { + header += DefineGlobalMemoryFunctions(); + } + if (info.loads_indexed_attributes) { + const bool is_array{stage == Stage::Geometry}; + const auto vertex_arg{is_array ? ",uint vertex" : ""}; + std::string func{ + fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " + "masked_index=uint(base_index)&3u;switch(base_index>>2){{", + vertex_arg)}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + const auto position_idx{is_array ? "gl_in[vertex]." : ""}; + func += fmt::format("case {}:return {}{}[masked_index];", + static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, + position_name); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + const auto vertex_idx{is_array ? "[vertex]" : ""}; + func += fmt::format("case {}:return in_attr{}{}[masked_index];", + base_attribute_value + index, index, vertex_idx); + } + func += "default: return 0.0;}}"; + header += func; + } + if (info.stores_indexed_attributes) { + // TODO + } +} + +std::string EmitContext::DefineGlobalMemoryFunctions() { + const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; + const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; + const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; + std::array<std::string, 2> addr_xy; + std::array<std::string, 2> size_xy; + for (size_t i = 0; i < addr_xy.size(); ++i) { + const auto addr_loc{ssbo.cbuf_offset + 4 * i}; + const auto size_loc{size_cbuf_offset + 4 * i}; + addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); + size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); + } + const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; + const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; + func += addr_statment; + + const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; + const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; + const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; + const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; + func += comparison; + + const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; + func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); + }}; + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; + std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); + define_body(write_func_64, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); + define_body(write_func_128, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" + "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); + define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); + define_body(load_func_64, index, + "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); + define_body(load_func_128, index, + "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" + "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); + } + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; + load_func += "return 0u;}"; + load_func_64 += "return uvec2(0);}"; + load_func_128 += "return uvec4(0);}"; + return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; +} + +void EmitContext::SetupImages(Bindings& bindings) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffers.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); + bindings.image += desc.count; + } + images.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + images.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto image_type{ImageType(desc.type)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); + bindings.image += desc.count; + } +} + +void EmitContext::SetupTextures(Bindings& bindings) { + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffers.push_back({bindings.texture, desc.count}); + const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } + textures.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); + const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } +} + +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..d9b639d29 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -0,0 +1,174 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +struct GenericElementInfo { + std::string name; + u32 first_element{}; + u32 num_components{}; +}; + +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template <GlslVarType type, typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + const auto var_def{var_alloc.AddDefine(inst, type)}; + if (var_def.empty()) { + // skip assigment. + code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); + } else { + code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); + } + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U1>(format_str, inst, args...); + } + + template <typename... Args> + void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F16x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32>(format_str, inst, args...); + } + + template <typename... Args> + void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U64>(format_str, inst, args...); + } + + template <typename... Args> + void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F64>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x3>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x3>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x4>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x4>(format_str, inst, args...); + } + + template <typename... Args> + void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::PrecF32>(format_str, inst, args...); + } + + template <typename... Args> + void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::PrecF64>(format_str, inst, args...); + } + + template <typename... Args> + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string header; + std::string code; + VarAlloc var_alloc; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view position_name = "gl_Position"; + + std::vector<TextureImageDefinition> texture_buffers; + std::vector<TextureImageDefinition> image_buffers; + std::vector<TextureImageDefinition> textures; + std::vector<TextureImageDefinition> images; + std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; + + u32 num_safety_loop_vars{}; + + bool uses_y_direction{}; + bool uses_cc_carry{}; + bool uses_geometry_passthrough{}; + +private: + void SetupExtensions(); + void DefineConstantBuffers(Bindings& bindings); + void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); + void DefineHelperFunctions(); + void DefineConstants(); + std::string DefineGlobalMemoryFunctions(); + void SetupImages(Bindings& bindings); + void SetupTextures(Bindings& bindings); +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..8a430d573 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -0,0 +1,252 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <tuple> +#include <type_traits> + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" + +namespace Shader::Backend::GLSL { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); +} + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, std::string_view>) { + return ctx.var_alloc.Consume(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { + if constexpr (is_first_arg_inst) { + SetDefinition<func>( + ctx, inst, *inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("}}"); + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } else { + ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("for(;;){{"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (Settings::values.disable_shader_loop_safety_checks) { + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); + } else { + ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, + ctx.var_alloc.Consume(node.data.repeat.cond)); + } + break; + default: + throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); + } + } +} + +std::string GlslVersionSpecifier(const EmitContext& ctx) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { + return " compatibility"; + } + return ""; +} + +bool IsPreciseType(GlslVarType type) { + switch (type) { + case GlslVarType::PrecF32: + case GlslVarType::PrecF64: + return true; + default: + return false; + } +} + +void DefineVariables(const EmitContext& ctx, std::string& header) { + for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) { + const auto type{static_cast<GlslVarType>(i)}; + const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; + const auto type_name{ctx.var_alloc.GetGlslType(type)}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""}; + // Temps/return types that are never used are stored at index 0 + if (tracker.uses_temp) { + header += fmt::format("{}{} t{}={}(0);", precise, type_name, + ctx.var_alloc.Representation(0, type), type_name); + } + for (u32 index = 0; index < tracker.num_used; ++index) { + header += fmt::format("{}{} {}={}(0);", precise, type_name, + ctx.var_alloc.Representation(index, type), type_name); + } + } + for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { + header += fmt::format("int loop{}=0x2000;", i); + } +} +} // Anonymous namespace + +std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; + ctx.header.insert(0, version); + if (program.shared_memory_size > 0) { + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); + } + ctx.header += "void main(){\n"; + if (program.local_memory_size > 0) { + ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); + } + DefineVariables(ctx, ctx.header); + if (ctx.uses_cc_carry) { + ctx.header += "uint carry;"; + } + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;"; + } + ctx.code.insert(0, ctx.header); + ctx.code += '}'; + return ctx.code; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..20e5719e6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -0,0 +1,24 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { + +[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLSL(profile, {}, program, binding); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..772acc5a4 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -0,0 +1,418 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{ + "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; + +void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const std::string smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); +} + +void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); +} + +void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, + std::string_view function) { + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.AddF32("{}=utof({});", inst, ret); +} +} // Anonymous namespace + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, + pointer_offset); + ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", + pointer_offset, value, pointer_offset, value); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" + "))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," + "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp new file mode 100644 index 000000000..e1d1b558e --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitBarrier(EmitContext& ctx) { + ctx.Add("barrier();"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("groupMemoryBarrier();"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("memoryBarrier();"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..3c1714e89 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -0,0 +1,94 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{*value.InstRecursive()}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} +} // Anonymous namespace + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real variable out of the condition + inst.DestructiveAddUsage(1); + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; + const auto input{ctx.var_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("{}={};", ret, input); + } +} + +void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=ftou({});", inst, value); +} + +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); +} + +void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=utof({});", inst, value); +} + +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=packUint2x32({});", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); +} + +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packFloat2x16({});", inst, value); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packHalf2x16({});", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=packDouble2x32({});", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..49a66e3ec --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -0,0 +1,219 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr std::string_view SWIZZLE{"xyzw"}; +void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, + std::string_view object, u32 index) { + if (result == composite) { + // The result is aliased with the composite + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); + } else { + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..580063fa9 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -0,0 +1,456 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char SWIZZLE[]{"xyzw"}; + +u32 CbufIndex(u32 offset) { + return (offset / 4) % 4; +} + +char OffsetSwizzle(u32 offset) { + return SWIZZLE[CbufIndex(offset)]; +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { + return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; +} + +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, + const IR::Value& offset, u32 num_bits, std::string_view cast = {}, + std::string_view bit_offset = {}) { + const bool is_immediate{offset.IsImmediate()}; + const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; + if (is_immediate) { + const s32 signed_offset{static_cast<s32>(offset.U32())}; + static constexpr u32 cbuf_size{0x10000}; + if (signed_offset < 0 || offset.U32() > cbuf_size) { + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); + ctx.Add("{}=0u;", ret); + return; + } + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) + : fmt::format("{}>>4", offset_var)}; + const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) + : fmt::format("[({}>>2)%4]", offset_var)}; + + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; + const auto extraction{num_bits == 32 ? cbuf_cast + : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, + bit_offset, num_bits)}; + if (!component_indexing_bug) { + const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; + ctx.Add("{}={};", ret, result); + return; + } + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 i = 0; i < 4; ++i) { + const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; + const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; + ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); + } +} + +void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } +} + +void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } +} + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf8(ctx, inst, binding, offset, "ftou"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf8(ctx, inst, binding, offset, "ftoi"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf16(ctx, inst, binding, offset, "ftou"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf16(ctx, inst, binding, offset, "ftoi"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + GetCbuf(ctx, ret, binding, offset, 32, "ftou"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; + GetCbuf(ctx, ret, binding, offset, 32); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + if (offset.IsImmediate()) { + static constexpr u32 cbuf_size{0x10000}; + const u32 u32_offset{offset.U32()}; + const s32 signed_offset{static_cast<s32>(offset.U32())}; + if (signed_offset < 0 || u32_offset > cbuf_size) { + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); + ctx.AddU32x2("{}=uvec2(0u);", inst); + return; + } + if (u32_offset % 2 == 0) { + ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); + } else { + ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, + OffsetSwizzle(u32_offset + 4)); + } + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", + inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, + swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, + "xyzw"[(swizzle + 1) % 4]); + } +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } + return; + } + ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); + return; + } + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); + ctx.AddF32("{}=0.f;", inst); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const bool is_array{IsInputArray(ctx.stage)}; + const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""}; + ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); + break; + } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=gl_Color.{};", inst, swizzle); + } else { + ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle); + } + break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.AddF32("{}=itof(gl_InstanceID);", inst); + break; + case IR::Attribute::VertexId: + ctx.AddF32("{}=itof(gl_VertexID);", inst); + break; + case IR::Attribute::FrontFace: + ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 attr_element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{attr_element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } + return; + } + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_Layer=ftoi({});", value); + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_ViewportIndex=ftoi({});", value); + break; + case IR::Attribute::ViewportMask: + if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { + LOG_WARNING( + Shader_GLSL, + "Shader stores viewport mask but device does not support viewport mask extension"); + break; + } + ctx.Add("gl_ViewportMask[0]=ftoi({});", value); + break; + case IR::Attribute::PointSize: + ctx.Add("gl_PointSize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("gl_FrontColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("gl_BackColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("gl_FogFragCoord={};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("gl_ClipDistance[{}]={};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex) { + const bool is_array{ctx.stage == Stage::Geometry}; + const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""}; + ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("gl_TessLevelOuter[{}]={};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("gl_TessLevelInner[0]={};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("gl_TessLevelInner[1]={};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_SampleMask[0]=int({});", value); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_FragDepth={};", value); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_WorkGroupID;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_InvocationID);", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SampleID);", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=gl_HelperInvocation;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { + ctx.AddU32("{}=lmem[{}];", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + ctx.Add("lmem[{}]={};", word_offset, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..53f8896be --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Backend::GLSL { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("discard;"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..eeae6562c --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -0,0 +1,230 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); +} + +void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int({}));", inst, value); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int64_t({}));", inst, value); +} + +void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({}&0xffff);", inst, value); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int({}));", inst, value); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int64_t({}));", inst, value); +} + +void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..d423bfb1b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -0,0 +1,456 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, + std::string_view op, bool ordered) { + const auto nan_op{ordered ? "&&!" : "||"}; + ctx.AddU1("{}={}{}{}" + "{}isnan({}){}isnan({});", + inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); +} + +bool IsPrecise(const IR::Inst& inst) { + return inst.Flags<IR::FpControl>().no_contraction; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=abs({});", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=abs({});", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}+{};", inst, a, b); + } else { + ctx.AddF32("{}={}+{};", inst, a, b); + } +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}+{};", inst, a, b); + } else { + ctx.AddF64("{}={}+{};", inst, a, b); + } +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=max({},{});", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF64("{}=max({},{});", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=min({},{});", inst, a, b); +} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF64("{}=min({},{});", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}*{};", inst, a, b); + } else { + ctx.AddF32("{}={}*{};", inst, a, b); + } +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}*{};", inst, a, b); + } else { + ctx.AddF64("{}={}*{};", inst, a, b); + } +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=-({});", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=-({});", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sin({});", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=cos({});", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=exp2({});", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=log2({});", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=(1.0f)/{};", inst, value); +} + +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=1.0/{};", inst, value); +} + +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=inversesqrt({});", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sqrt({});", inst, value); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // GLSL's clamp does not produce desirable results + ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // GLSL's clamp does not produce desirable results + ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=roundEven({});", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=roundEven({});", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=floor({});", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=floor({});", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=ceil({});", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=ceil({});", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=trunc({});", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=trunc({});", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..447eb8e0a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -0,0 +1,799 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); +} + +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); +} + +std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorArray2D: + return fmt::format("ivec2({})", value); + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); + } +} + +std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + return fmt::format("ivec2({})", value); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); + } +} + +bool NeedsShadowLodExt(TextureType type) { + switch (type) { + case TextureType::ColorArray2D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + return true; + default: + return false; + } +} + +std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsImmediate()) { + return fmt::format("int({})", offset.U32()); + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); + case IR::Opcode::CompositeConstructU32x3: + return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32()); + case IR::Opcode::CompositeConstructU32x4: + return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32(), inst->Arg(3).U32()); + default: + break; + } + } + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; + switch (offset.Type()) { + case IR::Type::U32: + return fmt::format("int({})", offset_str); + case IR::Type::U32x2: + return fmt::format("ivec2({})", offset_str); + case IR::Type::U32x3: + return fmt::format("ivec3({})", offset_str); + case IR::Type::U32x4: + return fmt::format("ivec4({})", offset_str); + default: + throw NotImplementedException("Offset type {}", offset.Type()); + } +} + +std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING"); + return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), + read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), + read(1, 3)); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); + } else { + ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + } else { + ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); + } + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", + *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, + texture, coords, texel, bias); + } +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, + GetOffsetVec(ctx, offset)); + } else { + ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, + GetOffsetVec(ctx, offset), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, + texture, coords, lod_lc, texel); + } +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, + offset_str, bias); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, + dref, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); + } else { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + } + } +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, + offset_str); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); + } + } else { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, + lod_lc); + } + } +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, + info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, + GetOffsetVec(ctx, offset), info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, + info.gather_component); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", + *sparse_inst, texture, coords, texel, info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), + texel, info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, + info.gather_component); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, + GetOffsetVec(ctx, offset)); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, + texture, coords, dref, texel); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, + GetOffsetVec(ctx, offset), texel); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + [[maybe_unused]] std::string_view ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageFetch Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageFetch Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto sparse_inst{PrepareSparse(inst)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, + CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); + } else { + if (info.type == TextureType::Buffer) { + ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); + } else { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, + CoordsCastToInt(coords, info), lod); + } + } + return; + } + if (!offset.empty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, + CastToIntVec(offset, info), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); + } +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + switch (info.type) { + case TextureType::Color1D: + return ctx.AddU32x4( + "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.AddU32x4( + "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.AddU32x4( + "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, + lod, texture); + case TextureType::Buffer: + throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageGradient Lod clamp samples"); + } + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageGradient Sparse"); + } + if (!offset.IsEmpty()) { + throw NotImplementedException("EmitImageGradient offset"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; + if (multi_component) { + ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } else { + ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageRead Sparse"); + } + const auto image{Image(ctx, info, index)}; + ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..5936d086f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -0,0 +1,702 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string_view> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +class EmitContext; + +#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext& ctx, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..38419f88f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + ctx.AddU1("{}={}==0;", *zero, result); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + ctx.AddU1("{}=int({})<0;", *sign, result); + sign->Invalidate(); +} + +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + char lop) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}={}{}{};", result, a, lop, b); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + // Compute the overflow CC first as it requires the original operand values, + // which may be overwritten by the result of the addition + if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; + const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; + const auto negative_result{fmt::format("int({})<int({})", b, sub_a)}; + ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result); + overflow->Invalidate(); + } + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + ctx.uses_cc_carry = true; + ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); + ctx.AddU1("{}=carry!=0;", *carry); + carry->Invalidate(); + } else { + ctx.Add("{}={}+{};", result, a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}+{};", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}-{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}-{};", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint({}*{});", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint(-({}));", inst, value); +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=-({});", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=abs(int({}));", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}<<{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}={}<<{};", inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}=int({})>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '&'); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '|'); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '^'); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count) { + ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitfieldReverse({});", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitCount({});", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=~{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(int({}));", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(uint({}));", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); +} + +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); +} + +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); +} + +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}=={};", inst, lhs, rhs); +} + +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); +} + +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); +} + +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}!={};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..338ff4bd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}||{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}&&{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}^^{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=!{};", inst, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e3957491f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -0,0 +1,202 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, + std::string_view value, std::string_view bit_offset, u32 num_bits) { + const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; + ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); +} + +void EmitWriteGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" + "+12)>>2]);", + inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), + offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, + binding.U32(), offset_var); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..f420fe388 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLSL { + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..49fba9073 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..518b78f06 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=smem[{}>>2];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, + offset, offset, offset, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={};", offset, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); + ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); + ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..9b866f889 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -0,0 +1,111 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.uses_geometry_passthrough) { + return; + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { + continue; + } + const auto& info_array{ctx.output_generics.at(index)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + size_t element{}; + while (element < info_array.size()) { + const auto& info{info_array.at(element)}; + const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; + switch (info.num_components) { + case 1: { + const char value{element == 3 ? '1' : '0'}; + ctx.Add("{}={}.f;", varying_name, value); + break; + } + case 2: + case 3: + if (element + info.num_components < 4) { + ctx.Add("{}=vec{}(0);", varying_name, info.num_components); + } else { + // last element is the w component, must be initialized to 1 + const auto zeros{info.num_components == 3 ? "0,0," : "0,"}; + ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); + } + break; + case 4: + ctx.Add("{}=vec4(0,0,0,1);", varying_name); + break; + default: + break; + } + element += info.num_components; + } + } +} +} // Anonymous namespace + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.var_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.var_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{*phi_value.InstRecursive()}; + const auto phi_type{phi.Arg(0).Type()}; + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi_type); + } + const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.var_alloc.Consume(value)}; + if (phi_reg == val_reg) { + return; + } + ctx.Add("{}={};", phi_reg, val_reg); +} + +void EmitPrologue(EmitContext& ctx) { + InitializeOutputVaryings(ctx); +} + +void EmitEpilogue(EmitContext&) {} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeOutputVaryings(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..15bf02dd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" + +namespace Shader::Backend::GLSL { + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=false;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU64("{}=0u;", inst); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..a982dd8a2 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -0,0 +1,217 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); + in_bounds->Invalidate(); +} + +std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { + return fmt::format("({}&{})", thread_id, segmentation_mask); +} + +std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, + std::string_view not_seg_mask) { + return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); +} + +std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, + std::string_view segmentation_mask) { + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); +} + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} +} // Anonymous namespace + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + } +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + } +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + } +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + } else { + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); + } +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + return; + } + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; + + const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; + const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp new file mode 100644 index 000000000..194f926ca --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -0,0 +1,308 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string TypePrefix(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return "b_"; + case GlslVarType::F16x2: + return "f16x2_"; + case GlslVarType::U32: + return "u_"; + case GlslVarType::F32: + return "f_"; + case GlslVarType::U64: + return "u64_"; + case GlslVarType::F64: + return "d_"; + case GlslVarType::U32x2: + return "u2_"; + case GlslVarType::F32x2: + return "f2_"; + case GlslVarType::U32x3: + return "u3_"; + case GlslVarType::F32x3: + return "f3_"; + case GlslVarType::U32x4: + return "u4_"; + case GlslVarType::F32x4: + return "f4_"; + case GlslVarType::PrecF32: + return "pf_"; + case GlslVarType::PrecF64: + return "pd_"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +std::string FormatFloat(std::string_view value, IR::Type type) { + // TODO: Confirm FP64 nan/inf + if (type == IR::Type::F32) { + if (value == "nan") { + return "utof(0x7fc00000)"; + } + if (value == "inf") { + return "utof(0x7f800000)"; + } + if (value == "-inf") { + return "utof(0xff800000)"; + } + } + if (value.find_first_of('e') != std::string_view::npos) { + // scientific notation + const auto cast{type == IR::Type::F32 ? "float" : "double"}; + return fmt::format("{}({})", cast, value); + } + const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; + const bool needs_suffix{!value.ends_with('f')}; + const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; + return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? "true" : "false"); + case IR::Type::U32: + return fmt::format("{}u", value.U32()); + case IR::Type::F32: + return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); + case IR::Type::U64: + return fmt::format("{}ul", value.U64()); + case IR::Type::F64: + return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string VarAlloc::Representation(u32 index, GlslVarType type) const { + const auto prefix{TypePrefix(type)}; + return fmt::format("{}{}", prefix, index); +} + +std::string VarAlloc::Representation(Id id) const { + return Representation(id.index, id.type); +} + +std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(type)); + return Representation(inst.Definition<Id>()); + } else { + Id id{}; + id.type.Assign(type); + GetUseTracker(type).uses_temp = true; + inst.SetDefinition<Id>(id); + return 't' + Representation(inst.Definition<Id>()); + } +} + +std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { + return Define(inst, RegType(type)); +} + +std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { + return AddDefine(inst, RegType(type)); +} + +std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(type)); + return Representation(inst.Definition<Id>()); + } else { + return ""; + } + return Representation(inst.Definition<Id>()); +} + +std::string VarAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); +} + +std::string VarAlloc::ConsumeInst(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition<Id>()); + } + return Representation(inst.Definition<Id>()); +} + +std::string VarAlloc::GetGlslType(IR::Type type) const { + return GetGlslType(RegType(type)); +} + +Id VarAlloc::Alloc(GlslVarType type) { + auto& use_tracker{GetUseTracker(type)}; + const auto num_vars{use_tracker.var_use.size()}; + for (size_t var = 0; var < num_vars; ++var) { + if (use_tracker.var_use[var]) { + continue; + } + use_tracker.num_used = std::max(use_tracker.num_used, var + 1); + use_tracker.var_use[var] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast<u32>(var)); + return ret; + } + // Allocate a new variable + use_tracker.var_use.push_back(true); + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast<u32>(use_tracker.num_used)); + ++use_tracker.num_used; + return ret; +} + +void VarAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid variable"); + } + auto& use_tracker{GetUseTracker(id.type)}; + use_tracker.var_use[id.index] = false; +} + +GlslVarType VarAlloc::RegType(IR::Type type) const { + switch (type) { + case IR::Type::U1: + return GlslVarType::U1; + case IR::Type::U32: + return GlslVarType::U32; + case IR::Type::F32: + return GlslVarType::F32; + case IR::Type::U64: + return GlslVarType::U64; + case IR::Type::F64: + return GlslVarType::F64; + default: + throw NotImplementedException("IR type {}", type); + } +} + +std::string VarAlloc::GetGlslType(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return "bool"; + case GlslVarType::F16x2: + return "f16vec2"; + case GlslVarType::U32: + return "uint"; + case GlslVarType::F32: + case GlslVarType::PrecF32: + return "float"; + case GlslVarType::U64: + return "uint64_t"; + case GlslVarType::F64: + case GlslVarType::PrecF64: + return "double"; + case GlslVarType::U32x2: + return "uvec2"; + case GlslVarType::F32x2: + return "vec2"; + case GlslVarType::U32x3: + return "uvec3"; + case GlslVarType::F32x3: + return "vec3"; + case GlslVarType::U32x4: + return "uvec4"; + case GlslVarType::F32x4: + return "vec4"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h new file mode 100644 index 000000000..8b49f32a6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <bitset> +#include <string> +#include <vector> + +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +enum class Type; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +enum class GlslVarType : u32 { + U1, + F16x2, + U32, + F32, + U64, + F64, + U32x2, + F32x2, + U32x3, + F32x3, + U32x4, + F32x4, + PrecF32, + PrecF64, + Void, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 4, GlslVarType> type; + BitField<6, 26, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +class VarAlloc { +public: + struct UseTracker { + bool uses_temp{}; + size_t num_used{}; + std::vector<bool> var_use; + }; + + /// Used for explicit usages of variables, may revert to temporaries + std::string Define(IR::Inst& inst, GlslVarType type); + std::string Define(IR::Inst& inst, IR::Type type); + + /// Used to assign variables used by the IR. May return a blank string if + /// the instruction's result is unused in the IR. + std::string AddDefine(IR::Inst& inst, GlslVarType type); + std::string PhiDefine(IR::Inst& inst, IR::Type type); + + std::string Consume(const IR::Value& value); + std::string ConsumeInst(IR::Inst& inst); + + std::string GetGlslType(GlslVarType type) const; + std::string GetGlslType(IR::Type type) const; + + const UseTracker& GetUseTracker(GlslVarType type) const; + std::string Representation(u32 index, GlslVarType type) const; + +private: + GlslVarType RegType(IR::Type type) const; + Id Alloc(GlslVarType type); + void Free(Id id); + UseTracker& GetUseTracker(GlslVarType type); + std::string Representation(Id id) const; + + UseTracker var_bool{}; + UseTracker var_f16x2{}; + UseTracker var_u32{}; + UseTracker var_u32x2{}; + UseTracker var_u32x3{}; + UseTracker var_u32x4{}; + UseTracker var_f32{}; + UseTracker var_f32x2{}; + UseTracker var_f32x3{}; + UseTracker var_f32x4{}; + UseTracker var_u64{}; + UseTracker var_f64{}; + UseTracker var_precf32{}; + UseTracker var_precf64{}; +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..2d29d8c14 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -0,0 +1,1368 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <climits> +#include <string_view> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "shader_recompiler/backend/spirv/emit_context.h" + +namespace Shader::Backend::SPIRV { +namespace { +enum class Operation { + Increment, + Decrement, + FPAdd, + FPMin, + FPMax, +}; + +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id type{ctx.F32[1]}; + const bool depth{desc.is_depth}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); + case TextureType::ColorCube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); + case TextureType::ColorArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); + case TextureType::Buffer: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +spv::ImageFormat GetImageFormat(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", format); +} + +Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id type{ctx.U32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); + case TextureType::Buffer: + throw NotImplementedException("Image buffer"); + default: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin, + spv::StorageClass storage_class) { + const Id pointer_type{ctx.TypePointer(storage_class, type)}; + const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + ctx.interfaces.push_back(id); + return id; +} + +u32 NumVertices(InputTopology input_topology) { + switch (input_topology) { + case InputTopology::Points: + return 1; + case InputTopology::Lines: + return 2; + case InputTopology::LinesAdjacency: + return 4; + case InputTopology::Triangles: + return 3; + case InputTopology::TrianglesAdjacency: + return 6; + } + throw InvalidArgument("Invalid input topology {}", input_topology); +} + +Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, + std::optional<spv::BuiltIn> builtin = std::nullopt) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + if (per_invocation) { + type = ctx.TypeArray(type, ctx.Const(32u)); + } + break; + case Stage::Geometry: + if (per_invocation) { + const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; + type = ctx.TypeArray(type, ctx.Const(num_vertices)); + } + break; + default: + break; + } + return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); +} + +Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations, + std::optional<spv::BuiltIn> builtin = std::nullopt) { + if (invocations && ctx.stage == Stage::TessellationControl) { + type = ctx.TypeArray(type, ctx.Const(*invocations)); + } + return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); +} + +void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!ctx.runtime_info.xfb_varyings.empty()) { + xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + + const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; + ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + if (element > 0) { + ctx.Decorate(id, spv::Decoration::Component, element); + } + if (xfb_varying) { + ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); + ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); + ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); + } + if (num_components < 4 || element > 0) { + const std::string_view subswizzle{swizzle.substr(element, num_components)}; + ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } else { + ctx.Name(id, fmt::format("out_attr{}", index)); + } + const GenericElementInfo info{ + .id = id, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(ctx.output_generics[index].begin() + element, num_components, info); + element += num_components; + } +} + +Id GetAttributeType(EmitContext& ctx, AttributeType type) { + switch (type) { + case AttributeType::Float: + return ctx.F32[4]; + case AttributeType::SignedInt: + return ctx.TypeVector(ctx.TypeInt(32, true), 4); + case AttributeType::UnsignedInt: + return ctx.U32[4]; + case AttributeType::Disabled: + break; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +std::string_view StageName(Stage stage) { + switch (stage) { + case Stage::VertexA: + return "vs_a"; + case Stage::VertexB: + return "vs"; + case Stage::TessellationControl: + return "tcs"; + case Stage::TessellationEval: + return "tes"; + case Stage::Geometry: + return "gs"; + case Stage::Fragment: + return "fs"; + case Stage::Compute: + return "cs"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +template <typename... Args> +void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { + ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), + std::forward<Args>(args)...) + .c_str()); +} + +void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{ctx.TypeStruct(array_type)}; + Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT); + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberName(struct_type, 0, "data"); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; + ctx.uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + ctx.cbufs[desc.index + i].*member_type = id; + } + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + binding += desc.count; + } +} + +void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, + u32 stride) { + const Id array_type{ctx.TypeRuntimeArray(type)}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + type_def.array = struct_pointer; + type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); + + u32 index{}; + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("ssbo{}", index)); + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + for (size_t i = 0; i < desc.count; ++i) { + ctx.ssbos[index + i].*member_type = id; + } + index += desc.count; + binding += desc.count; + } +} + +Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (operation) { + case Operation::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case Operation::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case Operation::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case Operation::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case Operation::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + +Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, + Id value_type, Id memory_type, spv::Scope scope) { + const bool is_shared{scope == spv::Scope::Workgroup}; + const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; + const Id cas_func{CasFunction(ctx, operation, value_type)}; + const Id zero{ctx.u32_zero_value}; + const Id scope_id{ctx.Const(static_cast<u32>(scope))}; + + const Id loop_header{ctx.OpLabel()}; + const Id continue_block{ctx.OpLabel()}; + const Id merge_block{ctx.OpLabel()}; + const Id func_type{is_shared + ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) + : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; + + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; + ctx.AddLabel(); + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + + ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + ctx.OpBranch(continue_block); + + ctx.AddLabel(continue_block); + const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) + : ctx.OpAccessChain(element_pointer, base, index)}; + if (value_type.value == ctx.F32[2].value) { + const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; + const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; + const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; + const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, u32_new_value, u32_value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); + } else { + const Id value{ctx.OpLoad(memory_type, word_pointer)}; + const bool matching_type{value_type.value == memory_type.value}; + const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; + const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; + const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, new_value, value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); + } + ctx.OpFunctionEnd(); + return func; +} + +template <typename Desc> +std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) { + if (desc.count > 1) { + return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset, desc.count); + } else { + return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset); + } +} + +Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { + if (count > 1) { + const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; + return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); + } else { + return pointer_type; + } +} +} // Anonymous namespace + +void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { + defs[0] = sirit_ctx.Name(base_type, name); + + std::array<char, 6> def_name; + for (int i = 1; i < 4; ++i) { + const std::string_view def_name_view( + def_name.data(), + fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); + defs[static_cast<size_t>(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + } +} + +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, + runtime_info{runtime_info_}, stage{program.stage} { + const bool is_unified{profile.unified_descriptor_binding}; + u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; + u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; + u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; + u32& image_binding{is_unified ? bindings.unified : bindings.image}; + AddCapability(spv::Capability::Shader); + DefineCommonTypes(program.info); + DefineCommonConstants(); + DefineInterfaces(program); + DefineLocalMemory(program); + DefineSharedMemory(program); + DefineSharedMemoryFunctions(program); + DefineConstantBuffers(program.info, uniform_binding); + DefineStorageBuffers(program.info, storage_binding); + DefineTextureBuffers(program.info, texture_binding); + DefineImageBuffers(program.info, image_binding); + DefineTextures(program.info, texture_binding); + DefineImages(program.info, image_binding); + DefineAttributeMemAccess(program.info); + DefineGlobalMemoryFunctions(program.info); +} + +EmitContext::~EmitContext() = default; + +Id EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return value.InstRecursive()->Definition<Id>(); + } + switch (value.Type()) { + case IR::Type::Void: + // Void instructions are used for optional arguments (e.g. texture offsets) + // They are not meant to be used in the SPIR-V module + return Id{}; + case IR::Type::U1: + return value.U1() ? true_value : false_value; + case IR::Type::U32: + return Const(value.U32()); + case IR::Type::U64: + return Constant(U64, value.U64()); + case IR::Type::F32: + return Const(value.F32()); + case IR::Type::F64: + return Constant(F64[1], value.F64()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} + +Id EmitContext::BitOffset8(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const((offset.U32() % 4) * 8); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); +} + +Id EmitContext::BitOffset16(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const(((offset.U32() / 2) % 2) * 16); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); +} + +void EmitContext::DefineCommonTypes(const Info& info) { + void_id = TypeVoid(); + + U1 = Name(TypeBool(), "u1"); + + F32.Define(*this, TypeFloat(32), "f32"); + U32.Define(*this, TypeInt(32, false), "u32"); + S32.Define(*this, TypeInt(32, true), "s32"); + + private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); + + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); + + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); + + if (info.uses_int8 && profile.support_int8) { + AddCapability(spv::Capability::Int8); + U8 = Name(TypeInt(8, false), "u8"); + S8 = Name(TypeInt(8, true), "s8"); + } + if (info.uses_int16 && profile.support_int16) { + AddCapability(spv::Capability::Int16); + U16 = Name(TypeInt(16, false), "u16"); + S16 = Name(TypeInt(16, true), "s16"); + } + if (info.uses_int64) { + AddCapability(spv::Capability::Int64); + U64 = Name(TypeInt(64, false), "u64"); + } + if (info.uses_fp16) { + AddCapability(spv::Capability::Float16); + F16.Define(*this, TypeFloat(16), "f16"); + } + if (info.uses_fp64) { + AddCapability(spv::Capability::Float64); + F64.Define(*this, TypeFloat(64), "f64"); + } +} + +void EmitContext::DefineCommonConstants() { + true_value = ConstantTrue(U1); + false_value = ConstantFalse(U1); + u32_zero_value = Const(0U); + f32_zero_value = Const(0.0f); +} + +void EmitContext::DefineInterfaces(const IR::Program& program) { + DefineInputs(program); + DefineOutputs(program); +} + +void EmitContext::DefineLocalMemory(const IR::Program& program) { + if (program.local_memory_size == 0) { + return; + } + const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Const(num_elements))}; + const Id pointer{TypePointer(spv::StorageClass::Private, type)}; + local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(local_memory); + } +} + +void EmitContext::DefineSharedMemory(const IR::Program& program) { + if (program.shared_memory_size == 0) { + return; + } + const auto make{[&](Id element_type, u32 element_size) { + const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; + const Id array_type{TypeArray(element_type, Const(num_elements))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); + Decorate(struct_type, spv::Decoration::Block); + + const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; + const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; + const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; + Decorate(variable, spv::Decoration::Aliased); + interfaces.push_back(variable); + + return std::make_tuple(variable, element_pointer, pointer); + }}; + if (profile.support_explicit_workgroup_layout) { + AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); + if (program.info.uses_int8) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); + std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); + } + if (program.info.uses_int16) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); + std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); + } + if (program.info.uses_int64) { + std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); + } + std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); + return; + } + const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Const(num_elements))}; + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); + interfaces.push_back(shared_memory_u32); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; + const Id count{Const(size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, + u32_zero_value, new_value, old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } +} + +void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { + if (program.info.uses_shared_increment) { + increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); + } + if (program.info.uses_shared_decrement) { + decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); + } +} + +void EmitContext::DefineAttributeMemAccess(const Info& info) { + const auto make_load{[&] { + const bool is_array{stage == Stage::Geometry}; + const Id end_block{OpLabel()}; + const Id default_label{OpLabel()}; + + const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) + : TypeFunction(F32[1], U32[1])}; + const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; + + AddLabel(); + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; + std::vector<Sirit::Literal> literals; + std::vector<Id> labels; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); + labels.push_back(OpLabel()); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { + continue; + } + literals.push_back(base_attribute_value + index); + labels.push_back(OpLabel()); + } + OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); + OpSwitch(compare_index, default_label, literals, labels); + AddLabel(default_label); + OpReturnValue(Const(0.0f)); + size_t label_index{0}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + AddLabel(labels[label_index]); + const Id pointer{is_array + ? OpAccessChain(input_f32, input_position, vertex, masked_index) + : OpAccessChain(input_f32, input_position, masked_index)}; + const Id result{OpLoad(F32[1], pointer)}; + OpReturnValue(result); + ++label_index; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + AddLabel(labels[label_index]); + const auto type{AttrTypes(*this, static_cast<u32>(index))}; + if (!type) { + OpReturnValue(Const(0.0f)); + ++label_index; + continue; + } + const Id generic_id{input_generics.at(index)}; + const Id pointer{is_array + ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) + : OpAccessChain(type->pointer, generic_id, masked_index)}; + const Id value{OpLoad(type->id, pointer)}; + const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; + OpReturnValue(result); + ++label_index; + } + AddLabel(end_block); + OpUnreachable(); + OpFunctionEnd(); + return func; + }}; + const auto make_store{[&] { + const Id end_block{OpLabel()}; + const Id default_label{OpLabel()}; + + const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])}; + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id store_value{OpFunctionParameter(F32[1])}; + AddLabel(); + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; + std::vector<Sirit::Literal> literals; + std::vector<Id> labels; + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { + literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); + labels.push_back(OpLabel()); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { + continue; + } + literals.push_back(base_attribute_value + static_cast<u32>(index)); + labels.push_back(OpLabel()); + } + if (info.stores.ClipDistances()) { + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); + labels.push_back(OpLabel()); + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); + labels.push_back(OpLabel()); + } + OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); + OpSwitch(compare_index, default_label, literals, labels); + AddLabel(default_label); + OpReturn(); + size_t label_index{0}; + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { + continue; + } + if (output_generics[index][0].num_components != 4) { + throw NotImplementedException("Physical stores and transform feedbacks"); + } + AddLabel(labels[label_index]); + const Id generic_id{output_generics[index][0].id}; + const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + if (info.stores.ClipDistances()) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + AddLabel(labels[label_index]); + const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; + const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; + OpStore(pointer2, store_value); + OpReturn(); + ++label_index; + } + AddLabel(end_block); + OpUnreachable(); + OpFunctionEnd(); + return func; + }}; + if (info.loads_indexed_attributes) { + indexed_load_func = make_load(); + } + if (info.stores_indexed_attributes) { + indexed_store_func = make_store(); + } +} + +void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { + if (!info.uses_global_memory || !profile.support_int64) { + return; + } + using DefPtr = Id StorageDefinitions::*; + const Id zero{u32_zero_value}; + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, + auto&& callback) { + AddLabel(); + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; + const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; + const Id ssbo_addr_pointer{OpAccessChain( + uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; + const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, + zero, ssbo_size_cbuf_offset)}; + + const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; + const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; + const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; + const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), + OpULessThan(U1, addr, ssbo_end))}; + const Id then_label{OpLabel()}; + const Id else_label{OpLabel()}; + OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, then_label, else_label); + AddLabel(then_label); + const Id ssbo_id{ssbos[index].*ssbo_member}; + const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; + const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; + callback(ssbo_pointer); + AddLabel(else_label); + } + }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(type, U64)}; + const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + define_body(ssbo_member, addr, element_pointer, shift, + [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); + OpReturnValue(ConstantNull(type)); + OpFunctionEnd(); + return func_id; + }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(void_id, U64, type)}; + const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + const Id data{OpFunctionParameter(type)}; + define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { + OpStore(ssbo_pointer, data); + OpReturn(); + }); + OpReturn(); + OpFunctionEnd(); + return func_id; + }}; + const auto define{ + [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { + const Id element_type{type_def.element}; + const u32 shift{static_cast<u32>(std::countr_zero(size))}; + const Id load_func{define_load(ssbo_member, element_type, type, shift)}; + const Id write_func{define_write(ssbo_member, element_type, type, shift)}; + return std::make_pair(load_func, write_func); + }}; + std::tie(load_global_func_u32, write_global_func_u32) = + define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); + std::tie(load_global_func_u32x2, write_global_func_u32x2) = + define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); + std::tie(load_global_func_u32x4, write_global_func_u32x4) = + define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); +} + +void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + if (!profile.support_descriptor_aliasing) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', + sizeof(u32[4])); + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } + return; + } + IR::Type types{info.used_constant_buffer_types}; + if (True(types & IR::Type::U8)) { + if (profile.support_int8) { + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U16)) { + if (profile.support_int16) { + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', + sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', + sizeof(s16)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); + } + if (True(types & IR::Type::F32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); + } + if (True(types & IR::Type::U32x2)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); + } + binding += static_cast<u32>(info.constant_buffer_descriptors.size()); +} + +void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + AddExtension("SPV_KHR_storage_buffer_storage_class"); + + const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types + : IR::Type::U32}; + if (profile.support_int8 && True(used_types & IR::Type::U8)) { + DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, + sizeof(u8)); + DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, + sizeof(u8)); + } + if (profile.support_int16 && True(used_types & IR::Type::U16)) { + DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, + sizeof(u16)); + DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, + sizeof(u16)); + } + if (True(used_types & IR::Type::U32)) { + DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], + sizeof(u32)); + } + if (True(used_types & IR::Type::F32)) { + DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], + sizeof(f32)); + } + if (True(used_types & IR::Type::U64)) { + DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, + sizeof(u64)); + } + if (True(used_types & IR::Type::U32x2)) { + DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], + sizeof(u32[2])); + } + if (True(used_types & IR::Type::U32x4)) { + DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], + sizeof(u32[4])); + } + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + binding += desc.count; + } + const bool needs_function{ + info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || + info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || + info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; + if (needs_function) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + } + if (info.uses_global_increment) { + increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); + } + if (info.uses_global_decrement) { + decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); + } + if (info.uses_atomic_f32_add) { + f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_add) { + f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_min) { + f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_max) { + f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_add) { + f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_min) { + f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_max) { + f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } +} + +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "texbuf")); + texture_buffers.push_back({ + .id = id, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of image buffers"); + } + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "imgbuf")); + image_buffers.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineTextures(const Info& info, u32& binding) { + textures.reserve(info.texture_descriptors.size()); + for (const TextureDescriptor& desc : info.texture_descriptors) { + const Id image_type{ImageType(*this, desc)}; + const Id sampled_type{TypeSampledImage(image_type)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; + const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; + const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "tex")); + textures.push_back({ + .id = id, + .sampled_type = sampled_type, + .pointer_type = pointer_type, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } + if (info.uses_atomic_image_u32) { + image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + } +} + +void EmitContext::DefineImages(const Info& info, u32& binding) { + images.reserve(info.image_descriptors.size()); + for (const ImageDescriptor& desc : info.image_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of images"); + } + const Id image_type{ImageType(*this, desc)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "img")); + images.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + + if (info.uses_workgroup_id) { + workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); + } + if (info.uses_invocation_id) { + invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); + } + if (info.uses_sample_id) { + sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + } + if (info.uses_is_helper_invocation) { + is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); + } + if (info.uses_subgroup_mask) { + subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || + (profile.warp_size_potentially_larger_than_guest && + (info.uses_subgroup_vote || info.uses_subgroup_mask))) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); + } + if (info.uses_fswzadd) { + const Id f32_one{Const(1.0f)}; + const Id f32_minus_one{Const(-1.0f)}; + const Id f32_zero{Const(0.0f)}; + fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); + fswzadd_lut_b = + ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); + } + if (loads[IR::Attribute::PrimitiveId]) { + primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); + } + if (loads.AnyComponent(IR::Attribute::PositionX)) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } + } + if (loads[IR::Attribute::InstanceId]) { + if (profile.support_vertex_instance_id) { + instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); + } else { + instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); + } + } + if (loads[IR::Attribute::VertexId]) { + if (profile.support_vertex_instance_id) { + vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); + } else { + vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } + } + if (loads[IR::Attribute::FrontFace]) { + front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); + } + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { + point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); + } + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { + tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + if (!loads.Generic(index)) { + continue; + } + if (input_type == AttributeType::Disabled) { + continue; + } + const Id type{GetAttributeType(*this, input_type)}; + const Id id{DefineInput(*this, type, true)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + Name(id, fmt::format("in_attr{}", index)); + input_generics[index] = id; + + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } + if (stage != Stage::Fragment) { + continue; + } + switch (info.interpolation[index]) { + case Interpolation::Smooth: + // Default + // Decorate(id, spv::Decoration::Smooth); + break; + case Interpolation::NoPerspective: + Decorate(id, spv::Decoration::NoPerspective); + break; + case Interpolation::Flat: + Decorate(id, spv::Decoration::Flat); + break; + } + } + if (stage == Stage::TessellationEval) { + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineInput(*this, F32[4], false)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + patches[index] = id; + } + } +} + +void EmitContext::DefineOutputs(const IR::Program& program) { + const Info& info{program.info}; + const std::optional<u32> invocations{program.invocations}; + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { + output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); + } + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing PointSize in fragment stage"); + } + output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); + } + if (info.stores.ClipDistances()) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ClipDistance in fragment stage"); + } + const Id type{TypeArray(F32[1], Const(8U))}; + clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); + } + if (info.stores[IR::Attribute::Layer] && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing Layer in fragment stage"); + } + layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); + } + if (info.stores[IR::Attribute::ViewportIndex] && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ViewportIndex in fragment stage"); + } + viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, + spv::BuiltIn::ViewportMaskNV); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(*this, index, invocations); + } + } + switch (stage) { + case Stage::TessellationControl: + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], Const(4U))}; + output_tess_level_outer = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], Const(2U))}; + output_tess_level_inner = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineOutput(*this, F32[4], std::nullopt)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + patches[index] = id; + } + break; + case Stage::Fragment: + for (u32 index = 0; index < 8; ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); + Decorate(frag_color[index], spv::Decoration::Location, index); + Name(frag_color[index], fmt::format("frag_color{}", index)); + } + if (info.stores_frag_depth) { + frag_depth = DefineOutput(*this, F32[1], std::nullopt); + Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); + } + if (info.stores_sample_mask) { + sample_mask = DefineOutput(*this, U32[1], std::nullopt); + Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); + } + break; + default: + break; + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..e277bc358 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -0,0 +1,307 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <string_view> + +#include <sirit/sirit.h> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class VectorTypes { +public: + void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); + + [[nodiscard]] Id operator[](size_t size) const noexcept { + return defs[size - 1]; + } + +private: + std::array<Id, 4> defs{}; +}; + +struct TextureDefinition { + Id id; + Id sampled_type; + Id pointer_type; + Id image_type; + u32 count; +}; + +struct TextureBufferDefinition { + Id id; + u32 count; +}; + +struct ImageBufferDefinition { + Id id; + Id image_type; + u32 count; +}; + +struct ImageDefinition { + Id id; + Id image_type; + u32 count; +}; + +struct UniformDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U32x2{}; + Id U32x4{}; +}; + +struct StorageTypeDefinition { + Id array{}; + Id element{}; +}; + +struct StorageTypeDefinitions { + StorageTypeDefinition U8{}; + StorageTypeDefinition S8{}; + StorageTypeDefinition U16{}; + StorageTypeDefinition S16{}; + StorageTypeDefinition U32{}; + StorageTypeDefinition U64{}; + StorageTypeDefinition F32{}; + StorageTypeDefinition U32x2{}; + StorageTypeDefinition U32x4{}; +}; + +struct StorageDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; + Id U32x2{}; + Id U32x4{}; +}; + +struct GenericElementInfo { + Id id{}; + u32 first_element{}; + u32 num_components{}; +}; + +class EmitContext final : public Sirit::Module { +public: + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& binding); + ~EmitContext(); + + [[nodiscard]] Id Def(const IR::Value& value); + + [[nodiscard]] Id BitOffset8(const IR::Value& offset); + [[nodiscard]] Id BitOffset16(const IR::Value& offset); + + Id Const(u32 value) { + return Constant(U32[1], value); + } + + Id Const(u32 element_1, u32 element_2) { + return ConstantComposite(U32[2], Const(element_1), Const(element_2)); + } + + Id Const(u32 element_1, u32 element_2, u32 element_3) { + return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3)); + } + + Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { + return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), + Const(element_4)); + } + + Id SConst(s32 value) { + return Constant(S32[1], value); + } + + Id SConst(s32 element_1, s32 element_2) { + return ConstantComposite(S32[2], SConst(element_1), SConst(element_2)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3) { + return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) { + return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3), + SConst(element_4)); + } + + Id Const(f32 value) { + return Constant(F32[1], value); + } + + const Profile& profile; + const RuntimeInfo& runtime_info; + Stage stage{}; + + Id void_id{}; + Id U1{}; + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U64{}; + VectorTypes F32; + VectorTypes U32; + VectorTypes S32; + VectorTypes F16; + VectorTypes F64; + + Id true_value{}; + Id false_value{}; + Id u32_zero_value{}; + Id f32_zero_value{}; + + UniformDefinitions uniform_types; + StorageTypeDefinitions storage_types; + + Id private_u32{}; + + Id shared_u8{}; + Id shared_u16{}; + Id shared_u32{}; + Id shared_u64{}; + Id shared_u32x2{}; + Id shared_u32x4{}; + + Id input_f32{}; + Id input_u32{}; + Id input_s32{}; + + Id output_f32{}; + Id output_u32{}; + + Id image_buffer_type{}; + Id sampled_texture_buffer_type{}; + Id image_u32{}; + + std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; + std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; + std::vector<TextureBufferDefinition> texture_buffers; + std::vector<ImageBufferDefinition> image_buffers; + std::vector<TextureDefinition> textures; + std::vector<ImageDefinition> images; + + Id workgroup_id{}; + Id local_invocation_id{}; + Id invocation_id{}; + Id sample_id{}; + Id is_helper_invocation{}; + Id subgroup_local_invocation_id{}; + Id subgroup_mask_eq{}; + Id subgroup_mask_lt{}; + Id subgroup_mask_le{}; + Id subgroup_mask_gt{}; + Id subgroup_mask_ge{}; + Id instance_id{}; + Id instance_index{}; + Id base_instance{}; + Id vertex_id{}; + Id vertex_index{}; + Id base_vertex{}; + Id front_face{}; + Id point_coord{}; + Id tess_coord{}; + Id clip_distances{}; + Id layer{}; + Id viewport_index{}; + Id viewport_mask{}; + Id primitive_id{}; + + Id fswzadd_lut_a{}; + Id fswzadd_lut_b{}; + + Id indexed_load_func{}; + Id indexed_store_func{}; + + Id local_memory{}; + + Id shared_memory_u8{}; + Id shared_memory_u16{}; + Id shared_memory_u32{}; + Id shared_memory_u64{}; + Id shared_memory_u32x2{}; + Id shared_memory_u32x4{}; + + Id shared_memory_u32_type{}; + + Id shared_store_u8_func{}; + Id shared_store_u16_func{}; + Id increment_cas_shared{}; + Id increment_cas_ssbo{}; + Id decrement_cas_shared{}; + Id decrement_cas_ssbo{}; + Id f32_add_cas{}; + Id f16x2_add_cas{}; + Id f16x2_min_cas{}; + Id f16x2_max_cas{}; + Id f32x2_add_cas{}; + Id f32x2_min_cas{}; + Id f32x2_max_cas{}; + + Id load_global_func_u32{}; + Id load_global_func_u32x2{}; + Id load_global_func_u32x4{}; + Id write_global_func_u32{}; + Id write_global_func_u32x2{}; + Id write_global_func_u32x4{}; + + Id input_position{}; + std::array<Id, 32> input_generics{}; + + Id output_point_size{}; + Id output_position{}; + std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; + + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + std::array<Id, 30> patches{}; + + std::array<Id, 8> frag_color{}; + Id sample_mask{}; + Id frag_depth{}; + + std::vector<Id> interfaces; + +private: + void DefineCommonTypes(const Info& info); + void DefineCommonConstants(); + void DefineInterfaces(const IR::Program& program); + void DefineLocalMemory(const IR::Program& program); + void DefineSharedMemory(const IR::Program& program); + void DefineSharedMemoryFunctions(const IR::Program& program); + void DefineConstantBuffers(const Info& info, u32& binding); + void DefineStorageBuffers(const Info& info, u32& binding); + void DefineTextureBuffers(const Info& info, u32& binding); + void DefineImageBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); + void DefineImages(const Info& info, u32& binding); + void DefineAttributeMemAccess(const Info& info); + void DefineGlobalMemoryFunctions(const Info& info); + + void DefineInputs(const IR::Program& program); + void DefineOutputs(const IR::Program& program); +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..d7a86e270 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -0,0 +1,541 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <span> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/settings.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::SPIRV { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); +} + +template <typename ArgType> +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, Id>) { + return ctx.Def(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { + if constexpr (is_first_arg_inst) { + SetDefinition<func>( + ctx, inst, inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); + } +} + +void Traverse(EmitContext& ctx, IR::Program& program) { + IR::Block* current_block{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: { + const Id label{node.data.block->Definition<Id>()}; + if (current_block) { + ctx.OpBranch(label); + } + current_block = node.data.block; + ctx.AddLabel(label); + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + } + case IR::AbstractSyntaxNode::Type::If: { + const Id if_label{node.data.if_node.body->Definition<Id>()}; + const Id endif_label{node.data.if_node.merge->Definition<Id>()}; + ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label); + break; + } + case IR::AbstractSyntaxNode::Type::Loop: { + const Id body_label{node.data.loop.body->Definition<Id>()}; + const Id continue_label{node.data.loop.continue_block->Definition<Id>()}; + const Id endloop_label{node.data.loop.merge->Definition<Id>()}; + + ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); + ctx.OpBranch(body_label); + break; + } + case IR::AbstractSyntaxNode::Type::Break: { + const Id break_label{node.data.break_node.merge->Definition<Id>()}; + const Id skip_label{node.data.break_node.skip->Definition<Id>()}; + ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label); + break; + } + case IR::AbstractSyntaxNode::Type::EndIf: + if (current_block) { + ctx.OpBranch(node.data.end_if.merge->Definition<Id>()); + } + break; + case IR::AbstractSyntaxNode::Type::Repeat: { + Id cond{ctx.Def(node.data.repeat.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])}; + const Id safety_counter{ctx.AddGlobalVariable( + pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))}; + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(safety_counter); + } + const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)}; + const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))}; + ctx.OpStore(safety_counter, new_counter); + + const Id safety_cond{ + ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)}; + cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond); + } + const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()}; + const Id merge_label{node.data.repeat.merge->Definition<Id>()}; + ctx.OpBranchConditional(cond, loop_header_label, merge_label); + break; + } + case IR::AbstractSyntaxNode::Type::Return: + ctx.OpReturn(); + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.OpUnreachable(); + break; + } + if (node.type != IR::AbstractSyntaxNode::Type::Block) { + current_block = nullptr; + } + } +} + +Id DefineMain(EmitContext& ctx, IR::Program& program) { + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + block->SetDefinition(ctx.OpLabel()); + } + Traverse(ctx, program); + ctx.OpFunctionEnd(); + return main; +} + +spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Isolines: + return spv::ExecutionMode::Isolines; + case TessPrimitive::Triangles: + return spv::ExecutionMode::Triangles; + case TessPrimitive::Quads: + return spv::ExecutionMode::Quads; + } + throw InvalidArgument("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return spv::ExecutionMode::SpacingEqual; + case TessSpacing::FractionalOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case TessSpacing::FractionalEven: + return spv::ExecutionMode::SpacingFractionalEven; + } + throw InvalidArgument("Tessellation spacing {}", spacing); +} + +void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (program.stage) { + case Stage::Compute: { + const std::array<u32, 3> workgroup_size{program.workgroup_size}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; + } + case Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Stage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); + break; + case Stage::TessellationEval: + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); + ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise + ? spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); + break; + case Stage::Geometry: + execution_model = spv::ExecutionModel::Geometry; + ctx.AddCapability(spv::Capability::Geometry); + ctx.AddCapability(spv::Capability::GeometryStreams); + switch (ctx.runtime_info.input_topology) { + case InputTopology::Points: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); + break; + case InputTopology::Lines: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); + break; + case InputTopology::LinesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); + break; + case InputTopology::Triangles: + ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); + break; + case InputTopology::TrianglesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); + break; + } + switch (program.output_topology) { + case OutputTopology::PointList: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); + break; + case OutputTopology::LineStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); + break; + case OutputTopology::TriangleStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); + break; + } + if (program.info.stores[IR::Attribute::PointSize]) { + ctx.AddCapability(spv::Capability::GeometryPointSize); + } + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); + ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } + break; + case Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + if (ctx.profile.lower_left_origin_mode) { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); + } else { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + } + if (program.info.stores_frag_depth) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } + if (ctx.runtime_info.force_early_z) { + ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } + break; + default: + throw NotImplementedException("Stage {}", program.stage); + } + ctx.AddEntryPoint(execution_model, main, "main", interfaces); +} + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + const Info& info{program.info}; + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { + // No separate denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp16_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); + } + } +} + +void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, + EmitContext& ctx, Id main_func) { + if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { + return; + } + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + } + if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); + } +} + +void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } + if (info.stores[IR::Attribute::ViewportIndex]) { + ctx.AddCapability(spv::Capability::MultiViewport); + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + ctx.AddExtension("SPV_NV_viewport_array2"); + ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); + } + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { + if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); + ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); + } + } + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { + ctx.AddExtension("SPV_KHR_shader_draw_parameters"); + ctx.AddCapability(spv::Capability::DrawParameters); + } + if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id || + info.uses_subgroup_shuffles) && + profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } + if (info.uses_int64_bit_atomics && profile.support_int64_atomics) { + ctx.AddCapability(spv::Capability::Int64Atomics); + } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } + if (info.uses_typeless_image_writes) { + ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + } + if (info.uses_image_buffers) { + ctx.AddCapability(spv::Capability::ImageBuffer); + } + if (info.uses_sample_id) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } + if (!ctx.runtime_info.xfb_varyings.empty()) { + ctx.AddCapability(spv::Capability::TransformFeedback); + } + if (info.uses_derivatives) { + ctx.AddCapability(spv::Capability::DerivativeControl); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); + ctx.AddCapability(spv::Capability::ImageQuery); + ctx.AddCapability(spv::Capability::SampledBuffer); +} + +void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { + auto inst{program.blocks.front()->begin()}; + size_t block_index{0}; + ctx.PatchDeferredPhi([&](size_t phi_arg) { + if (phi_arg == 0) { + ++inst; + if (inst == program.blocks[block_index]->end() || + inst->GetOpcode() != IR::Opcode::Phi) { + do { + ++block_index; + inst = program.blocks[block_index]->begin(); + } while (inst->GetOpcode() != IR::Opcode::Phi); + } + } + return ctx.Def(inst->Arg(phi_arg)); + }); +} +} // Anonymous namespace + +std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings) { + EmitContext ctx{profile, runtime_info, program, bindings}; + const Id main{DefineMain(ctx, program)}; + DefineEntryPoint(program, ctx, main); + if (profile.support_float_controls) { + ctx.AddExtension("SPV_KHR_float_controls"); + SetupDenormControl(profile, program, ctx, main); + SetupSignedNanCapabilities(profile, program, ctx, main); + } + SetupCapabilities(profile, program.info, ctx); + PatchPhiNodes(program, ctx); + return ctx.Assemble(); +} + +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { + const size_t num_args{inst->NumArgs()}; + boost::container::small_vector<Id, 32> blocks; + blocks.reserve(num_args); + for (size_t index = 0; index < num_args; ++index) { + blocks.push_back(inst->PhiBlock(index)->Definition<Id>()); + } + // The type of a phi instruction is stored in its flags + const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())}; + return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size())); +} + +void EmitVoid(EmitContext&) {} + +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +void EmitReference(EmitContext&) {} + +void EmitPhiMove(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetZeroFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetSignFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetCarryFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetOverflowFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetSparseFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetInBoundsFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 000000000..db0c935fe --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include <sirit/sirit.h> + +#include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::SPIRV { + +[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitSPIRV(profile, {}, program, binding); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..9af8bb9e1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -0,0 +1,448 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { + const Id shift_id{ctx.Const(2U)}; + Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + if (index_offset > 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return ctx.profile.support_explicit_workgroup_layout + ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) + : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); +} + +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; + return ctx.Const(imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Const(shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + +Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_ptr, const IR::Value& binding, + const IR::Value& offset, size_t element_size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{SharedPointer(ctx, offset)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, + offset, sizeof(u32))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; + ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result)); + return original_value; +} +} // Anonymous namespace + +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); +} + +Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); +} + +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { + const Id shift_id{ctx.Const(3U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); +} + +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, + &Sirit::Module::OpIAdd); +} + +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, + &Sirit::Module::OpSMin); +} + +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, + &Sirit::Module::OpUMin); +} + +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, + &Sirit::Module::OpSMax); +} + +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, + &Sirit::Module::OpUMax); +} + +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, + &Sirit::Module::OpBitwiseAnd); +} + +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, + &Sirit::Module::OpBitwiseOr); +} + +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, + &Sirit::Module::OpBitwiseXor); +} + +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + ctx.OpStore(pointer, value); + return original; +} + +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); +} + +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp new file mode 100644 index 000000000..e0b52a001 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { + const auto semantics{ + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | + spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | + spv::MemorySemanticsMask::ImageMemory}; + ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics))); +} +} // Anonymous namespace + +void EmitBarrier(EmitContext& ctx) { + const auto execution{spv::Scope::Workgroup}; + const auto memory{spv::Scope::Workgroup}; + const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | + spv::MemorySemanticsMask::WorkgroupMemory}; + ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)), + ctx.Const(static_cast<u32>(memory)), + ctx.Const(static_cast<u32>(memory_semantics))); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Workgroup); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Device); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..bb11f4f4e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +void EmitBitCastU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); +} + +void EmitBitCastU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitBitCastF16U16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F32[1], value); +} + +void EmitBitCastF64U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitPackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U64, value); +} + +Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + +Id EmitPackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); +} + +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F16[2], value); +} + +Id EmitPackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpPackHalf2x16(ctx.U32[1], value); +} + +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpUnpackHalf2x16(ctx.F32[2], value); +} + +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F64[1], value); +} + +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 000000000..10ff4ecab --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { + +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +} + +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); +} + +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); +} + +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); +} + +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); +} + +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + +void EmitCompositeConstructF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..fb8c02a77 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -0,0 +1,505 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <tuple> +#include <utility> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +template <typename... Args> +Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); + default: + return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); + } +} + +template <typename... Args> +Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { + if (ctx.stage == Stage::TessellationControl) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...); + } else { + return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...); + } +} + +struct OutAttr { + OutAttr(Id pointer_) : pointer{pointer_} {} + OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {} + + Id pointer{}; + Id type{}; +}; + +std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + if (info.num_components == 1) { + return info.id; + } else { + const u32 index_element{element - info.first_element}; + const Id index_id{ctx.Const(index_element)}; + return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); + } + } + switch (attr) { + case IR::Attribute::PointSize: + return ctx.output_point_size; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); + } + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; + const u32 index{static_cast<u32>(attr) - base}; + const Id clip_num{ctx.Const(index)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); + } + case IR::Attribute::Layer: + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.layer, ctx.U32[1]}; + } + return std::nullopt; + case IR::Attribute::ViewportIndex: + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.viewport_index, ctx.U32[1]}; + } + return std::nullopt; + case IR::Attribute::ViewportMask: + if (!ctx.profile.support_viewport_mask) { + return std::nullopt; + } + return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value), + ctx.U32[1]}; + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, + const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; + if (!offset.IsImmediate()) { + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; + const Id shift{ctx.Const(log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); + } + // Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4) + const Id imm_offset{ctx.Const(offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} + +Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); +} + +Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); +} + +Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { + if (offset.IsImmediate()) { + const u32 element{(offset.U32() / 4) % 4 + index_offset}; + return ctx.OpCompositeExtract(ctx.U32[1], vector, element); + } + const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; + Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; + if (index_offset > 0) { + element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); + } + return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); +} +} // Anonymous namespace + +void EmitGetRegister(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetRegister(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetPred(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetPred(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetGotoVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetGotoVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetIndirectBranchVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetIndirectBranchVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); +} + +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); +} + +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { + const Id load{ + GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); +} + +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { + const Id load{ + GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); +} + +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return GetCbufElement(ctx, vector, offset, 0u); + } +} + +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); + } +} + +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, + offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), + GetCbufElement(ctx, vector, offset, 1u)); + } +} + +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; + if (!type) { + // Attribute is disabled + return ctx.Const(element == 3 ? 1.0f : 0.0f); + } + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + // Varying component is not written + return ctx.Const(type && element == 3 ? 1.0f : 0.0f); + } + const Id generic_id{ctx.input_generics.at(index)}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; + const Id value{ctx.OpLoad(type->id, pointer)}; + return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, + ctx.Const(element))); + case IR::Attribute::InstanceId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + } + case IR::Attribute::VertexId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + } + case IR::Attribute::FrontFace: + return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + case IR::Attribute::PointSpriteS: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); + case IR::Attribute::PointSpriteT: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); + + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { + const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)}; + if (!output) { + return; + } + if (Sirit::ValidId(output->type)) { + value = ctx.OpBitcast(output->type, value); + } + ctx.OpStore(output->pointer, value); +} + +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); + default: + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); + } +} + +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { + ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; + const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.Const(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); + default: + throw NotImplementedException("Patch {}", patch); + } + }()}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Const(component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetSampleMask(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.sample_mask, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + +void EmitGetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitWorkgroupId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); +} + +Id EmitLocalInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); +} + +Id EmitInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); +} + +Id EmitSampleId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.sample_id); +} + +Id EmitIsHelperInvocation(EmitContext& ctx) { + return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); +} + +Id EmitYDirection(EmitContext& ctx) { + return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); +} + +Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + ctx.OpStore(pointer, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..d33486f28 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + if (ctx.profile.support_demote_to_helper_invocation) { + ctx.OpDemoteToHelperInvocationEXT(); + } else { + const Id kill_label{ctx.OpLabel()}; + const Id impossible_label{ctx.OpLabel()}; + ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); + ctx.AddLabel(kill_label); + ctx.OpKill(); + ctx.AddLabel(impossible_label); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..fd42b7a16 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,269 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id ExtractU16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U16, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractS16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.S16, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractU8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpUConvert(ctx.U8, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} + +Id ExtractS8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpSConvert(ctx.S8, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} +} // Anonymous namespace + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); + } else { + return ctx.OpConvertFToS(ctx.U32[1], value); + } +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +Id EmitConvertF16F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F16[1], value); +} + +Id EmitConvertF32F16(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF32F64(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF64F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F64[1], value); +} + +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF32S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..61cf25f9c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -0,0 +1,396 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { + const auto flags{inst->Flags<IR::FpControl>()}; + if (flags.no_contraction) { + ctx.Decorate(op, spv::Decoration::NoContraction); + } + return op; +} + +Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { + if (ctx.profile.has_broken_spirv_clamp) { + return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); + } else { + return ctx.OpFClamp(type, value, zero, one); + } +} + +Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id comp{ctx.OpFOrdEqual(ctx.U1, lhs, rhs)}; + const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; + const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; + return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); + } else { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + } +} + +Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; + const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; + const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); + } else { + return (ctx.*comp_func)(ctx.U1, lhs, rhs); + } +} +} // Anonymous namespace + +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); +} + +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); +} + +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); +} + +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); +} + +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); +} + +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); +} + +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); +} + +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); +} + +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); +} + +Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F32[1], a, b); +} + +Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F64[1], a, b); +} + +Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F32[1], a, b); +} + +Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F64[1], a, b); +} + +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); +} + +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); +} + +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); +} + +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); +} + +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); +} + +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); +} + +Id EmitFPSin(EmitContext& ctx, Id value) { + return ctx.OpSin(ctx.F32[1], value); +} + +Id EmitFPCos(EmitContext& ctx, Id value) { + return ctx.OpCos(ctx.F32[1], value); +} + +Id EmitFPExp2(EmitContext& ctx, Id value) { + return ctx.OpExp2(ctx.F32[1], value); +} + +Id EmitFPLog2(EmitContext& ctx, Id value) { + return ctx.OpLog2(ctx.F32[1], value); +} + +Id EmitFPRecip32(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); +} + +Id EmitFPRecip64(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value); +} + +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); +} + +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F64[1], value); +} + +Id EmitFPSqrt(EmitContext& ctx, Id value) { + return ctx.OpSqrt(ctx.F32[1], value); +} + +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return Clamp(ctx, ctx.F16[1], value, zero, one); +} + +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.Const(f32{0.0})}; + const Id one{ctx.Const(f32{1.0})}; + return Clamp(ctx, ctx.F32[1], value, zero, one); +} + +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return Clamp(ctx, ctx.F64[1], value, zero, one); +} + +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F16[1], value, min_value, max_value); +} + +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F32[1], value, min_value, max_value); +} + +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F64[1], value, min_value, max_value); +} + +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); +} + +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); +} + +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); +} + +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); +} + +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); +} + +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); +} + +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F16[1], value); +} + +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); +} + +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); +} + +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); +} + +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); +} + +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); +} + +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPIsNan16(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +Id EmitFPIsNan32(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +Id EmitFPIsNan64(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..3588f052b --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,462 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <boost/container/static_vector.hpp> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +class ImageOperands { +public: + explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, + Id lod, const IR::Value& offset) { + if (has_bias) { + const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Bias, bias); + } + if (has_lod) { + const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Lod, lod_value); + } + AddOffset(ctx, offset); + if (has_lod_clamp) { + const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { + if (offset2.IsEmpty()) { + if (offset.IsEmpty()) { + return; + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + return; + } + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); + return; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + const Id offsets{ctx.ConstantComposite( + ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), + ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), + ctx.Const(read(1, 2), read(1, 3)))}; + Add(spv::ImageOperandsMask::ConstOffsets, offsets); + } + + explicit ImageOperands(Id offset, Id lod, Id ms) { + if (Sirit::ValidId(lod)) { + Add(spv::ImageOperandsMask::Lod, lod); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (Sirit::ValidId(ms)) { + Add(spv::ImageOperandsMask::Sample, ms); + } + } + + explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, + Id offset, Id lod_clamp) { + if (!Sirit::ValidId(derivates)) { + throw LogicError("Derivates must be present"); + } + boost::container::static_vector<Id, 3> deriv_x_accum; + boost::container::static_vector<Id, 3> deriv_y_accum; + for (u32 i = 0; i < num_derivates; ++i) { + deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); + deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); + } + const Id derivates_X{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; + const Id derivates_Y{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; + Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + std::span<const Id> Span() const noexcept { + return std::span{operands.data(), operands.size()}; + } + + std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept { + return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt; + } + + spv::ImageOperandsMask Mask() const noexcept { + return mask; + } + +private: + void AddOffset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32()))); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()))); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()), + static_cast<s32>(inst->Arg(2).U32()))); + return; + case IR::Opcode::CompositeConstructU32x4: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()), + static_cast<s32>(inst->Arg(2).U32()), + static_cast<s32>(inst->Arg(3).U32()))); + return; + default: + break; + } + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } + + void Add(spv::ImageOperandsMask new_mask, Id value) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | + static_cast<unsigned>(new_mask)); + operands.push_back(value); + } + + void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | + static_cast<unsigned>(new_mask)); + operands.push_back(value_1); + operands.push_back(value_2); + } + + boost::container::static_vector<Id, 4> operands; + spv::ImageOperandsMask mask{}; +}; + +Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; + return ctx.OpLoad(def.sampled_type, pointer); + } else { + return ctx.OpLoad(def.sampled_type, def.id); + } +} + +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + const Id sampler_id{def.id}; + const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; + return ctx.OpImage(ctx.image_buffer_type, id); + } else { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); + } +} + +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; + return ctx.OpLoad(def.image_type, def.id); + } else { + const ImageDefinition def{ctx.images.at(info.descriptor_index)}; + return ctx.OpLoad(def.image_type, def.id); + } +} + +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.relaxed_precision != 0) { + ctx.Decorate(sample, spv::Decoration::RelaxedPrecision); + } + return sample; +} + +template <typename MethodPtrType, typename... Args> +Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, + Id result_type, Args&&... args) { + IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (!sparse) { + return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...)); + } + const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; + const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; + const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; + sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); + sparse->Invalidate(); + Decorate(ctx, inst, sample); + return ctx.OpCompositeExtract(result_type, sample, 1U); +} +} // Anonymous namespace + +Id EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (ctx.stage == Stage::Fragment) { + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, + bias_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + } else { + // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as + // if the lod was explicitly zero. This may change on Turing with implicit compute + // derivatives + const Id lod{ctx.Const(0.0f)}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + } +} + +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, false, true, false, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, + &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, false, true, false, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, + &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); +} + +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, offset, offset2); + return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, + ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), + operands.MaskOptional(), operands.Span()); +} + +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, offset, offset2); + return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), + operands.Span()); +} + +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.type == TextureType::Buffer) { + lod = Id{}; + } + const ImageOperands operands(offset, lod, ms); + return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], + TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); +} + +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id image{TextureImage(ctx, info, index)}; + const Id zero{ctx.u32_zero_value}; + const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + switch (info.type) { + case TextureType::Color1D: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), + zero, zero, mips()); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), + zero, mips()); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), + mips()); + case TextureType::Buffer: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, + zero, mips()); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id zero{ctx.f32_zero_value}; + const Id sampler{Texture(ctx, info, index)}; + return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), + zero, zero); +} + +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, + offset, lod_clamp); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); +} + +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { + LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); + return ctx.ConstantNull(ctx.U32[4]); + } + return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], + Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{}); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + ctx.OpImageWrite(Image(ctx, index, info), coords, color); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp new file mode 100644 index 000000000..d7f1a365a --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -0,0 +1,183 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; + return def.id; + } else { + const ImageDefinition def{ctx.images.at(index.U32())}; + return def.id; + } +} + +std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id image{Image(ctx, index, info)}; + const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} +} // Anonymous namespace + +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..c9db1c164 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,581 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <sirit/sirit.h> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); +void EmitReference(EmitContext&); +void EmitPhiMove(EmitContext&); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3501d7495 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -0,0 +1,270 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); +} +} // Anonymous namespace + +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + Id result{}; + if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; + const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; + result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + + const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; + carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); + carry->Invalidate(); + } else { + result = ctx.OpIAdd(ctx.U32[1], a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; + + const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; + const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; + const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; + overflow->SetDefinition(carry_flag); + overflow->Invalidate(); + } + return result; +} + +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIAdd(ctx.U64, a, b); +} + +Id EmitISub32(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U32[1], a, b); +} + +Id EmitISub64(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U64, a, b); +} + +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.U32[1], a, b); +} + +Id EmitINeg32(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U32[1], value); +} + +Id EmitINeg64(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U64, value); +} + +Id EmitIAbs32(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U32[1], value); +} + +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift); +} + +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightArithmetic(ctx.U64, base, shift); +} + +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { + return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); +} + +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitReverse32(EmitContext& ctx, Id value) { + return ctx.OpBitReverse(ctx.U32[1], value); +} + +Id EmitBitCount32(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U32[1], value); +} + +Id EmitBitwiseNot32(EmitContext& ctx, Id value) { + return ctx.OpNot(ctx.U32[1], value); +} + +Id EmitFindSMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindSMsb(ctx.U32[1], value); +} + +Id EmitFindUMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindUMsb(ctx.U32[1], value); +} + +Id EmitSMin32(EmitContext& ctx, Id a, Id b) { + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMin(ctx.U32[1], a, b)}; + return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; +} + +Id EmitUMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMin(ctx.U32[1], a, b); +} + +Id EmitSMax32(EmitContext& ctx, Id a, Id b) { + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMax(ctx.U32[1], a, b)}; + return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; +} + +Id EmitUMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMax(ctx.U32[1], a, b); +} + +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) { + value = ctx.OpBitcast(ctx.S32[1], value); + min = ctx.OpBitcast(ctx.S32[1], min); + max = ctx.OpBitcast(ctx.S32[1], max); + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); + } else { + result = ctx.OpSClamp(ctx.S32[1], value, min, max); + } + result = ctx.OpBitcast(ctx.U32[1], result); + } else { + result = ctx.OpSClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); + } else { + result = ctx.OpUClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.U1, lhs, rhs); +} + +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1, lhs, rhs); +} + +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1, lhs, rhs); +} + +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1, lhs, rhs); +} + +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..b9a9500fc --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1, a, b); +} + +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1, a, b); +} + +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1, a, b); +} + +Id EmitLogicalNot(EmitContext& ctx, Id value) { + return ctx.OpLogicalNot(ctx.U1, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..679ee2684 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -0,0 +1,275 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <bit> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size, + u32 index_offset = 0) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset}; + return ctx.Const(imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + Id index{ctx.Def(offset)}; + if (shift != 0) { + const Id shift_id{ctx.Const(shift)}; + index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); + } + if (index_offset != 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return index; +} + +Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size, index_offset)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; + return ctx.OpLoad(result_type, pointer); +} + +Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32, index_offset); +} + +void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; + ctx.OpStore(pointer, value); +} + +void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + u32 index_offset = 0) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32, index_offset); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitLoadGlobal32(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u); +} + +Id EmitLoadGlobal64(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u); +} + +Id EmitLoadGlobal128(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u, 0u, 0u); +} + +void EmitWriteGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } +} + +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } +} + +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } +} + +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } +} + +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return LoadStorage32(ctx, binding, offset); +} + +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, + sizeof(u32[2]), &StorageDefinitions::U32x2); + } else { + return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1)); + } +} + +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, + sizeof(u32[4]), &StorageDefinitions::U32x4); + } else { + return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1), + LoadStorage32(ctx, binding, offset, 2), + LoadStorage32(ctx, binding, offset, 3)); + } +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage32(ctx, binding, offset, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); + } else { + for (u32 index = 0; index < 2; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); + } else { + for (u32 index = 0; index < 4; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 000000000..c5b4f4720 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -0,0 +1,42 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U1, cond, true_value, false_value); +} + +Id EmitSelectU8(EmitContext&, Id, Id, Id) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U16, cond, true_value, false_value); +} + +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); +} + +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U64, cond, true_value, false_value); +} + +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); +} + +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); +} + +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..9a79fc7a2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -0,0 +1,174 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { + const Id shift_id{ctx.Const(shift)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); +} + +Id Word(EmitContext& ctx, Id offset) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; + const Id count_id{ctx.Const(count)}; + return {bit, count_id}; +} +} // Anonymous namespace + +Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; + return ctx.OpLoad(ctx.U32[1], pointer); + } else { + return Word(ctx, offset); + } +} + +Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + return ctx.OpLoad(ctx.U32[2], pointer); + } else { + const Id shift_id{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } +} + +Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + return ctx.OpLoad(ctx.U32[4], pointer); + } + const Id shift_id{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + std::array<Id, 4> values{}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } + return ctx.OpCompositeConstruct(ctx.U32[4], values); +} + +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); + } +} + +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); + } +} + +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { + Id pointer{}; + if (ctx.profile.support_explicit_workgroup_layout) { + pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); + } else { + const Id shift{ctx.Const(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + } + ctx.OpStore(pointer, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Const(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); +} + +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..9e7eb3cb1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -0,0 +1,150 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +void ConvertDepthMode(EmitContext& ctx) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); +} + +void SetFixedPipelinePointSize(EmitContext& ctx) { + if (ctx.runtime_info.fixed_state_point_size) { + const float point_size{*ctx.runtime_info.fixed_state_point_size}; + ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); + } +} + +Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, + Id default_vector) { + switch (num_components) { + case 1: + return element == 3 ? one : zero; + case 2: + return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); + case 3: + return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero); + case 4: + return default_vector; + } + throw InvalidArgument("Bad element"); +} + +Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { + switch (comparison) { + case CompareFunction::Never: + return ctx.false_value; + case CompareFunction::Less: + return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); + case CompareFunction::Equal: + return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::LessThanEqual: + return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Greater: + return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); + case CompareFunction::NotEqual: + return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::GreaterThanEqual: + return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Always: + return ctx.true_value; + } + throw InvalidArgument("Comparison function {}", comparison); +} + +void AlphaTest(EmitContext& ctx) { + if (!ctx.runtime_info.alpha_test_func) { + return; + } + const auto comparison{*ctx.runtime_info.alpha_test_func}; + if (comparison == CompareFunction::Always) { + return; + } + if (!Sirit::ValidId(ctx.frag_color[0])) { + return; + } + + const Id type{ctx.F32[1]}; + const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; + const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; + + const Id true_label{ctx.OpLabel()}; + const Id discard_label{ctx.OpLabel()}; + const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; + const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; + + ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(condition, true_label, discard_label); + ctx.AddLabel(discard_label); + ctx.OpKill(); + ctx.AddLabel(true_label); +} +} // Anonymous namespace + +void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB) { + const Id zero{ctx.Const(0.0f)}; + const Id one{ctx.Const(1.0f)}; + const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; + ctx.OpStore(ctx.output_position, default_vector); + for (const auto& info : ctx.output_generics) { + if (info[0].num_components == 0) { + continue; + } + u32 element{0}; + while (element < 4) { + const auto& element_info{info[element]}; + const u32 num{element_info.num_components}; + const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; + ctx.OpStore(element_info.id, value); + element += num; + } + } + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + SetFixedPipelinePointSize(ctx); + } +} + +void EmitEpilogue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (ctx.stage == Stage::Fragment) { + AlphaTest(ctx); + } +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + if (ctx.runtime_info.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (stream.IsImmediate()) { + ctx.OpEmitStreamVertex(ctx.Def(stream)); + } else { + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); + ctx.OpEmitStreamVertex(ctx.u32_zero_value); + } + // Restore fixed pipeline point size after emitting the vertex + SetFixedPipelinePointSize(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (stream.IsImmediate()) { + ctx.OpEndStreamPrimitive(ctx.Def(stream)); + } else { + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); + ctx.OpEndStreamPrimitive(ctx.u32_zero_value); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 000000000..c9f469e90 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.U1); +} + +Id EmitUndefU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU32(EmitContext& ctx) { + return ctx.OpUndef(ctx.U32[1]); +} + +Id EmitUndefU64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..78b1e1ba7 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id WarpExtract(EmitContext& ctx, Id value) { + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); +} + +Id LoadMask(EmitContext& ctx, Id mask) { + const Id value{ctx.OpLoad(ctx.U32[4], mask)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); + } + return WarpExtract(ctx, value); +} + +void SetInBoundsFlag(IR::Inst* inst, Id result) { + IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + in_bounds->SetDefinition(result); + in_bounds->Invalidate(); +} + +Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { + return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); +} + +Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { + return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, + ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); +} + +Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); +} + +Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { + return ctx.OpSelect(ctx.U32[1], in_range, + ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); +} +} // Anonymous namespace + +Id EmitLaneId(EmitContext& ctx) { + const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return id; + } + return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); +} + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return WarpExtract(ctx, ballot); +} + +Id EmitSubgroupEqMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_eq); +} + +Id EmitSubgroupLtMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_lt); +} + +Id EmitSubgroupLeMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_le); +} + +Id EmitSubgroupGtMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_gt); +} + +Id EmitSubgroupGeMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_ge); +} + +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; + + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; + const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { + const Id three{ctx.Const(3U)}; + Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); + mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + + const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; + const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; + + const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; + const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; + return ctx.OpFAdd(ctx.F32[1], result_a, result_b); +} + +Id EmitDPdxFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxFine(ctx.F32[1], op_a); +} + +Id EmitDPdyFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyFine(ctx.F32[1], op_a); +} + +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxCoarse(ctx.F32[1], op_a); +} + +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyCoarse(ctx.F32[1], op_a); +} + +} // namespace Shader::Backend::SPIRV |