author     ReinUsesLisp <reinuseslisp@airmail.cc>              2021-05-25 07:22:21 +0200
committer  ameerj <52414509+ameerj@users.noreply.github.com>   2021-07-23 03:51:33 +0200
commit     ca05a13c62ad7693f8be924c168e400e8139b0d2 (patch)
tree       813638ab0c537089f3493f824707417dd429a48f /src/shader_recompiler/backend
parent     glasm: Fix usage counting on phi nodes (diff)
Diffstat (limited to 'src/shader_recompiler/backend')
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm.cpp                      8
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp   8
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp            4
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_image.cpp               27
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp             58
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp                 6
-rw-r--r--  src/shader_recompiler/backend/glasm/reg_alloc.cpp                      28
-rw-r--r--  src/shader_recompiler/backend/glasm/reg_alloc.h                        16
8 files changed, 114 insertions(+), 41 deletions(-)
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 2ce839059..4aa3682c2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {
for (size_t i = 0; i < num_args; ++i) {
IR::Block& phi_block{*phi.PhiBlock(i)};
auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
- IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i));
+ IR::IREmitter ir{phi_block, it};
+ const IR::Value arg{phi.Arg(i)};
+ if (arg.IsImmediate()) {
+ ir.PhiMove(phi, arg);
+ } else {
+ ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+ }
}
for (size_t i = 0; i < num_args; ++i) {
IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index 808c72105..9201ccd39 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
}
void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
- ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)});
+ // Fake one usage to get a real register out of the condition
+ inst.DestructiveAddUsage(1);
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+ if (ret != input) {
+ ctx.Add("MOV.S {},{};", ret, input);
+ }
}
void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
index d829f05b3..bff0b7c1c 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec
// The input composite is not aliased with the return value, so we have to copy it
// beforehand. But the insert object is not aliased with the return value, so we don't have to
// worry about that
- ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object);
+ ctx.Add("MOV.{} {},{};"
+ "MOV.{} {}.{},{};",
+ type, ret, composite, type, ret, swizzle, object);
} else {
// The return value is aliased, so we can just insert the object; it doesn't matter if it's
// aliased
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index a7def0897..34725b8c6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
ctx.Add("MOV.S {},-1;"
"MOV.S {}(NONRESIDENT),0;",
sparse_ret, sparse_ret);
- sparse_inst->Invalidate();
}
std::string_view FormatStorage(ImageFormat format) {
@@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
}
+
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
} // Anonymous namespace
void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
const std::string_view type{TextureType(info)};
@@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu
void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
}
const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
}
const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
const auto info{inst.Flags<IR::TextureInstInfo>()};
const char comp{"xyzw"[info.gather_component]};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
// Allocate offsets early so they don't overwrite any consumed register
const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
const auto info{inst.Flags<IR::TextureInstInfo>()};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
dpdx = ScopedRegister{ctx.reg_alloc};
dpdy = ScopedRegister{ctx.reg_alloc};
}
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
@@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
- const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ const auto sparse_inst{PrepareSparse(inst)};
const std::string_view format{FormatStorage(info.image_format)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index e5aac14c8..e9d1bae05 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -9,6 +9,17 @@
namespace Shader::Backend::GLASM {
void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ const std::array flags{
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+ };
+ for (IR::Inst* const flag_inst : flags) {
+ if (flag_inst) {
+ flag_inst->Invalidate();
+ }
+ }
const bool cc{inst.HasAssociatedPseudoOperation()};
const std::string_view cc_mod{cc ? ".CC" : ""};
if (cc) {
@@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
if (!cc) {
return;
}
- static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"};
- const std::array flags{
- inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
- inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
- inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
- inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
- };
- for (size_t i = 0; i < flags.size(); ++i) {
- if (flags[i]) {
- const auto flag_ret{ctx.reg_alloc.Define(*flags[i])};
- ctx.Add("MOV.S {},0;"
- "MOV.S {}({}.x),-1;",
- flag_ret, flag_ret, masks[i]);
- flags[i]->Invalidate();
+ static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+ for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+ if (!flags[flag_index]) {
+ continue;
+ }
+ const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+ if (flag_index == 0) {
+ ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+ } else {
+ // We could use conditional execution here, but it's broken on Nvidia's compiler
+ ctx.Add("IF {}.x;"
+ "MOV.S {}.x,-1;"
+ "ELSE;"
+ "MOV.S {}.x,0;"
+ "ENDIF;",
+ masks[flag_index], flag_ret, flag_ret);
}
}
}
@@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
ScalarU32 count) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
const Register ret{ctx.reg_alloc.Define(inst)};
if (count.type != Type::Register && offset.type != Type::Register) {
ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
@@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
"BFE.U {},RC,{};",
count, offset, ret, base);
}
- if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) {
+ if (zero) {
ctx.Add("SEQ.S {},{},0;", *zero, ret);
- zero->Invalidate();
}
- if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) {
+ if (sign) {
ctx.Add("SLT.S {},{},0;", *sign, ret);
- sign->Invalidate();
}
}
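For orientation, here is a minimal standalone sketch (not the commit's code) of the GLASM sequence the reworked EmitIAdd32 produces when both the Zero and Carry pseudo-ops of an addition are consumed. The register numbers are invented:

```cpp
// Sketch only: prints the GLASM the new EmitIAdd32 path would emit for an
// IAdd32 whose Zero and Carry flags are used. Register numbers are made up.
#include <cstdio>

int main() {
    // The .CC modifier makes the add update the condition-code register.
    std::puts("ADD.S.CC R0.x,R1.x,R2.x;");
    // Zero flag: computed directly from the result with SEQ.S, which is why
    // the masks array now carries an empty string where "EQ" used to be.
    std::puts("SEQ.S R3.x,R0.x,0;");
    // Sign/carry/overflow flags: materialized through IF/ELSE/ENDIF on the CC
    // register, since the diff notes conditional execution is broken on
    // Nvidia's compiler.
    std::puts(
        "IF CF.x;"
        "MOV.S R4.x,-1;"
        "ELSE;"
        "MOV.S R4.x,0;"
        "ENDIF;");
}
```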
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
index af0e13d43..6e30790bb 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask,
std::string_view op) {
+ IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (in_bounds) {
+ in_bounds->Invalidate();
+ }
std::string mask;
if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
@@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32
ScalarU32{ctx.reg_alloc.Consume(clamp)});
}
const Register value_ret{ctx.reg_alloc.Define(inst)};
- IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
if (in_bounds) {
const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
ctx.Add("SHF{}.U {},{},{},{};"
"MOV.U {}.x,{}.y;",
op, bounds_ret, value, index, mask, value_ret, bounds_ret);
- in_bounds->Invalidate();
} else {
ctx.Add("SHF{}.U {},{},{},{};"
"MOV.U {}.x,{}.y;",
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
index 707b22247..c55a833c6 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) {
}
Value RegAlloc::Peek(const IR::Value& value) {
- return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive());
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return PeekInst(*value.Inst());
+ }
}
Value RegAlloc::Consume(const IR::Value& value) {
- return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return ConsumeInst(*value.Inst());
+ }
}
void RegAlloc::Unref(IR::Inst& inst) {
@@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) {
}
Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
- inst.SetDefinition<Id>(Alloc(is_long));
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(is_long));
+ } else {
+ Id id{};
+ id.is_long.Assign(is_long ? 1 : 0);
+ id.is_null.Assign(1);
+ inst.SetDefinition<Id>(id);
+ }
return Register{PeekInst(inst)};
}
@@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) {
num_regs = std::max(num_regs, reg + 1);
use[reg] = true;
Id ret{};
- ret.index.Assign(static_cast<u32>(reg));
+ ret.is_valid.Assign(1);
ret.is_long.Assign(is_long ? 1 : 0);
ret.is_spill.Assign(0);
ret.is_condition_code.Assign(0);
+ ret.is_null.Assign(0);
+ ret.index.Assign(static_cast<u32>(reg));
return ret;
}
}
@@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) {
}
void RegAlloc::Free(Id id) {
+ if (id.is_valid == 0) {
+ throw LogicError("Freeing invalid register");
+ }
if (id.is_spill != 0) {
throw NotImplementedException("Free spill");
}
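The reg_alloc.cpp hunks above let Define hand back a placeholder Id when an instruction has no uses, and the reg_alloc.h hunk below repacks Id so that such null ids format as the RC/DC throwaway registers instead of consuming a numbered one. A minimal standalone sketch of that behavior, with the project's BitField type reduced to plain shifts:

```cpp
// Sketch only: mimics the new Id layout and its FormatTo behavior. The real
// BitField type lives in src/common/bit_field.h; raw shifts stand in here.
#include <cstdint>
#include <cstdio>
#include <string>

struct Id {
    uint32_t raw{};
    bool IsValid() const { return (raw >> 0) & 1; } // set by Alloc
    bool IsLong() const { return (raw >> 1) & 1; }  // 64-bit (D) register
    bool IsNull() const { return (raw >> 4) & 1; }  // definition without uses
    uint32_t Index() const { return raw >> 5; }     // 27-bit register index
};

// Null ids print as the RC/DC scratch registers, so writes to unused
// definitions land in a throwaway register (vector form shown; the scalar
// path appends ".x").
std::string Format(Id id) {
    if (id.IsNull()) {
        return id.IsLong() ? "DC" : "RC";
    }
    return (id.IsLong() ? "D" : "R") + std::to_string(id.Index());
}

int main() {
    Id unused{};          // what Define() builds when inst.HasUses() is false
    unused.raw = 1u << 4; // is_null set; is_valid stays 0, so Free() would
                          // throw if a null id ever reached it
    Id reg3{};
    reg3.raw = (1u << 0) | (3u << 5); // is_valid, index = 3
    std::printf("%s %s\n", Format(unused).c_str(), Format(reg3).c_str()); // RC R3
}
```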
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
index 41b7c92be..b97c84146 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.h
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -35,10 +35,12 @@ enum class Type : u32 {
struct Id {
union {
u32 raw;
- BitField<0, 29, u32> index;
- BitField<29, 1, u32> is_long;
- BitField<30, 1, u32> is_spill;
- BitField<31, 1, u32> is_condition_code;
+ BitField<0, 1, u32> is_valid;
+ BitField<1, 1, u32> is_long;
+ BitField<2, 1, u32> is_spill;
+ BitField<3, 1, u32> is_condition_code;
+ BitField<4, 1, u32> is_null;
+ BitField<5, 27, u32> index;
};
bool operator==(Id rhs) const noexcept {
@@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) {
throw NotImplementedException("Spill emission");
}
if constexpr (scalar) {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+ }
if (id.is_long != 0) {
return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
} else {
return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
}
} else {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+ }
if (id.is_long != 0) {
return fmt::format_to(ctx.out(), "D{}", id.index.Value());
} else {