diff options
Diffstat (limited to 'src/shader_recompiler')
16 files changed, 405 insertions, 50 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fa268d38f..755db5dfa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -88,6 +88,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_shift_right.cpp frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp + frontend/maxwell/translate/impl/load_constant.cpp frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 21900d387..278b26b50 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -104,15 +104,23 @@ void EmitContext::DefineCommonTypes(const Info& info) { U1 = Name(TypeBool(), "u1"); - // TODO: Conditionally define these - AddCapability(spv::Capability::Int16); - AddCapability(spv::Capability::Int64); - U16 = Name(TypeInt(16, false), "u16"); - U64 = Name(TypeInt(64, false), "u64"); - F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + if (info.uses_int8) { + AddCapability(spv::Capability::Int8); + U8 = Name(TypeInt(8, false), "u8"); + S8 = Name(TypeInt(8, true), "s8"); + } + if (info.uses_int16) { + AddCapability(spv::Capability::Int16); + U16 = Name(TypeInt(16, false), "u16"); + S16 = Name(TypeInt(16, true), "s16"); + } + if (info.uses_int64) { + AddCapability(spv::Capability::Int64); + U64 = Name(TypeInt(64, false), "u64"); + } if (info.uses_fp16) { AddCapability(spv::Capability::Float16); F16.Define(*this, TypeFloat(16), "f16"); @@ -151,26 +159,51 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } - const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; - Decorate(array_type, spv::Decoration::ArrayStride, 4U); + if (True(info.used_constant_buffer_types & IR::Type::U8)) { + DefineConstantBuffers(info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstantBuffers(info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } + if (True(info.used_constant_buffer_types & IR::Type::U16)) { + DefineConstantBuffers(info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); + DefineConstantBuffers(info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); + } + if (True(info.used_constant_buffer_types & IR::Type::U32)) { + DefineConstantBuffers(info, &UniformDefinitions::U32, binding, U32[1], 'u', sizeof(u32)); + } + if (True(info.used_constant_buffer_types & IR::Type::F32)) { + DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32)); + } + if (True(info.used_constant_buffer_types & IR::Type::U64)) { + DefineConstantBuffers(info, &UniformDefinitions::U64, binding, U64, 'u', sizeof(u64)); + } + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } +} + +void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, "cbuf_block"); + Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "data"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); + const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; + uniform_types.*member_type = uniform_type; - u32 index{}; for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; + const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); - std::fill_n(cbufs.data() + desc.index, desc.count, id); - index += desc.count; + for (size_t i = 0; i < desc.count; ++i) { + cbufs[desc.index + i].*member_type = id; + } binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8b3109eb8..35eca258a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -10,8 +10,8 @@ #include <sirit/sirit.h> #include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/shader_info.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" namespace Shader::Backend::SPIRV { @@ -34,6 +34,16 @@ struct TextureDefinition { Id type; }; +struct UniformDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program); @@ -45,7 +55,10 @@ public: Id void_id{}; Id U1{}; + Id U8{}; + Id S8{}; Id U16{}; + Id S16{}; Id U64{}; VectorTypes F32; VectorTypes U32; @@ -56,10 +69,11 @@ public: Id false_value{}; Id u32_zero_value{}; - Id uniform_u32{}; + UniformDefinitions uniform_types; + Id storage_u32{}; - std::array<Id, Info::MAX_CBUFS> cbufs{}; + std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; std::array<Id, Info::MAX_SSBOS> ssbos{}; std::vector<TextureDefinition> textures; @@ -71,6 +85,8 @@ private: void DefineCommonConstants(); void DefineSpecialVariables(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); + void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, + Id type, char type_char, u32 element_size); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 69698c478..aafc59bbb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -34,7 +34,13 @@ void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); -Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx); void EmitSetAttribute(EmitContext& ctx); void EmitGetAttributeIndexed(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index eb9c01c5a..125b58cf7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -30,17 +30,61 @@ void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, + u32 element_size, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; if (!offset.IsImmediate()) { - throw NotImplementedException("Variable constant buffer offset"); + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; + const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); } - const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / 4)}; - const Id cbuf{ctx.cbufs[binding.U32()]}; - const Id access_chain{ctx.OpAccessChain(ctx.uniform_u32, cbuf, ctx.u32_zero_value, imm_offset)}; - return ctx.OpLoad(ctx.U32[1], access_chain); + if (offset.U32() % element_size != 0) { + throw NotImplementedException("Unaligned immediate constant buffer load"); + } + const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} + +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); +} + +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); +} + +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); +} + +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); +} + +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); +} + +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); +} + +Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); } void EmitGetAttribute(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ae3354c66..33819dd36 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -112,7 +112,27 @@ void IREmitter::SetPred(IR::Pred pred, const U1& value) { } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { - return Inst<U32>(Opcode::GetCbuf, binding, byte_offset); + return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); +} + +UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { + switch (bitsize) { + case 8: + return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); + case 16: + return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); + case 32: + return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); + case 64: + return Inst<U64>(Opcode::GetCbufU64, binding, byte_offset); + default: + throw InvalidArgument("Invalid bit size {}", bitsize); + } +} + +F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { + return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset); } U1 IREmitter::GetZFlag() { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index cb2a7710a..e4d110540 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -47,6 +47,9 @@ public: void SetGotoVariable(u32 id, const U1& value); [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); + [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); [[nodiscard]] U1 GetSFlag(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aa011fab1..64bd495ed 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -21,7 +21,13 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetCbufU8, U32, U32, U32, ) +OPCODE(GetCbufS8, U32, U32, U32, ) +OPCODE(GetCbufU16, U32, U32, U32, ) +OPCODE(GetCbufS16, U32, U32, U32, ) +OPCODE(GetCbufU32, U32, U32, U32, ) +OPCODE(GetCbufF32, F32, U32, U32, ) +OPCODE(GetCbufU64, U64, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) OPCODE(GetAttributeIndexed, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index a5a0e1a9b..7564aeeb2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -56,25 +56,32 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast<IR::F32>(GetReg39(insn)); } -IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { +static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { union { u64 raw; BitField<20, 14, s64> offset; BitField<34, 5, u64> binding; } const cbuf{insn}; + if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } - const IR::U32 binding{ir.Imm32(static_cast<u32>(cbuf.binding))}; - const IR::U32 byte_offset{ir.Imm32(static_cast<u32>(cbuf.offset) * 4)}; + const IR::Value binding{static_cast<u32>(cbuf.binding)}; + const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto[binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { - return ir.BitCast<IR::F32>(GetCbuf(insn)); + const auto[binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); } IR::U32 TranslatorVisitor::GetImm20(u64 insn) { @@ -83,6 +90,7 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; + if (imm.is_negative != 0) { const s64 raw{static_cast<s64>(imm.value)}; return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..39becf93c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,85 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; + } const ldc{insn}; + + const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + break; + case Size::S8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + break; + case Size::U16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + break; + case Size::S16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + break; + case Size::B32: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ff429c126..5b153acff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -425,10 +425,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDC(u64) { - ThrowNotImplemented(Opcode::LDC); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 960beadd4..cdbe85221 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { - case IR::Opcode::WorkgroupId: - info.uses_workgroup_id = true; - break; - case IR::Opcode::LocalInvocationId: - info.uses_local_invocation_id = true; - break; case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: case IR::Opcode::CompositeExtractF16x2: case IR::Opcode::CompositeExtractF16x3: case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::SelectF16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: case IR::Opcode::PackFloat2x16: @@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPTrunc64: info.uses_fp64 = true; break; - case IR::Opcode::GetCbuf: + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::UndefU8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + case IR::Opcode::SelectU8: + info.uses_int8 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::UndefU16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + case IR::Opcode::SelectU16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS16F32: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU16F32: + case IR::Opcode::ConvertU16F64: + info.uses_int16 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU64: + case IR::Opcode::UndefU64: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::SelectU64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + case IR::Opcode::IAdd64: + case IR::Opcode::ISub64: + case IR::Opcode::INeg64: + case IR::Opcode::ShiftLeftLogical64: + case IR::Opcode::ShiftRightLogical64: + case IR::Opcode::ShiftRightArithmetic64: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertS64F32: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertU64F32: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertU64U32: + case IR::Opcode::ConvertU32U64: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::ConvertF32U64: + case IR::Opcode::ConvertF64U64: + info.uses_int64 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU64: { if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + info.used_constant_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + info.used_constant_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::GetCbufU32: + info.used_constant_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::GetCbufF32: + info.used_constant_buffer_types |= IR::Type::F32; + break; + case IR::Opcode::GetCbufU64: + info.used_constant_buffer_types |= IR::Type::U64; + break; + default: + break; + } break; + } case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ae3d5a7d6..7ba9ebe9b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; @@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbuf) { + if (op_b->Opcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; @@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) { } } -template <typename Dest, typename Source> +template <IR::Opcode op, typename Dest, typename Source> void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; if (value.IsImmediate()) { @@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (value.InstRecursive()->Opcode() == reverse) { + if (arg_inst->Opcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } + if constexpr (op == IR::Opcode::BitCastF32U32) { + if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + // Replace the bitcast with a typed constant buffer read + inst.ReplaceOpcode(IR::Opcode::GetCbufF32); + inst.SetArg(0, arg_inst->Arg(0)); + inst.SetArg(1, arg_inst->Arg(1)); + return; + } } } @@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ISub32: return FoldISub32(inst); case IR::Opcode::BitCastF32U32: - return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32); + return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: - return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32); + return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd<u64>(block, inst); case IR::Opcode::SelectU32: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 2625c0bb2..5d98d278e 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -203,7 +203,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, return std::nullopt; } const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbuf) { + if (inst->Opcode() == IR::Opcode::GetCbufU32) { const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; if (!index.IsImmediate()) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 80e4ad6a9..ec802e02c 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -78,7 +78,7 @@ std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value, return std::nullopt; } const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbuf) { + if (inst->Opcode() == IR::Opcode::GetCbufU32) { const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; if (!index.IsImmediate()) { diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 103a2f0b4..adc1d9a64 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -7,6 +7,7 @@ #include <array> #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/type.h" #include <boost/container/small_vector.hpp> #include <boost/container/static_vector.hpp> @@ -61,10 +62,15 @@ struct Info { bool uses_fp16_denorms_preserve{}; bool uses_fp32_denorms_flush{}; bool uses_fp32_denorms_preserve{}; + bool uses_int8{}; + bool uses_int16{}; + bool uses_int64{}; bool uses_image_1d{}; bool uses_sampled_1d{}; bool uses_sparse_residency{}; + IR::Type used_constant_buffer_types{}; + u32 constant_buffer_mask{}; boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> |