diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 186 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 33 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 28 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 10 |
6 files changed, 194 insertions, 71 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index b427ac873..0229733b6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -65,7 +65,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) - : program_code{program_code}, start{start}, locker{locker} {} + : program_code{program_code}, locker{locker}, start{start} {} const ProgramCode& program_code; ConstBufferLocker& locker; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c934d0719..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -6,6 +6,7 @@ #include <vector> #include <fmt/format.h> +#include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" @@ -15,44 +16,75 @@ namespace VideoCommon::Shader { +using Tegra::Shader::AtomicOp; +using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::StoreType; namespace { -u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { +bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { + return uniform_type == Tegra::Shader::UniformType::UnsignedByte || + uniform_type == Tegra::Shader::UniformType::UnsignedShort; +} + +u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { case Tegra::Shader::UniformType::UnsignedByte: - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 0b11; + case Tegra::Shader::UniformType::UnsignedShort: + return 0b10; default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); - return 1; + UNREACHABLE(); + return 0; } } -u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { +u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { + case Tegra::Shader::UniformType::UnsignedByte: + return 8; + case Tegra::Shader::UniformType::UnsignedShort: + return 16; case Tegra::Shader::UniformType::Single: - return 1; + return 32; case Tegra::Shader::UniformType::Double: - return 2; + return 64; case Tegra::Shader::UniformType::Quad: case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 128; default: UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); - return 1; + return 32; } } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), + Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), + std::move(offset), Immediate(size)); +} + +Node Sign16Extend(Node value) { + Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); + return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -128,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); [[fallthrough]]; case OpCode::Id::LD_S: { - const auto GetMemory = [&](s32 offset) { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - immediate_offset); - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) - : GetLocalMemory(address); + return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); + }; + const auto GetMemory = [&](s32 offset) { + return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) + : GetLocalMemory(GetAddress(offset)); }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: - case Tegra::Shader::StoreType::Bits64: - case Tegra::Shader::StoreType::Bits128: { - const u32 count = [&]() { + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, + Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); + break; + case StoreType::Bits32: + case StoreType::Bits64: + case StoreType::Bits128: { + const u32 count = [&] { switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: return 1; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: return 2; - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: return 4; default: UNREACHABLE(); @@ -184,9 +221,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, true, false); - const u32 count = GetLdgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; if (!real_address_base || !base_address) { // Tracking failed, load zeroes. for (u32 i = 0; i < count; ++i) { @@ -200,14 +238,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - if (type == Tegra::Shader::UniformType::UnsignedByte) { - // To handle unaligned loads get the byte used to dereferenced global memory - // and extract that byte from the loaded uint32. - Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); - byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); - - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), - Immediate(8)); + // To handle unaligned loads get the bytes used to dereference global memory and extract + // those bytes from the loaded u32. + if (IsUnaligned(type)) { + gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); } SetTemporary(bb, i, gmem); @@ -259,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; - const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L - ? &ShaderIR::SetLocalMemory - : &ShaderIR::SetSharedMemory; + const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; + const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; + const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Signed16: { + Node address = GetAddress(0); + Node memory = (this->*get_memory)(address); + (this->*set_memory)( + bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); + break; + } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); @@ -295,23 +336,67 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } }(); + // For unaligned reads we have to read memory too. + const bool is_read = IsUnaligned(type); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true); + TrackGlobalMemory(bb, instr, is_read, true); if (!real_address_base || !base_address) { // Tracking failed, skip the store. break; } - const u32 count = GetStgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - const Node value = GetRegister(instr.gpr0.Value() + i); + Node value = GetRegister(instr.gpr0.Value() + i); + + if (IsUnaligned(type)) { + const u32 mask = GetUnalignedMask(type); + value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); + } + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::ATOM: { + UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", + static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", + static_cast<int>(instr.atom.type.Value())); + + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } + case OpCode::Id::ATOMS: { + UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", + static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", + static_cast<int>(instr.atoms.type.Value())); + + const s32 offset = instr.atoms.GetImmediateOffset(); + Node address = GetRegister(instr.gpr8); + address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); + + Node memory = GetSharedMemory(std::move(address)); + Node data = GetRegister(instr.gpr20); + + Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. @@ -336,7 +421,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, Instruction instr, - bool is_write) { + bool is_read, bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; @@ -351,11 +436,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& const GlobalMemoryBase descriptor{index, offset}; const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); auto& usage = entry->second; - if (is_write) { - usage.is_written = true; - } else { - usage.is_read = true; - } + usage.is_written |= is_write; + usage.is_read |= is_read; const auto real_address = Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4b14cdf58..0b567e39d 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -161,16 +161,16 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case OpCode::Id::TXD: { UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented"); + const bool is_array = instr.txd.is_array != 0; u64 base_reg = instr.gpr8.Value(); const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - const Sampler* sampler = is_bindless - ? GetBindlessSampler(base_reg, {{texture_type, false, false}}) - : GetSampler(instr.sampler, {{texture_type, false, false}}); + const Sampler* sampler = + is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -179,6 +179,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexInstructionFloat(bb, instr, values); break; } + if (is_bindless) { base_reg++; } @@ -192,8 +193,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { derivates.push_back(GetRegister(derivate_reg + derivate + 1)); } + Node array_node = {}; + if (is_array) { + const Node info_reg = GetRegister(base_reg + coord_count); + array_node = BitfieldExtract(info_reg, 0, 16); + } + for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -794,14 +801,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4) { - const auto [coord_offsets, size, wrap_value, - diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { - if (is_tld4) { - return {{0, 8, 16}, 6, 32, 64}; - } else { - return {{0, 4, 8}, 4, 8, 16}; - } - }(); + const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; + const u32 size = is_tld4 ? 6 : 4; + const s32 wrap_value = is_tld4 ? 32 : 8; + const s32 diff_value = is_tld4 ? 64 : 16; const u32 mask = (1U << size) - 1; std::vector<Node> aoffi; @@ -814,7 +817,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor LOG_WARNING(HW_GPU, "AOFFI constant folding failed, some hardware might have graphical issues"); for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); + const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); const Node condition = Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); @@ -824,7 +827,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor } for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; + s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; if (value >= wrap_value) { value -= diff_value; } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4d2f4d6a8..9af1f0228 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,6 +162,8 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void + AtomicAdd, /// (memory, {u}int) -> {u}int + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void @@ -392,8 +394,30 @@ struct MetaImage { using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; +class AmendNode { +public: + std::optional<std::size_t> GetAmendIndex() const { + if (amend_index == amend_null_index) { + return std::nullopt; + } + return {amend_index}; + } + + void SetAmendIndex(std::size_t index) { + amend_index = index; + } + + void ClearAmend() { + amend_index = amend_null_index; + } + +private: + static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; + std::size_t amend_index{amend_null_index}; +}; + /// Holds any kind of operation that can be done in the IR -class OperationNode final { +class OperationNode final : public AmendNode { public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} @@ -433,7 +457,7 @@ private: }; /// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final { +class ConditionalNode final : public AmendNode { public: explicit ConditionalNode(Node condition, std::vector<Node>&& code) : condition{std::move(condition)}, code{std::move(code)} {} diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 1d9825c76..31eecb3f4 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { Immediate(bits)); } +std::size_t ShaderIR::DeclareAmend(Node new_amend) { + const std::size_t id = amend_code.size(); + amend_code.push_back(new_amend); + return id; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index baed06ccd..ba1db4c11 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -176,6 +176,10 @@ public: /// Returns a condition code evaluated from internal flags Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; + const Node& GetAmendNode(std::size_t index) const { + return amend_code[index]; + } + private: friend class ASTDecoder; @@ -390,7 +394,10 @@ private: std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, Tegra::Shader::Instruction instr, - bool is_write); + bool is_read, bool is_write); + + /// Register new amending code and obtain the reference id. + std::size_t DeclareAmend(Node new_amend); const ProgramCode& program_code; const u32 main_offset; @@ -406,6 +413,7 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; ASTManager program_manager{true, true}; + std::vector<Node> amend_code; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; |