diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 109 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 16 |
2 files changed, 87 insertions, 38 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ff19ada55 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -18,6 +18,23 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} +} // namespace + u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -126,45 +143,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LDG: { - const u32 count = [&]() { - switch (instr.ldg.type) { - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; - default: - UNIMPLEMENTED_MSG("Unimplemented LDG size!"); - return 1; - } - }(); - - const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - - bb.push_back(Comment( - fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); - - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; - used_global_memory_bases.insert(descriptor); - - const Node immediate_offset = - Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); - const Node base_real_address = - Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + const u32 count = GetUniformTypeElementsCount(instr.ldg.type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); SetTemporal(bb, i, gmem); @@ -174,6 +161,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::STG: { + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.stg.immediate_offset.Value()), true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(instr.stg.type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -236,4 +245,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write) { + const Node base_address{ + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf = std::get_if<CbufNode>(base_address); + ASSERT(cbuf != nullptr); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + + bb.push_back( + Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + + const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + auto& usage = entry->second; + if (is_write) { + usage.is_written = true; + } else { + usage.is_read = true; + } + + const auto real_address = + Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); + + return {real_address, base_address, descriptor}; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 4888998d3..1afab08c0 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -276,6 +276,11 @@ struct GlobalMemoryBase { } }; +struct GlobalMemoryUsage { + bool is_read{}; + bool is_written{}; +}; + struct MetaArithmetic { bool precise{}; }; @@ -578,8 +583,8 @@ public: return used_clip_distances; } - const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { - return used_global_memory_bases; + const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { + return used_global_memory; } std::size_t GetLength() const { @@ -781,6 +786,11 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write); + template <typename... T> Node Operation(OperationCode code, const T*... operands) { return StoreNode(OperationNode(code, operands...)); @@ -834,7 +844,7 @@ private: std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::set<GlobalMemoryBase> used_global_memory_bases; + std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; Tegra::Shader::Header header; }; |