diff options
Diffstat (limited to 'src/video_core/shader/decode')
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 118 | ||||
-rw-r--r-- | src/video_core/shader/decode/xmad.cpp | 39 |
2 files changed, 109 insertions, 48 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ea1092db1 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -18,6 +19,23 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} +} // namespace + u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LD_L: { - UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", - static_cast<u32>(instr.ld_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", + static_cast<u64>(instr.ld_l.unknown.Value())); const auto GetLmem = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LDG: { - const u32 count = [&]() { - switch (instr.ldg.type) { - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; - default: - UNIMPLEMENTED_MSG("Unimplemented LDG size!"); - return 1; - } - }(); - - const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - - bb.push_back(Comment( - fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); - - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; - used_global_memory_bases.insert(descriptor); - - const Node immediate_offset = - Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); - const Node base_real_address = - Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + const u32 count = GetUniformTypeElementsCount(instr.ldg.type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); SetTemporal(bb, i, gmem); @@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::STG: { + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.stg.immediate_offset.Value()), true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(instr.stg.type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: { - UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", - static_cast<u32>(instr.st_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", + static_cast<u64>(instr.st_l.cache_management.Value())); const auto GetLmemAddr = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write) { + const Node base_address{ + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf = std::get_if<CbufNode>(base_address); + ASSERT(cbuf != nullptr); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + + bb.push_back( + Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + + const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + auto& usage = entry->second; + if (is_write) { + usage.is_written = true; + } else { + usage.is_read = true; + } + + const auto real_address = + Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); + + return {real_address, base_address, descriptor}; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index c34843307..db15c0718 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const bool is_signed_b = instr.xmad.sign_b == 1; const bool is_signed_c = is_signed_a; - auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> { + auto [is_merge, is_psl, is_high_b, mode, op_b, + op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::XMAD_CR: return {instr.xmad.merge_56, + instr.xmad.product_shift_left_second, + instr.xmad.high_b, + instr.xmad.mode_cbf, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; + return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, + instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RC: - return {false, GetRegister(instr.gpr39), + return {false, + false, + instr.xmad.high_b, + instr.xmad.mode_cbf, + GetRegister(instr.gpr39), GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)), + return {instr.xmad.merge_37, + instr.xmad.product_shift_left, + false, + instr.xmad.mode, + Immediate(static_cast<u32>(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; } UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, Immediate(0), Immediate(0)}; + return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; }(); op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); const Node original_b = op_b; - op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16); + op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); // TODO(Rodrigo): Use an appropiate sign for this operation Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); - if (instr.xmad.product_shift_left) { + if (is_psl) { product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); } + SetTemporal(bb, 0, product); + product = GetTemporal(0); const Node original_c = op_c; + const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error op_c = [&]() { - switch (instr.xmad.mode) { + switch (set_mode) { case Tegra::Shader::XmadMode::None: return original_c; case Tegra::Shader::XmadMode::CLo: @@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { } }(); + SetTemporal(bb, 1, op_c); + op_c = GetTemporal(1); + // TODO(Rodrigo): Use an appropiate sign for this operation Node sum = Operation(OperationCode::IAdd, product, op_c); + SetTemporal(bb, 2, sum); + sum = GetTemporal(2); if (is_merge) { const Node a = BitfieldExtract(sum, 0, 16); const Node b = @@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader |