From 96638f57c9e17c3427d1ec6b39f250268a29ddd3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Jan 2020 03:15:55 -0300 Subject: shader/memory: Implement LDL.S16 and LDS.S16 --- src/video_core/shader/decode/memory.cpp | 35 ++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7591a715f..3f3ef6e7c 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -22,6 +22,7 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::StoreType; namespace { @@ -61,6 +62,13 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { } } +Node Sign16Extend(Node value) { + Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); + return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -139,23 +147,26 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const auto GetMemory = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast(instr.smem_imm) + offset); - const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - immediate_offset); + const Node address = + Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) : GetLocalMemory(address); }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: - case Tegra::Shader::StoreType::Bits64: - case Tegra::Shader::StoreType::Bits128: { - const u32 count = [&]() { + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, Sign16Extend(GetMemory(0))); + break; + case StoreType::Bits32: + case StoreType::Bits64: + case StoreType::Bits128: { + const u32 count = [&] { switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: return 1; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: return 2; - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: return 4; default: UNREACHABLE(); @@ -274,14 +285,14 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { : &ShaderIR::SetSharedMemory; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; default: -- cgit v1.2.3 From 531f25a03789dbdd3242edbe00d07dabc85847eb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Jan 2020 02:18:14 -0300 Subject: shader/memory: Move unaligned load/store to functions --- src/video_core/shader/decode/memory.cpp | 45 ++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 3f3ef6e7c..2f5ca5de5 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -62,6 +62,20 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { } } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), + Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), + std::move(offset), Immediate(size)); +} + Node Sign16Extend(Node value) { Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); @@ -144,19 +158,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast(instr.ld_l.unknown)); [[fallthrough]]; case OpCode::Id::LD_S: { - const auto GetMemory = [&](s32 offset) { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast(instr.smem_imm) + offset); - const Node address = - Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) - : GetLocalMemory(address); + return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); + }; + const auto GetMemory = [&](s32 offset) { + return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) + : GetLocalMemory(GetAddress(offset)); }; switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: + case StoreType::Signed16: { + Node address = GetAddress(0); + SetRegister(bb, instr.gpr0, Sign16Extend(GetMemory(0))); break; + } case StoreType::Bits32: case StoreType::Bits64: case StoreType::Bits128: { @@ -223,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { // To handle unaligned loads get the bytes used to dereference global memory and extract // those bytes from the loaded u32. if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), - std::move(offset), Immediate(size)); + gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); } SetTemporary(bb, i, gmem); @@ -334,12 +347,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node value = GetRegister(instr.gpr0.Value() + i); if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, - Immediate(size)); + const u32 mask = GetUnalignedMask(type); + value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); -- cgit v1.2.3 From 9a2cdf85205b7c77112d73fbd491426f96e0c993 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Jan 2020 02:21:05 -0300 Subject: shader/memory: Implement unaligned LDL.S16 and LDS.S16 --- src/video_core/shader/decode/memory.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 2f5ca5de5..8cd0e7d96 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -169,12 +169,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }; switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: { - Node address = GetAddress(0); - - SetRegister(bb, instr.gpr0, Sign16Extend(GetMemory(0))); + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, + Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); break; - } case StoreType::Bits32: case StoreType::Bits64: case StoreType::Bits128: { -- cgit v1.2.3 From d26e74f0a3af0e015f7d33f06d1381d8f0d21e93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Jan 2020 02:30:20 -0300 Subject: shader/memory: Implement STL.S16 and STS.S16 --- src/video_core/shader/decode/memory.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8cd0e7d96..58744d29a 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -291,9 +291,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; - const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L - ? &ShaderIR::SetLocalMemory - : &ShaderIR::SetSharedMemory; + const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; + const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; + const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; switch (instr.ldst_sl.type.Value()) { case StoreType::Bits128: @@ -306,6 +306,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Signed16: { + Node address = GetAddress(0); + Node memory = (this->*get_memory)(address); + (this->*set_memory)( + bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); + break; + } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast(instr.ldst_sl.type.Value())); -- cgit v1.2.3