Diffstat (limited to 'src/video_core/shader')
-rw-r--r--  src/video_core/shader/control_flow.cpp    |   2
-rw-r--r--  src/video_core/shader/decode/memory.cpp   | 186
-rw-r--r--  src/video_core/shader/decode/texture.cpp  |  33
-rw-r--r--  src/video_core/shader/node.h              |  28
-rw-r--r--  src/video_core/shader/shader_ir.cpp       |   6
-rw-r--r--  src/video_core/shader/shader_ir.h         |  10
6 files changed, 194 insertions(+), 71 deletions(-)
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index b427ac873..0229733b6 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -65,7 +65,7 @@ struct BlockInfo {
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
- : program_code{program_code}, start{start}, locker{locker} {}
+ : program_code{program_code}, locker{locker}, start{start} {}
const ProgramCode& program_code;
ConstBufferLocker& locker;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c934d0719..b5fbc4d58 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
#include <vector>
#include <fmt/format.h>
+#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -15,44 +16,75 @@
namespace VideoCommon::Shader {
+using Tegra::Shader::AtomicOp;
+using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
+using Tegra::Shader::GlobalAtomicOp;
+using Tegra::Shader::GlobalAtomicType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
+using Tegra::Shader::StoreType;
namespace {
-u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
+ return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
+ uniform_type == Tegra::Shader::UniformType::UnsignedShort;
+}
+
+u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::UnsignedByte:
- case Tegra::Shader::UniformType::Single:
- return 1;
- case Tegra::Shader::UniformType::Double:
- return 2;
- case Tegra::Shader::UniformType::Quad:
- case Tegra::Shader::UniformType::UnsignedQuad:
- return 4;
+ return 0b11;
+ case Tegra::Shader::UniformType::UnsignedShort:
+ return 0b10;
default:
- UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
- return 1;
+ UNREACHABLE();
+ return 0;
}
}
-u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
+u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
+ case Tegra::Shader::UniformType::UnsignedByte:
+ return 8;
+ case Tegra::Shader::UniformType::UnsignedShort:
+ return 16;
case Tegra::Shader::UniformType::Single:
- return 1;
+ return 32;
case Tegra::Shader::UniformType::Double:
- return 2;
+ return 64;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
- return 4;
+ return 128;
default:
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
- return 1;
+ return 32;
}
}
+Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
+ Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+ return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
+ Immediate(size));
+}
+
+Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
+ Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+ return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
+ std::move(offset), Immediate(size));
+}
+
+Node Sign16Extend(Node value) {
+ Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
+ Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+ Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
+ return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+}
+
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -128,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
[[fallthrough]];
case OpCode::Id::LD_S: {
- const auto GetMemory = [&](s32 offset) {
+ const auto GetAddress = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
- const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
- immediate_offset);
- return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
- : GetLocalMemory(address);
+ return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
+ };
+ const auto GetMemory = [&](s32 offset) {
+ return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
+ : GetLocalMemory(GetAddress(offset));
};
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits32:
- case Tegra::Shader::StoreType::Bits64:
- case Tegra::Shader::StoreType::Bits128: {
- const u32 count = [&]() {
+ case StoreType::Signed16:
+ SetRegister(bb, instr.gpr0,
+ Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
+ break;
+ case StoreType::Bits32:
+ case StoreType::Bits64:
+ case StoreType::Bits128: {
+ const u32 count = [&] {
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits32:
+ case StoreType::Bits32:
return 1;
- case Tegra::Shader::StoreType::Bits64:
+ case StoreType::Bits64:
return 2;
- case Tegra::Shader::StoreType::Bits128:
+ case StoreType::Bits128:
return 4;
default:
UNREACHABLE();
@@ -184,9 +221,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, false);
+ TrackGlobalMemory(bb, instr, true, false);
- const u32 count = GetLdgMemorySize(type);
+ const u32 size = GetMemorySize(type);
+ const u32 count = Common::AlignUp(size, 32) / 32;
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -200,14 +238,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- if (type == Tegra::Shader::UniformType::UnsignedByte) {
- // To handle unaligned loads get the byte used to dereferenced global memory
- // and extract that byte from the loaded uint32.
- Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
- byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
-
- gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
- Immediate(8));
+ // To handle unaligned loads get the bytes used to dereference global memory and extract
+ // those bytes from the loaded u32.
+ if (IsUnaligned(type)) {
+ gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
}
SetTemporary(bb, i, gmem);
@@ -259,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
};
- const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
- ? &ShaderIR::SetLocalMemory
- : &ShaderIR::SetSharedMemory;
+ const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
+ const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
+ const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits128:
+ case StoreType::Bits128:
(this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
(this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
[[fallthrough]];
- case Tegra::Shader::StoreType::Bits64:
+ case StoreType::Bits64:
(this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
[[fallthrough]];
- case Tegra::Shader::StoreType::Bits32:
+ case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Signed16: {
+ Node address = GetAddress(0);
+ Node memory = (this->*get_memory)(address);
+ (this->*set_memory)(
+ bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
@@ -295,23 +336,67 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
}();
+ // For unaligned reads we have to read memory too.
+ const bool is_read = IsUnaligned(type);
const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true);
+ TrackGlobalMemory(bb, instr, is_read, true);
if (!real_address_base || !base_address) {
// Tracking failed, skip the store.
break;
}
- const u32 count = GetStgMemorySize(type);
+ const u32 size = GetMemorySize(type);
+ const u32 count = Common::AlignUp(size, 32) / 32;
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- const Node value = GetRegister(instr.gpr0.Value() + i);
+ Node value = GetRegister(instr.gpr0.Value() + i);
+
+ if (IsUnaligned(type)) {
+ const u32 mask = GetUnalignedMask(type);
+ value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
+ }
+
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
+ case OpCode::Id::ATOM: {
+ UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
+ static_cast<int>(instr.atom.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
+ static_cast<int>(instr.atom.type.Value()));
+
+ const auto [real_address, base_address, descriptor] =
+ TrackGlobalMemory(bb, instr, true, true);
+ if (!real_address || !base_address) {
+ // Tracking failed, skip atomic.
+ break;
+ }
+
+ Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+ Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
+ case OpCode::Id::ATOMS: {
+ UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
+ static_cast<int>(instr.atoms.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
+ static_cast<int>(instr.atoms.type.Value()));
+
+ const s32 offset = instr.atoms.GetImmediateOffset();
+ Node address = GetRegister(instr.gpr8);
+ address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
+
+ Node memory = GetSharedMemory(std::move(address));
+ Node data = GetRegister(instr.gpr20);
+
+ Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
@@ -336,7 +421,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
Instruction instr,
- bool is_write) {
+ bool is_read, bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
@@ -351,11 +436,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const GlobalMemoryBase descriptor{index, offset};
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
auto& usage = entry->second;
- if (is_write) {
- usage.is_written = true;
- } else {
- usage.is_read = true;
- }
+ usage.is_written |= is_write;
+ usage.is_read |= is_read;
const auto real_address =
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
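
Note on the unaligned-access rework above: the emitted IR derives a bit offset from the low address bits and then performs a bitfield extract (loads) or insert (stores). Below is a minimal standalone sketch of that arithmetic in plain C++; integers stand in for shader IR nodes, the helper names simply mirror the diff, and the concrete values are made up for illustration.

#include <cassert>
#include <cstdint>

namespace {

// mask comes from GetUnalignedMask: 0b11 for 8-bit, 0b10 for 16-bit accesses.
uint32_t ExtractUnaligned(uint32_t loaded_word, uint32_t address, uint32_t mask,
                          uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8; // byte offset -> bit offset
    return (loaded_word >> bit_offset) & ((1u << size) - 1);
}

uint32_t InsertUnaligned(uint32_t dest_word, uint32_t value, uint32_t address,
                         uint32_t mask, uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t field = ((1u << size) - 1) << bit_offset;
    return (dest_word & ~field) | ((value << bit_offset) & field);
}

// Replicates bit 15 into the upper half, like the Select-based IR in Sign16Extend.
uint32_t Sign16Extend(uint32_t value) {
    return value | ((value & (1u << 15)) != 0 ? 0xFFFF0000u : 0u);
}

} // namespace

int main() {
    // LDG.U8 at address 0x1003: load the aligned word, extract byte 3.
    assert(ExtractUnaligned(0xAABBCCDDu, 0x1003u, 0b11, 8) == 0xAAu);
    // STG.U16 into the upper half of a word: the lower half is preserved,
    // which is why TrackGlobalMemory now also marks unaligned stores as reads.
    assert(InsertUnaligned(0x11112222u, 0xBEEFu, 0x1002u, 0b10, 16) == 0xBEEF2222u);
    // LD_S/ST_S Signed16: negative 16-bit values are sign-extended.
    assert(Sign16Extend(0x8000u) == 0xFFFF8000u);
}

For 16-bit accesses the 0b10 mask ignores address bit 0, so the IR effectively assumes at least 2-byte alignment.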
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 4b14cdf58..0b567e39d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -161,16 +161,16 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
case OpCode::Id::TXD: {
UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
+ const bool is_array = instr.txd.is_array != 0;
u64 base_reg = instr.gpr8.Value();
const auto derivate_reg = instr.gpr20.Value();
const auto texture_type = instr.txd.texture_type.Value();
const auto coord_count = GetCoordCount(texture_type);
- const Sampler* sampler = is_bindless
- ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
- : GetSampler(instr.sampler, {{texture_type, false, false}});
+ const Sampler* sampler =
+ is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
if (sampler == nullptr) {
for (u32 element = 0; element < values.size(); ++element) {
@@ -179,6 +179,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
WriteTexInstructionFloat(bb, instr, values);
break;
}
+
if (is_bindless) {
base_reg++;
}
@@ -192,8 +193,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
derivates.push_back(GetRegister(derivate_reg + derivate + 1));
}
+ Node array_node = {};
+ if (is_array) {
+ const Node info_reg = GetRegister(base_reg + coord_count);
+ array_node = BitfieldExtract(info_reg, 0, 16);
+ }
+
for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
+ MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
}
@@ -794,14 +801,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
bool is_tld4) {
- const auto [coord_offsets, size, wrap_value,
- diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
- if (is_tld4) {
- return {{0, 8, 16}, 6, 32, 64};
- } else {
- return {{0, 4, 8}, 4, 8, 16};
- }
- }();
+ const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
+ const u32 size = is_tld4 ? 6 : 4;
+ const s32 wrap_value = is_tld4 ? 32 : 8;
+ const s32 diff_value = is_tld4 ? 64 : 16;
const u32 mask = (1U << size) - 1;
std::vector<Node> aoffi;
@@ -814,7 +817,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
LOG_WARNING(HW_GPU,
"AOFFI constant folding failed, some hardware might have graphical issues");
for (std::size_t coord = 0; coord < coord_count; ++coord) {
- const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+ const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
const Node condition =
Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
@@ -824,7 +827,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
}
for (std::size_t coord = 0; coord < coord_count; ++coord) {
- s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+ s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
if (value >= wrap_value) {
value -= diff_value;
}
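
The GetAoffiCoordinates cleanup above keeps the same packed encoding: each texture offset component occupies size bits, and values at or above wrap_value wrap around to negatives. A self-contained sketch of the constant-folded decode path follows; DecodeAoffi is a hypothetical stand-in and plain integers replace IR nodes.

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int32_t> DecodeAoffi(uint32_t packed, std::size_t coord_count, bool is_tld4) {
    const std::array coord_offsets = is_tld4 ? std::array{0u, 8u, 16u} : std::array{0u, 4u, 8u};
    const uint32_t size = is_tld4 ? 6 : 4;        // bits per component
    const int32_t wrap_value = is_tld4 ? 32 : 8;  // first value that wraps negative
    const int32_t diff_value = is_tld4 ? 64 : 16; // 2^size, subtracted to wrap
    const uint32_t mask = (1u << size) - 1;

    std::vector<int32_t> aoffi;
    aoffi.reserve(coord_count);
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        int32_t value = static_cast<int32_t>((packed >> coord_offsets[coord]) & mask);
        if (value >= wrap_value) {
            value -= diff_value; // e.g. TEX offsets land in [-8, 7], TLD4 in [-32, 31]
        }
        aoffi.push_back(value);
    }
    return aoffi;
}

int main() {
    // A 2D TEX offset of (-1, 2) packs as 0xF | (2 << 4) == 0x2F.
    const std::vector<int32_t> decoded = DecodeAoffi(0x2F, 2, false);
    assert(decoded[0] == -1 && decoded[1] == 2);
}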
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4d2f4d6a8..9af1f0228 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
+ AtomicAdd, /// (memory, {u}int) -> {u}int
+
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
@@ -392,8 +394,30 @@ struct MetaImage {
using Meta =
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
+class AmendNode {
+public:
+ std::optional<std::size_t> GetAmendIndex() const {
+ if (amend_index == amend_null_index) {
+ return std::nullopt;
+ }
+ return {amend_index};
+ }
+
+ void SetAmendIndex(std::size_t index) {
+ amend_index = index;
+ }
+
+ void ClearAmend() {
+ amend_index = amend_null_index;
+ }
+
+private:
+ static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
+ std::size_t amend_index{amend_null_index};
+};
+
/// Holds any kind of operation that can be done in the IR
-class OperationNode final {
+class OperationNode final : public AmendNode {
public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
@@ -433,7 +457,7 @@ private:
};
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
-class ConditionalNode final {
+class ConditionalNode final : public AmendNode {
public:
explicit ConditionalNode(Node condition, std::vector<Node>&& code)
: condition{std::move(condition)}, code{std::move(code)} {}
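
On the new AtomicAdd operation code: its signature is (memory, {u}int) -> {u}int, and the ATOM/ATOMS handlers in memory.cpp write its result back to gpr0. A host-side sketch of the presumed semantics follows; returning the pre-add value follows the usual atomic read-modify-write convention (GLSL atomicAdd, C++ fetch_add) and is an assumption here, since the diff itself does not spell it out.

#include <atomic>
#include <cassert>
#include <cstdint>

int32_t AtomicAdd(std::atomic<int32_t>& memory, int32_t data) {
    return memory.fetch_add(data); // returns the value held before the addition
}

int main() {
    std::atomic<int32_t> gmem{40};
    // ATOM.ADD: gpr0 receives the old value, memory ends up incremented.
    const int32_t gpr0 = AtomicAdd(gmem, 2);
    assert(gpr0 == 40 && gmem.load() == 42);
}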
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 1d9825c76..31eecb3f4 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
Immediate(bits));
}
+std::size_t ShaderIR::DeclareAmend(Node new_amend) {
+ const std::size_t id = amend_code.size();
+ amend_code.push_back(new_amend);
+ return id;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index baed06ccd..ba1db4c11 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -176,6 +176,10 @@ public:
/// Returns a condition code evaluated from internal flags
Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
+ const Node& GetAmendNode(std::size_t index) const {
+ return amend_code[index];
+ }
+
private:
friend class ASTDecoder;
@@ -390,7 +394,10 @@ private:
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
Tegra::Shader::Instruction instr,
- bool is_write);
+ bool is_read, bool is_write);
+
+ /// Register new amending code and obtain the reference id.
+ std::size_t DeclareAmend(Node new_amend);
const ProgramCode& program_code;
const u32 main_offset;
@@ -406,6 +413,7 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
ASTManager program_manager{true, true};
+ std::vector<Node> amend_code;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
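
The amend machinery added across node.h, shader_ir.cpp, and shader_ir.h fits together as follows: DeclareAmend stores a node in amend_code and returns its index, a node derived from AmendNode records that index in place of an owning pointer, and a consumer later resolves it through GetAmendNode. Below is a toy end-to-end model; every Toy* name is illustrative, and no backend consumer appears in this diff, so the emission order shown is only a plausible use.

#include <cstddef>
#include <cstdio>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct ToyNode {
    explicit ToyNode(std::string text_) : text{std::move(text_)} {}

    std::string text;

    std::optional<std::size_t> GetAmendIndex() const {
        return amend_index == null_index ? std::nullopt : std::optional{amend_index};
    }
    void SetAmendIndex(std::size_t index) {
        amend_index = index;
    }

private:
    static constexpr std::size_t null_index = ~std::size_t{0}; // "no amend" sentinel
    std::size_t amend_index = null_index;
};

struct ToyIR {
    // Mirrors ShaderIR::DeclareAmend: store the node, return its reference id.
    std::size_t DeclareAmend(ToyNode node) {
        amend_code.push_back(std::move(node));
        return amend_code.size() - 1;
    }
    const ToyNode& GetAmendNode(std::size_t index) const {
        return amend_code[index];
    }

private:
    std::vector<ToyNode> amend_code;
};

int main() {
    ToyIR ir;
    ToyNode op{"result = atomicAdd(mem, data);"};
    op.SetAmendIndex(ir.DeclareAmend(ToyNode{"// amending code attached to op"}));

    // A backend walking the IR might emit a node's amend alongside the node itself.
    if (const auto index = op.GetAmendIndex()) {
        std::printf("%s\n", ir.GetAmendNode(*index).text.c_str());
    }
    std::printf("%s\n", op.text.c_str());
}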