Diffstat (limited to 'src/video_core/shader')
-rw-r--r--  src/video_core/shader/control_flow.cpp       |   6
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp  |   3
-rw-r--r--  src/video_core/shader/decode/memory.cpp      | 100
-rw-r--r--  src/video_core/shader/decode/shift.cpp       |   1
-rw-r--r--  src/video_core/shader/node.h                 |  14
5 files changed, 76 insertions, 48 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 2e2711350..6d313963a 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -484,17 +484,17 @@ bool TryInspectAddress(CFGRebuildState& state) {
}
case BlockCollision::Inside: {
// This case is the tricky one:
- // We need to Split the block in 2 sepparate blocks
+ // We need to split the block into 2 separate blocks
const u32 end = state.block_info[block_index].end;
BlockInfo& new_block = CreateBlockInfo(state, address, end);
BlockInfo& current_block = state.block_info[block_index];
current_block.end = address - 1;
- new_block.branch = current_block.branch;
+ new_block.branch = std::move(current_block.branch);
BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
const auto branch = std::get_if<SingleBranch>(forward_branch.get());
branch->address = address;
branch->ignore = true;
- current_block.branch = forward_branch;
+ current_block.branch = std::move(forward_branch);
return true;
}
default:
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 478394682..4db329fa5 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
- case OpCode::Id::FCMP_R: {
+ case OpCode::Id::FCMP_RR:
+ case OpCode::Id::FCMP_RC: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b8f63922f..8112ead3e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -3,7 +3,9 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <utility>
#include <vector>
+
#include <fmt/format.h>
#include "common/alignment.h"
@@ -16,6 +18,7 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
@@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;
namespace {
-Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
- const OperationCode operation_code = [op] {
- switch (op) {
- case AtomicOp::Add:
- return OperationCode::AtomicIAdd;
- case AtomicOp::Min:
- return OperationCode::AtomicIMin;
- case AtomicOp::Max:
- return OperationCode::AtomicIMax;
- case AtomicOp::And:
- return OperationCode::AtomicIAnd;
- case AtomicOp::Or:
- return OperationCode::AtomicIOr;
- case AtomicOp::Xor:
- return OperationCode::AtomicIXor;
- case AtomicOp::Exch:
- return OperationCode::AtomicIExchange;
- default:
- UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
- return OperationCode::AtomicIAdd;
- }
- }();
- return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+OperationCode GetAtomOperation(AtomicOp op) {
+ switch (op) {
+ case AtomicOp::Add:
+ return OperationCode::AtomicIAdd;
+ case AtomicOp::Min:
+ return OperationCode::AtomicIMin;
+ case AtomicOp::Max:
+ return OperationCode::AtomicIMax;
+ case AtomicOp::And:
+ return OperationCode::AtomicIAnd;
+ case AtomicOp::Or:
+ return OperationCode::AtomicIOr;
+ case AtomicOp::Xor:
+ return OperationCode::AtomicIXor;
+ case AtomicOp::Exch:
+ return OperationCode::AtomicIExchange;
+ default:
+ UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+ return OperationCode::AtomicIAdd;
+ }
}
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
@@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
- return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
- Immediate(size));
+ offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
+ return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
}
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
- Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
- return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
- std::move(offset), Immediate(size));
+ Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
+ return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
+ Immediate(size));
}
Node Sign16Extend(Node value) {
Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
- Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+ Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
- return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+ return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
}
} // Anonymous namespace
@@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
if (IsUnaligned(type)) {
const u32 mask = GetUnalignedMask(type);
- value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
+ value = InsertUnaligned(gmem, move(value), real_address, mask, size);
}
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
+ case OpCode::Id::RED: {
+ UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+ static_cast<int>(instr.red.type.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add, "operation={}",
+ static_cast<int>(instr.red.operation.Value()));
+ const auto [real_address, base_address, descriptor] =
+ TrackGlobalMemory(bb, instr, true, true);
+ if (!real_address || !base_address) {
+ // Tracking failed, skip atomic.
+ break;
+ }
+ Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+ Node value = GetRegister(instr.gpr0);
+ bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+ break;
+ }
case OpCode::Id::ATOM: {
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
instr.atom.operation == AtomicOp::Dec ||
instr.atom.operation == AtomicOp::SafeAdd,
"operation={}", static_cast<int>(instr.atom.operation.Value()));
UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
- instr.atom.type == GlobalAtomicType::U64,
+ instr.atom.type == GlobalAtomicType::U64 ||
+ instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
+ instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
"type={}", static_cast<int>(instr.atom.type.Value()));
const auto [real_address, base_address, descriptor] =
@@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
const bool is_signed =
- instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
+ instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
- GetRegister(instr.gpr20));
- SetRegister(bb, instr.gpr0, std::move(value));
+ SetRegister(bb, instr.gpr0,
+ SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
+ GetRegister(instr.gpr20)));
break;
}
case OpCode::Id::ATOMS: {
@@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
const s32 offset = instr.atoms.GetImmediateOffset();
Node address = GetRegister(instr.gpr8);
- address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
- Node value =
- GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
- GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
- SetRegister(bb, instr.gpr0, std::move(value));
+ address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
+ SetRegister(bb, instr.gpr0,
+ SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
+ GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
break;
}
case OpCode::Id::AL2P: {
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 3b391d3e6..d4ffa8014 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -23,7 +23,6 @@ Node IsFull(Node shift) {
}
Node Shift(OperationCode opcode, Node value, Node shift) {
- Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32));
Node shifted = Operation(opcode, move(value), shift);
return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5fcc9da60..3eee961f5 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -178,6 +178,20 @@ enum class OperationCode {
AtomicIOr, /// (memory, int) -> int
AtomicIXor, /// (memory, int) -> int
+ ReduceUAdd, /// (memory, uint) -> void
+ ReduceUMin, /// (memory, uint) -> void
+ ReduceUMax, /// (memory, uint) -> void
+ ReduceUAnd, /// (memory, uint) -> void
+ ReduceUOr, /// (memory, uint) -> void
+ ReduceUXor, /// (memory, uint) -> void
+
+ ReduceIAdd, /// (memory, int) -> void
+ ReduceIMin, /// (memory, int) -> void
+ ReduceIMax, /// (memory, int) -> void
+ ReduceIAnd, /// (memory, int) -> void
+ ReduceIOr, /// (memory, int) -> void
+ ReduceIXor, /// (memory, int) -> void
+
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
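
Note on the Sign16Extend helper touched in decode/memory.cpp above: it emits IR that sign-extends a 16-bit value held in the low half of a 32-bit register. Below is a minimal standalone C++ sketch of the same arithmetic on plain integers rather than IR nodes; the function and variable names are illustrative only and not part of the shader IR.

#include <cstdint>
#include <cstdio>

// Mirrors the operations emitted by Sign16Extend: test bit 15, then OR in
// the upper 16 bits when the sign bit is set.
static uint32_t Sign16ExtendScalar(uint32_t value) {
    const uint32_t sign = value & (1U << 15);            // UBitwiseAnd
    const bool is_sign = sign == (1U << 15);             // LogicalUEqual
    const uint32_t extend = is_sign ? 0xFFFF0000U : 0U;  // Select
    return value | extend;                               // UBitwiseOr
}

int main() {
    std::printf("%08x\n", Sign16ExtendScalar(0x8001)); // ffff8001
    std::printf("%08x\n", Sign16ExtendScalar(0x1234)); // 00001234
    return 0;
}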