summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/decode.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp31
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp17
-rw-r--r--src/video_core/shader/decode/conversion.cpp76
-rw-r--r--src/video_core/shader/decode/half_set.cpp16
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp8
-rw-r--r--src/video_core/shader/decode/hfma2.cpp21
-rw-r--r--src/video_core/shader/decode/memory.cpp118
-rw-r--r--src/video_core/shader/decode/texture.cpp113
-rw-r--r--src/video_core/shader/decode/xmad.cpp5
-rw-r--r--src/video_core/shader/shader_ir.cpp60
-rw-r--r--src/video_core/shader/shader_ir.h101
12 files changed, 387 insertions, 183 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index e4c438792..2da595c0d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
// Continue scanning for an exit method.
break;
}
+ default:
+ break;
}
}
return exit_method = ExitMethod::AlwaysReturn;
@@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index baee89107..2098c1170 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -9,6 +9,7 @@
namespace VideoCommon::Shader {
+using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -18,48 +19,50 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
- UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
+ if (instr.alu_half.ftz != 0) {
+ LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ }
}
- UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
const bool negate_b =
opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
- const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
-
- // instr.alu_half.type_a
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
+ op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
- Node op_b = [&]() {
+ auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
- return GetRegister(instr.gpr20);
+ return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
default:
UNREACHABLE();
- return Immediate(0);
+ return {HalfType::F32, Immediate(0)};
}
}();
- op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+ op_b = UnpackHalfFloat(op_b, type_b);
+ // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
+ Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() {
- MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R:
- return Operation(OperationCode::HAdd, meta, op_a, op_b);
+ return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R:
- return Operation(OperationCode::HMul, meta, op_a, op_b);
+ return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0);
}
}();
+ value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value);
@@ -67,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index c2164ba50..fbcd35b18 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
+ if (instr.alu_half_imm.ftz != 0) {
+ LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ }
} else {
UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
}
- UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
- "Half float immediate saturation not implemented");
- Node op_a = GetRegister(instr.gpr8);
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
const Node op_b = UnpackHalfImmediate(instr, true);
Node value = [&]() {
- MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_IMM:
- return Operation(OperationCode::HAdd, meta, op_a, op_b);
+ return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
case OpCode::Id::HMUL2_IMM:
- return Operation(OperationCode::HMul, meta, op_a, op_b);
+ return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
default:
UNREACHABLE();
return Immediate(0);
}
}();
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
+ value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
+ value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
SetRegister(bb, instr.gpr0, value);
-
return pc;
}
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 55a6fbbf2..b5ec9a6f5 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -18,13 +18,29 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
- case OpCode::Id::I2I_R: {
+ case OpCode::Id::I2I_R:
+ case OpCode::Id::I2I_C:
+ case OpCode::Id::I2I_IMM: {
UNIMPLEMENTED_IF(instr.conversion.selector);
+ UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
+ UNIMPLEMENTED_IF(instr.alu.saturate_d);
const bool input_signed = instr.conversion.is_input_signed;
const bool output_signed = instr.conversion.is_output_signed;
- Node value = GetRegister(instr.gpr20);
+ Node value = [&]() {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::I2I_R:
+ return GetRegister(instr.gpr20);
+ case OpCode::Id::I2I_C:
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::I2I_IMM:
+ return Immediate(instr.alu.GetSignedImm20_20());
+ default:
+ UNREACHABLE();
+ return Immediate(0);
+ }
+ }();
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
@@ -38,17 +54,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::I2F_R:
- case OpCode::Id::I2F_C: {
- UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
+ case OpCode::Id::I2F_C:
+ case OpCode::Id::I2F_IMM: {
+ UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
Node value = [&]() {
- if (instr.is_b_gpr) {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::I2F_R:
return GetRegister(instr.gpr20);
- } else {
+ case OpCode::Id::I2F_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::I2F_IMM:
+ return Immediate(instr.alu.GetSignedImm20_20());
+ default:
+ UNREACHABLE();
+ return Immediate(0);
}
}();
const bool input_signed = instr.conversion.is_input_signed;
@@ -62,24 +85,31 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::F2F_R:
- case OpCode::Id::F2F_C: {
- UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
- UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+ case OpCode::Id::F2F_C:
+ case OpCode::Id::F2F_IMM: {
+ UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word);
+ UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2F is not implemented");
Node value = [&]() {
- if (instr.is_b_gpr) {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::F2F_R:
return GetRegister(instr.gpr20);
- } else {
+ case OpCode::Id::F2F_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::F2F_IMM:
+ return GetImmediate19(instr);
+ default:
+ UNREACHABLE();
+ return Immediate(0);
}
}();
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {
- switch (instr.conversion.f2f.rounding) {
+ switch (instr.conversion.f2f.GetRoundingMode()) {
case Tegra::Shader::F2fRoundingOp::None:
return value;
case Tegra::Shader::F2fRoundingOp::Round:
@@ -90,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+ static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+ return Immediate(0);
}
- UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
- static_cast<u32>(instr.conversion.f2f.rounding.Value()));
- return Immediate(0);
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
@@ -102,15 +133,22 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::F2I_R:
- case OpCode::Id::F2I_C: {
+ case OpCode::Id::F2I_C:
+ case OpCode::Id::F2I_IMM: {
UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2I is not implemented");
Node value = [&]() {
- if (instr.is_b_gpr) {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::F2I_R:
return GetRegister(instr.gpr20);
- } else {
+ case OpCode::Id::F2I_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::F2I_IMM:
+ return GetImmediate19(instr);
+ default:
+ UNREACHABLE();
+ return Immediate(0);
}
}();
@@ -134,7 +172,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
const bool is_signed = instr.conversion.is_output_signed;
value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
- value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);
+ value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
SetRegister(bb, instr.gpr0, value);
break;
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 748368555..1dd94bf9d 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
+ if (instr.hset2.ftz != 0) {
+ LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ }
+
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+ op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
- // instr.hset2.type_a
- // instr.hset2.type_b
- Node op_a = GetRegister(instr.gpr8);
Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HSET2_R:
@@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
-
- op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
+ op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
- MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
- const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);
+ const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index e68512692..6e59eb650 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
- Node op_a = GetRegister(instr.gpr8);
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
- const Node op_b = [&]() {
+ Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_R:
return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
@@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
+ op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
// We can't use the constant predicate as destination.
ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
@@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const OperationCode pair_combiner =
instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
- MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
- const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
+ const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
const Node first_pred = Operation(pair_combiner, comparison);
// Set the primary predicate to the result of Predicate OP SecondPredicate
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 7a07c5ec6..a425f9eb7 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
}
constexpr auto identity = HalfType::H0_H1;
-
- const HalfType type_a = instr.hfma2.type_a;
- const Node op_a = GetRegister(instr.gpr8);
-
bool neg_b{}, neg_c{};
auto [saturate, type_b, op_b, type_c,
op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
@@ -38,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, instr.hfma2.type_b,
+ return {instr.hfma2.saturate, HalfType::F32,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
- instr.hfma2.type_b,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+ HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;
@@ -60,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return {false, identity, Immediate(0), identity, Immediate(0)};
}
}();
- UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
- op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
- op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
+ const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
+ op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
+ op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
- MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
- Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
+ Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
+ value = GetSaturatedHalfFloat(value, saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value);
@@ -74,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea3c71eed..ea1092db1 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -18,6 +19,23 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
+namespace {
+u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+ switch (uniform_type) {
+ case Tegra::Shader::UniformType::Single:
+ return 1;
+ case Tegra::Shader::UniformType::Double:
+ return 2;
+ case Tegra::Shader::UniformType::Quad:
+ case Tegra::Shader::UniformType::UnsignedQuad:
+ return 4;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+ return 1;
+ }
+}
+} // namespace
+
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::LD_L: {
- UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
- static_cast<u32>(instr.ld_l.unknown.Value()));
+ LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}",
+ static_cast<u64>(instr.ld_l.unknown.Value()));
const auto GetLmem = [&](s32 offset) {
ASSERT(offset % 4 == 0);
@@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::LDG: {
- const u32 count = [&]() {
- switch (instr.ldg.type) {
- case Tegra::Shader::UniformType::Single:
- return 1;
- case Tegra::Shader::UniformType::Double:
- return 2;
- case Tegra::Shader::UniformType::Quad:
- case Tegra::Shader::UniformType::UnsignedQuad:
- return 4;
- default:
- UNIMPLEMENTED_MSG("Unimplemented LDG size!");
- return 1;
- }
- }();
-
- const Node addr_register = GetRegister(instr.gpr8);
- const Node base_address =
- TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- const auto cbuf = std::get_if<CbufNode>(base_address);
- ASSERT(cbuf != nullptr);
- const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
- ASSERT(cbuf_offset_imm != nullptr);
- const auto cbuf_offset = cbuf_offset_imm->GetValue();
-
- bb.push_back(Comment(
- fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
-
- const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
- used_global_memory_bases.insert(descriptor);
-
- const Node immediate_offset =
- Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
- const Node base_real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
+ const auto [real_address_base, base_address, descriptor] =
+ TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+ static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
+ const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
+ Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
SetTemporal(bb, i, gmem);
@@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::STG: {
+ const auto [real_address_base, base_address, descriptor] =
+ TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+ static_cast<u32>(instr.stg.immediate_offset.Value()), true);
+
+ // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
+ SetTemporal(bb, 0, real_address_base);
+
+ const u32 count = GetUniformTypeElementsCount(instr.stg.type);
+ for (u32 i = 0; i < count; ++i) {
+ SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+ }
+ for (u32 i = 0; i < count; ++i) {
+ const Node it_offset = Immediate(i * 4);
+ const Node real_address =
+ Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+ const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+
+ bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+ }
+ break;
+ }
case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
@@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::ST_L: {
- UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
- static_cast<u32>(instr.st_l.unknown.Value()));
+ LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
+ static_cast<u64>(instr.st_l.cache_management.Value()));
const auto GetLmemAddr = [&](s32 offset) {
ASSERT(offset % 4 == 0);
@@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return pc;
}
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
+ Node addr_register,
+ u32 immediate_offset,
+ bool is_write) {
+ const Node base_address{
+ TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
+ const auto cbuf = std::get_if<CbufNode>(base_address);
+ ASSERT(cbuf != nullptr);
+ const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+ ASSERT(cbuf_offset_imm != nullptr);
+ const auto cbuf_offset = cbuf_offset_imm->GetValue();
+
+ bb.push_back(
+ Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
+
+ const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
+ const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
+ auto& usage = entry->second;
+ if (is_write) {
+ usage.is_written = true;
+ } else {
+ usage.is_read = true;
+ }
+
+ const auto real_address =
+ Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
+
+ return {real_address, base_address, descriptor};
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a775b402b..fa65ac9a9 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
-
+ bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
@@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr,
- GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
+ GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
+ break;
+ }
+ case OpCode::Id::TEX_B: {
+ UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+
+ if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+ LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
+ }
+
+ const TextureType texture_type{instr.tex_b.texture_type};
+ const bool is_array = instr.tex_b.array != 0;
+ const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
+ const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
+ const auto process_mode = instr.tex_b.GetTextureProcessMode();
+ WriteTexInstructionFloat(bb, instr,
+ GetTexCode(instr, texture_type, process_mode, depth_compare,
+ is_array, is_aoffi, {instr.gpr20}));
break;
}
case OpCode::Id::TEXS: {
@@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
WriteTexsInstructionFloat(bb, instr, values);
break;
}
+ case OpCode::Id::TXQ_B:
+ is_bindless = true;
+ [[fallthrough]];
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
@@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
- GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+ is_bindless
+ ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
+ false)
+ : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
u32 indexer = 0;
switch (instr.txq.query_type) {
@@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value =
- Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+ Operation(OperationCode::TextureQueryDimensions, meta,
+ GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
@@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::TMML_B:
+ is_bindless = true;
+ [[fallthrough]];
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
@@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = is_bindless
+ ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
+ : GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
@@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
-
+ u32 indexer = 0;
for (u32 element = 0; element < 2; ++element) {
+ if (!instr.tmml.IsComponentEnabled(element)) {
+ continue;
+ }
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
- SetTemporal(bb, element, value);
+ SetTemporal(bb, indexer++, value);
}
- for (u32 element = 0; element < 2; ++element) {
- SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
-
break;
}
case OpCode::Id::TLDS: {
@@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
return *used_samplers.emplace(entry).first;
}
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
+ bool is_array, bool is_shadow) {
+ const Node sampler_register = GetRegister(reg);
+ const Node base_sampler =
+ TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
+ const auto cbuf = std::get_if<CbufNode>(base_sampler);
+ const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+ ASSERT(cbuf_offset_imm != nullptr);
+ const auto cbuf_offset = cbuf_offset_imm->GetValue();
+ const auto cbuf_index = cbuf->GetIndex();
+ const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset;
+
+ // If this sampler has already been used, return the existing mapping.
+ const auto itr =
+ std::find_if(used_samplers.begin(), used_samplers.end(),
+ [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
+ if (itr != used_samplers.end()) {
+ ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
+ itr->IsShadow() == is_shadow);
+ return *itr;
+ }
+
+ // Otherwise create a new mapping for this sampler
+ const std::size_t next_index = used_samplers.size();
+ const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
+ return *used_samplers.emplace(entry).first;
+}
+
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
@@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset,
- std::vector<Node> aoffi) {
+ std::vector<Node> aoffi,
+ std::optional<Tegra::Shader::Register> bindless_reg) {
const bool is_array = array;
const bool is_shadow = depth_compare;
+ const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+ const auto& sampler = is_bindless
+ ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
+ : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
- // LOD selection (either via bias or explicit textureLod) not supported in GL for
- // sampler2DArrayShadow and samplerCubeArrayShadow.
+ // LOD selection (either via bias or explicit textureLod) not
+ // supported in GL for sampler2DArrayShadow and
+ // samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
@@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
lod = Immediate(0.0f);
break;
case TextureProcessMode::LB:
- // If present, lod or bias are always stored in the register indexed by the gpr20
- // field with an offset depending on the usage of the other registers
+ // If present, lod or bias are always stored in the register
+ // indexed by the gpr20 field with an offset depending on the
+ // usage of the other registers
bias = GetRegister(instr.gpr20.Value() + bias_offset);
break;
case TextureProcessMode::LL:
@@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array,
- bool is_aoffi) {
+ bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
const bool lod_bias_enabled{
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
+ const bool is_bindless = bindless_reg.has_value();
+
u64 parameter_register = instr.gpr20.Value();
+ if (is_bindless) {
+ ++parameter_register;
+ }
+
+ const u32 bias_lod_offset = (is_bindless ? 1 : 0);
if (lod_bias_enabled) {
++parameter_register;
}
@@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
dc = GetRegister(parameter_register++);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
+ aoffi, bindless_reg);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
dc = GetRegister(depth_register);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
+ {});
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index db15c0718..04a776398 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
instr.xmad.mode,
Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
+ default:
+ UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+ return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}
- UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
- return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}();
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index ac5112d78..e4eb0dfd9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
- return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate);
+ return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate);
+}
+
+Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
+ return Operation(OperationCode::HUnpack, type, value);
}
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -209,17 +213,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
if (absolute) {
- value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value);
+ value = Operation(OperationCode::HAbsolute, NO_PRECISE, value);
}
if (negate) {
- value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true),
+ value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true),
GetPredicate(true));
}
return value;
}
+Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
+ if (!saturate) {
+ return value;
+ }
+ const Node positive_zero = Immediate(std::copysignf(0, 1));
+ const Node positive_one = Immediate(1.0f);
+ return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one);
+}
+
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
- static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+ const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
{PredCondition::LessThan, OperationCode::LogicalFLessThan},
{PredCondition::Equal, OperationCode::LogicalFEqual},
{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
@@ -255,7 +268,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
Node op_b) {
- static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+ const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
{PredCondition::LessThan, OperationCode::LogicalILessThan},
{PredCondition::Equal, OperationCode::LogicalIEqual},
{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
@@ -283,40 +296,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
return predicate;
}
-Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
- const MetaHalfArithmetic& meta, Node op_a, Node op_b) {
-
- UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
- condition == PredCondition::NotEqualWithNan ||
- condition == PredCondition::LessEqualWithNan ||
- condition == PredCondition::GreaterThanWithNan ||
- condition == PredCondition::GreaterEqualWithNan,
- "Unimplemented NaN comparison for half floats");
-
- static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
+ Node op_b) {
+ const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
{PredCondition::LessThan, OperationCode::Logical2HLessThan},
{PredCondition::Equal, OperationCode::Logical2HEqual},
{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
- {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};
+ {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
+ {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
+ {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
+ {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
+ {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
const auto comparison{PredicateComparisonTable.find(condition)};
UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
"Unknown predicate comparison operation");
- const Node predicate = Operation(comparison->second, meta, op_a, op_b);
+ const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
return predicate;
}
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
- static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
+ const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
{PredOperation::And, OperationCode::LogicalAnd},
{PredOperation::Or, OperationCode::LogicalOr},
{PredOperation::Xor, OperationCode::LogicalXor},
@@ -434,11 +439,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
return OperationCode::LogicalUGreaterEqual;
case OperationCode::INegate:
UNREACHABLE_MSG("Can't negate an unsigned integer");
+ return {};
case OperationCode::IAbsolute:
UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
+ return {};
+ default:
+ UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+ return {};
}
- UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
- return {};
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d3..81278fb33 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -109,11 +109,13 @@ enum class OperationCode {
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
UBitCount, /// (MetaArithmetic, uint) -> uint
- HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+ HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+ HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+ HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
HAbsolute, /// (f16vec2 a) -> f16vec2
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
+ HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
+ HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
HMergeF32, /// (f16vec2 src) -> float
HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
@@ -150,12 +152,18 @@ enum class OperationCode {
LogicalUNotEqual, /// (uint a, uint b) -> bool
LogicalUGreaterEqual, /// (uint a, uint b) -> bool
- Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Texture, /// (MetaTexture, float[N] coords) -> float4
TextureLod, /// (MetaTexture, float[N] coords) -> float4
@@ -196,9 +204,23 @@ enum class ExitMethod {
class Sampler {
public:
+ // Use this constructor for bounded Samplers
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+ : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+ is_bindless{false} {}
+
+ // Use this constructor for bindless Samplers
+ explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+ Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
+ : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
+ is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
+
+ // Use this only for serialization/deserialization
+ explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow, bool is_bindless)
+ : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+ is_bindless{is_bindless} {}
std::size_t GetOffset() const {
return offset;
@@ -220,6 +242,14 @@ public:
return is_shadow;
}
+ bool IsBindless() const {
+ return is_bindless;
+ }
+
+ std::pair<u32, u32> GetBindlessCBuf() const {
+ return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+ }
+
bool operator<(const Sampler& rhs) const {
return std::tie(offset, index, type, is_array, is_shadow) <
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
@@ -231,8 +261,9 @@ private:
std::size_t offset{};
std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
+ bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
class ConstBuffer {
@@ -276,15 +307,13 @@ struct GlobalMemoryBase {
}
};
-struct MetaArithmetic {
- bool precise{};
+struct GlobalMemoryUsage {
+ bool is_read{};
+ bool is_written{};
};
-struct MetaHalfArithmetic {
+struct MetaArithmetic {
bool precise{};
- std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
- Tegra::Shader::HalfType::H0_H1,
- Tegra::Shader::HalfType::H0_H1};
};
struct MetaTexture {
@@ -298,11 +327,10 @@ struct MetaTexture {
u32 element{};
};
-constexpr MetaArithmetic PRECISE = {true};
-constexpr MetaArithmetic NO_PRECISE = {false};
-constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};
+inline constexpr MetaArithmetic PRECISE = {true};
+inline constexpr MetaArithmetic NO_PRECISE = {false};
-using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
+using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
@@ -578,8 +606,8 @@ public:
return used_clip_distances;
}
- const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
- return used_global_memory_bases;
+ const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
+ return used_global_memory;
}
std::size_t GetLength() const {
@@ -706,10 +734,14 @@ private:
/// Unpacks a half immediate from an instruction
Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
+ /// Unpacks a binary value into a half float pair with a type format
+ Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
/// Merges a half pair into another value
Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
/// Conditionally absolute/negated half float pair. Absolute is applied first
Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
+ /// Conditionally saturates a half float pair
+ Node GetSaturatedHalfFloat(Node value, bool saturate = true);
/// Returns a predicate comparing two floats
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
@@ -717,8 +749,7 @@ private:
Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
Node op_a, Node op_b);
/// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
- Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
- const MetaHalfArithmetic& meta, Node op_a, Node op_b);
+ Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
/// Returns a predicate combiner operation
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
@@ -730,6 +761,11 @@ private:
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+ // Accesses a texture sampler for a bindless texture.
+ const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
+ Tegra::Shader::TextureType type, bool is_array,
+ bool is_shadow);
+
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -743,7 +779,8 @@ private:
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array, bool is_aoffi);
+ bool is_array, bool is_aoffi,
+ std::optional<Tegra::Shader::Register> bindless_reg);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -763,7 +800,8 @@ private:
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
+ Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
+ std::optional<Tegra::Shader::Register> bindless_reg);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
@@ -781,6 +819,11 @@ private:
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
+ std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
+ Node addr_register,
+ u32 immediate_offset,
+ bool is_write);
+
template <typename... T>
Node Operation(OperationCode code, const T*... operands) {
return StoreNode(OperationNode(code, operands...));
@@ -834,7 +877,7 @@ private:
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
- std::set<GlobalMemoryBase> used_global_memory_bases;
+ std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
Tegra::Shader::Header header;
};