summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader/decode
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader/decode')
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp3
-rw-r--r--src/video_core/shader/decode/conversion.cpp113
-rw-r--r--src/video_core/shader/decode/image.cpp360
-rw-r--r--src/video_core/shader/decode/other.cpp48
-rw-r--r--src/video_core/shader/decode/texture.cpp14
-rw-r--r--src/video_core/shader/decode/video.cpp58
6 files changed, 539 insertions, 57 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 478394682..4db329fa5 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
- case OpCode::Id::FCMP_R: {
+ case OpCode::Id::FCMP_RR:
+ case OpCode::Id::FCMP_RC: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index c72690b2b..b9989c88c 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -2,6 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <limits>
+#include <optional>
+#include <utility>
+
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
@@ -15,9 +19,49 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
namespace {
+
constexpr OperationCode GetFloatSelector(u64 selector) {
return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
}
+
+/// Returns the width in bits of a register size, or 0 when the size is none of
+/// Byte, Short, Word or Long.
+constexpr u32 SizeInBits(Register::Size size) {
+    if (size == Register::Size::Byte) {
+        return 8;
+    }
+    if (size == Register::Size::Short) {
+        return 16;
+    }
+    if (size == Register::Size::Word) {
+        return 32;
+    }
+    if (size == Register::Size::Long) {
+        return 64;
+    }
+    // Not one of the sizes handled above
+    return 0;
+}
+
+/// Computes the inclusive [min, max] range an integer conversion result is clamped to.
+///
+/// @param src_size   Size of the source operand.
+/// @param dst_size   Size of the destination operand.
+/// @param src_signed True when the source is signed.
+/// @param dst_signed True when the destination is signed.
+/// @return The clamp bounds, or std::nullopt when no clamping applies (Word to Word with equal
+///         signedness, or a destination size whose bit width is reported as 0).
+///
+/// The bounds are 32-bit values, so destinations of 32 bits or more use the 32-bit limits.
+/// All shifts are done on unsigned values with a count below 32, which avoids the signed
+/// overflow and out-of-range shifts that 32-bit and 64-bit destinations would otherwise cause.
+constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
+                                                                   Register::Size dst_size,
+                                                                   bool src_signed,
+                                                                   bool dst_signed) {
+    const u32 dst_bits = SizeInBits(dst_size);
+    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
+        if (src_signed == dst_signed) {
+            return std::nullopt;
+        }
+        return std::make_pair(0, std::numeric_limits<s32>::max());
+    }
+    if (dst_bits == 0) {
+        // Unknown destination size, there is no meaningful range to clamp to
+        return std::nullopt;
+    }
+    if (dst_signed) {
+        // Signed destination, clamp to [-128, 127] for instance
+        if (dst_bits >= 32) {
+            // Avoid overflowing s32 while building the limits
+            return std::make_pair(std::numeric_limits<s32>::min(),
+                                  std::numeric_limits<s32>::max());
+        }
+        const s32 upper = static_cast<s32>((u32{1} << (dst_bits - 1)) - 1);
+        return std::make_pair(-upper - 1, upper);
+    }
+    // Unsigned destination
+    if (dst_bits >= 32) {
+        // Avoid shifting by 32 or more, that is undefined behavior
+        return std::make_pair(0, static_cast<s32>(std::numeric_limits<u32>::max()));
+    }
+    return std::make_pair(0, static_cast<s32>((u32{1} << dst_bits) - 1));
+}
+
} // Anonymous namespace
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
@@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2I_R:
case OpCode::Id::I2I_C:
case OpCode::Id::I2I_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
- UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
- UNIMPLEMENTED_IF(instr.alu.saturate_d);
+ const bool src_signed = instr.conversion.is_input_signed;
+ const bool dst_signed = instr.conversion.is_output_signed;
+ const Register::Size src_size = instr.conversion.src_size;
+ const Register::Size dst_size = instr.conversion.dst_size;
+ const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
- const bool input_signed = instr.conversion.is_input_signed;
- const bool output_signed = instr.conversion.is_output_signed;
-
- Node value = [&]() {
+ Node value = [this, instr, opcode] {
switch (opcode->get().GetId()) {
case OpCode::Id::I2I_R:
return GetRegister(instr.gpr20);
@@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
- value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
- value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
- input_signed);
- if (input_signed != output_signed) {
- value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
+ // Ensure the source selector is valid
+ switch (instr.conversion.src_size) {
+ case Register::Size::Byte:
+ break;
+ case Register::Size::Short:
+ ASSERT(selector == 0 || selector == 2);
+ break;
+ default:
+ ASSERT(selector == 0);
+ break;
+ }
+
+ if (src_size != Register::Size::Word || selector != 0) {
+ value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
+ Immediate(selector * 8), Immediate(SizeInBits(src_size)));
+ }
+
+ value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
+ instr.conversion.negate_a, src_signed);
+
+ if (instr.alu.saturate_d) {
+ if (src_signed && !dst_signed) {
+ Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
+ Immediate(1 << (SizeInBits(src_size) - 1)));
+ value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
+ std::move(value));
+
+ // Simplify generated expressions, this can be removed without semantic impact
+ SetTemporary(bb, 0, std::move(value));
+ value = GetTemporary(0);
+
+ if (dst_size != Register::Size::Word) {
+ const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
+ Node is_large =
+ Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
+ value = Operation(OperationCode::Select, std::move(is_large), limit,
+ std::move(value));
+ }
+ } else if (const std::optional bounds =
+ IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
+ value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
+ Immediate(bounds->first));
+ value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
+ Immediate(bounds->second));
+ }
+ } else if (dst_size != Register::Size::Word) {
+ // No saturation, we only have to mask the result
+ Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
+ value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
}
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::I2F_R:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/textures/texture.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::StoreType;
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TICEntry;
namespace {
+
+/// Returns the data type of the component at position `component` for the descriptor's format.
+/// Each format group below lists which descriptor channel sits at each position. A
+/// format/position pair that is not listed is reported as unimplemented and yields FLOAT.
+ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
+                               std::size_t component) {
+    const TextureFormat format{descriptor.format};
+    switch (format) {
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1:
+        // Positions map to R, G, B, A
+        switch (component) {
+        case 0:
+            return descriptor.r_type;
+        case 1:
+            return descriptor.g_type;
+        case 2:
+            return descriptor.b_type;
+        case 3:
+            return descriptor.a_type;
+        default:
+            break;
+        }
+        break;
+    case TextureFormat::A8R8G8B8:
+        // Positions map to A, R, G, B
+        switch (component) {
+        case 0:
+            return descriptor.a_type;
+        case 1:
+            return descriptor.r_type;
+        case 2:
+            return descriptor.g_type;
+        case 3:
+            return descriptor.b_type;
+        default:
+            break;
+        }
+        break;
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5:
+        // Positions map to A, B, G, R
+        switch (component) {
+        case 0:
+            return descriptor.a_type;
+        case 1:
+            return descriptor.b_type;
+        case 2:
+            return descriptor.g_type;
+        case 3:
+            return descriptor.r_type;
+        default:
+            break;
+        }
+        break;
+    case TextureFormat::R32_B24G8:
+        // Positions map to R, B, G
+        switch (component) {
+        case 0:
+            return descriptor.r_type;
+        case 1:
+            return descriptor.b_type;
+        case 2:
+            return descriptor.g_type;
+        default:
+            break;
+        }
+        break;
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5:
+        // Positions map to B, G, R
+        switch (component) {
+        case 0:
+            return descriptor.b_type;
+        case 1:
+            return descriptor.g_type;
+        case 2:
+            return descriptor.r_type;
+        default:
+            break;
+        }
+        break;
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4:
+        // Positions map to G, R
+        switch (component) {
+        case 0:
+            return descriptor.g_type;
+        case 1:
+            return descriptor.r_type;
+        default:
+            break;
+        }
+        break;
+    }
+    // Reached for formats without a case above and for positions the format does not have
+    UNIMPLEMENTED_MSG("texture format not implement={}", format);
+    return ComponentType::FLOAT;
+}
+
+/// Tells whether bit `component` is set in the table entry selected by `component_mask`.
+/// The table has 16 entries (one per 4-bit mask value); lookups use bounds-checked accessors,
+/// so a mask of 16 or more, or a component of 4 or more, throws std::out_of_range.
+bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
+    // Bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A
+    constexpr std::array<u8, 16> enabled_bits = {
+        0b0000, 0b0001, 0b0010, 0b0011, 0b0100, 0b0101, 0b0110, 0b0111,
+        0b1000, 0b1001, 0b1010, 0b1011, 0b1100, 0b1101, 0b1110, 0b1111};
+    const std::bitset<4> enabled{enabled_bits.at(component_mask)};
+    return enabled.test(component);
+}
+
+/// Returns the size in bits of the component at position `component` of a texture format.
+/// Positions follow the order in which the channels appear in the format name, so position 0
+/// of B6G5R5 is the 6-bit B channel. Positions a format does not have yield 0 for most
+/// formats; formats without a case are reported as unimplemented and yield 0.
+u32 GetComponentSize(TextureFormat format, std::size_t component) {
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+        return 32;
+    case TextureFormat::R16_G16_B16_A16:
+        return 16;
+    case TextureFormat::R32_G32_B32:
+        return component <= 2 ? 32 : 0;
+    case TextureFormat::R32_G32:
+        return component <= 1 ? 32 : 0;
+    case TextureFormat::R16_G16:
+        return component <= 1 ? 16 : 0;
+    case TextureFormat::R32:
+        return component == 0 ? 32 : 0;
+    case TextureFormat::R16:
+        return component == 0 ? 16 : 0;
+    case TextureFormat::R8:
+        return component == 0 ? 8 : 0;
+    case TextureFormat::R1:
+        return component == 0 ? 1 : 0;
+    case TextureFormat::A8R8G8B8:
+        return 8;
+    case TextureFormat::A2B10G10R10:
+        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
+    case TextureFormat::A4B4G4R4:
+        return 4;
+    case TextureFormat::A5B5G5R1:
+        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
+    case TextureFormat::A1B5G5R5:
+        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return 32;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        if (component == 2) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::B5G6R5:
+        if (component == 0 || component == 2) {
+            return 5;
+        }
+        if (component == 1) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B6G5R5:
+        if (component == 1 || component == 2) {
+            return 5;
+        }
+        if (component == 0) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::G8R24:
+        if (component == 0) {
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::G24R8:
+        // G comes first and is the 24-bit channel here, the reverse of G8R24
+        if (component == 0) {
+            return 24;
+        }
+        if (component == 1) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::G8R8:
+        return (component == 0 || component == 1) ? 8 : 0;
+    case TextureFormat::G4R4:
+        return (component == 0 || component == 1) ? 4 : 0;
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return 0;
+    }
+}
+
+/// Returns a bitmask of the components a texture format provides
+/// (bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A).
+/// Formats without a case are reported as unimplemented and yield all four bits.
+std::size_t GetImageComponentMask(TextureFormat format) {
+    constexpr std::size_t one_component = 0b0001;
+    constexpr std::size_t two_components = 0b0011;
+    constexpr std::size_t three_components = 0b0111;
+    constexpr std::size_t four_components = 0b1111;
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::A8R8G8B8:
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5:
+        return four_components;
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_B24G8:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5:
+        return three_components;
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4:
+        return two_components;
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1:
+        return one_component;
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return four_components;
+    }
+}
+
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
switch (image_type) {
case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
}
} // Anonymous namespace
+/// Converts a loaded image component into the value that gets packed into the result word.
+///
+/// @param component_type Data type of the component.
+/// @param component_size Size of the component in bits.
+/// @param original_value Node holding the loaded component.
+/// @return The converted node and a flag that the caller uses as the signedness of the
+///         operation applied to it. Unhandled component types are reported as unimplemented
+///         and return the value unchanged with the flag set.
+std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
+                                                  Node original_value) {
+    // Shift a 64-bit one: with an int, a 32-bit component would shift by the full width of
+    // the type, which is undefined behavior.
+    const float full_range = static_cast<float>(1ULL << component_size);
+    switch (component_type) {
+    case ComponentType::SNORM: {
+        // range [-1.0, 1.0]
+        auto cnv_value =
+            Operation(OperationCode::FMul, original_value, Immediate(full_range / 2.f - 1.f));
+        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
+        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
+    }
+    case ComponentType::SINT:
+    case ComponentType::UNORM: {
+        bool is_signed = component_type == ComponentType::SINT;
+        // range [0.0, 1.0]
+        auto cnv_value =
+            Operation(OperationCode::FMul, original_value, Immediate(full_range - 1.f));
+        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
+                is_signed};
+    }
+    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
+        return {std::move(original_value), false};
+    case ComponentType::FLOAT:
+        if (component_size == 16) {
+            return {Operation(OperationCode::HCastFloat, original_value), true};
+        } else {
+            return {std::move(original_value), true};
+        }
+    default:
+        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        return {std::move(original_value), true};
+    }
+}
+
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::SULD: {
- UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
Tegra::Shader::OutOfBoundsStore::Ignore);
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
: GetBindlessImage(instr.gpr39, type)};
image.MarkRead();
- u32 indexer = 0;
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.suldst.IsComponentEnabled(element)) {
- continue;
+ if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 4; ++element) {
+ if (!instr.suldst.IsComponentEnabled(element)) {
+ continue;
+ }
+ MetaImage meta{image, {}, element};
+ Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+ SetTemporary(bb, indexer++, std::move(value));
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
+ UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
+ instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
+
+ auto descriptor = [this, instr] {
+ std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+ if (instr.suldst.is_immediate) {
+ descriptor =
+ registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
+ } else {
+ const Node image_register = GetRegister(instr.gpr39);
+ const auto [base_image, buffer, offset] = TrackCbuf(
+ image_register, global_code, static_cast<s64>(global_code.size()));
+ descriptor = registry.ObtainBindlessSampler(buffer, offset);
+ }
+ if (!descriptor) {
+ UNREACHABLE_MSG("Failed to obtain image descriptor");
+ }
+ return *descriptor;
+ }();
+
+ const auto comp_mask = GetImageComponentMask(descriptor.format);
+
+ switch (instr.suldst.GetStoreDataLayout()) {
+ case StoreType::Bits32:
+ case StoreType::Bits64: {
+ u32 indexer = 0;
+ u32 shifted_counter = 0;
+ Node value = Immediate(0);
+ for (u32 element = 0; element < 4; ++element) {
+ if (!IsComponentEnabled(comp_mask, element)) {
+ continue;
+ }
+ const auto component_type = GetComponentType(descriptor, element);
+ const auto component_size = GetComponentSize(descriptor.format, element);
+ MetaImage meta{image, {}, element};
+
+ auto [converted_value, is_signed] = GetComponentValue(
+ component_type, component_size,
+ Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
+
+ // shift element to correct position
+ const auto shifted = shifted_counter;
+ if (shifted > 0) {
+ converted_value =
+ SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+ std::move(converted_value), Immediate(shifted));
+ }
+ shifted_counter += component_size;
+
+ // add value into result
+ value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
+
+ // once a full 32-bit word has been packed, save it into a temporary
+ if (shifted_counter >= 32) {
+ SetTemporary(bb, indexer++, std::move(value));
+ // reset the counter and value to start packing the next word
+ value = Immediate(0);
+ shifted_counter = 0;
+ }
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- MetaImage meta{image, {}, element};
- Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
- SetTemporary(bb, indexer++, std::move(value));
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4944e9d69..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,12 +11,17 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
+using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::SystemVariable;
+using Index = Tegra::Shader::Attribute::Index;
+
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
bb.push_back(Operation(OperationCode::Discard));
break;
}
- case OpCode::Id::MOV_SYS: {
+ case OpCode::Id::S2R: {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::LaneId:
- LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+ LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
return Immediate(0U);
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
- LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+ LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
+ return Immediate(0U);
+ case SystemVariable::WscaleFactorXY:
+ UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
+ return Immediate(0U);
+ case SystemVariable::WscaleFactorZ:
+ UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
return Immediate(0U);
case SystemVariable::Tid: {
Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::IPA: {
const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
-
const auto attribute = instr.attribute.fmt28;
- const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
- instr.ipa.sample_mode.Value()};
+ const Index index = attribute.index;
Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
- : GetInputAttribute(attribute.index, attribute.element);
- const Tegra::Shader::Attribute::Index index = attribute.index.Value();
- const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
- index <= Tegra::Shader::Attribute::Index::Attribute_31;
- if (is_generic || is_physical) {
- // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
- // In theory by setting them as perspective, OpenGL does the perspective correction.
- // A way must figured to reverse the last step of it.
- if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
- value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+ : GetInputAttribute(index, attribute.element);
+
+ // Code taken from Ryujinx.
+ if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
+ const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
+ if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
+ Node position_w = GetInputAttribute(Index::Position, 3);
+ value = Operation(OperationCode::FMul, move(value), move(position_w));
}
}
- value = GetSaturatedFloat(value, instr.ipa.saturate);
- SetRegister(bb, instr.gpr0, value);
+ if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
+ value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
+ }
+
+ value = GetSaturatedFloat(move(value), instr.ipa.saturate);
+
+ SetRegister(bb, instr.gpr0, move(value));
break;
}
case OpCode::Id::OUT_R: {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 48350e042..6c4a1358b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- // Fill empty entries from the guest sampler
- const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
- if (type_coord_count != entry_coord_count) {
- LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
-
- // When the size is higher we insert zeroes
- for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
- coords.push_back(GetRegister(Register::ZeroIndex));
- }
-
- // Then we ensure the size matches the number of entries (dropping unused values)
- coords.resize(entry_coord_count);
- }
-
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::VideoType;
using Tegra::Shader::VmadShr;
+using Tegra::Shader::VmnmxOperation;
+using Tegra::Shader::VmnmxType;
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
+ if (opcode->get().GetId() == OpCode::Id::VMNMX) {
+ DecodeVMNMX(bb, instr);
+ return pc;
+ }
+
const Node op_a =
GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
}
}
+/// Decodes a VMNMX instruction: takes the min or max of gpr8 and gpr20, combines the result
+/// with gpr39 according to the instruction's operation field and writes it to gpr0.
+/// Several encodings are reported as unimplemented by the checks at the top.
+void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
+    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
+    UNIMPLEMENTED_IF(instr.vmnmx.sat);
+    UNIMPLEMENTED_IF(instr.generates_cc);
+
+    Node first = GetRegister(instr.gpr8);
+    Node second = GetRegister(instr.gpr20);
+    Node third = GetRegister(instr.gpr39);
+
+    const bool first_stage_signed = instr.vmnmx.is_src_a_signed; // Stubbed
+    const bool second_stage_signed = instr.vmnmx.is_dest_signed;
+
+    // First stage: min or max of the two sources, chosen by the mx bit
+    OperationCode first_stage = OperationCode::IMin;
+    if (instr.vmnmx.mx) {
+        first_stage = OperationCode::IMax;
+    }
+    Node result = SignedOperation(first_stage, first_stage_signed, move(first), move(second));
+
+    // Inserts the low `bits` bits of the first stage result into the third operand at `offset`
+    const auto merge_into_third = [&](u32 offset, u32 bits) {
+        result = BitfieldInsert(move(third), move(result), offset, bits);
+    };
+
+    // Second stage: combine with the third operand
+    switch (instr.vmnmx.operation) {
+    case VmnmxOperation::Mrg_16H:
+        merge_into_third(16, 16);
+        break;
+    case VmnmxOperation::Mrg_16L:
+        merge_into_third(0, 16);
+        break;
+    case VmnmxOperation::Mrg_8B0:
+        merge_into_third(0, 8);
+        break;
+    case VmnmxOperation::Mrg_8B2:
+        merge_into_third(16, 8);
+        break;
+    case VmnmxOperation::Acc:
+        result = Operation(OperationCode::IAdd, move(result), move(third));
+        break;
+    case VmnmxOperation::Min:
+        result = SignedOperation(OperationCode::IMin, second_stage_signed, move(result),
+                                 move(third));
+        break;
+    case VmnmxOperation::Max:
+        result = SignedOperation(OperationCode::IMax, second_stage_signed, move(result),
+                                 move(third));
+        break;
+    case VmnmxOperation::Nop:
+        // The first stage result is written as is
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+
+    SetRegister(bb, instr.gpr0, move(result));
+}
+
} // namespace VideoCommon::Shader