6 files changed, 539 insertions, 57 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 478394682..4db329fa5 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
-    case OpCode::Id::FCMP_R: {
+    case OpCode::Id::FCMP_RR:
+    case OpCode::Id::FCMP_RC: {
         UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
         Node op_c = GetRegister(instr.gpr39);
         Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index c72690b2b..b9989c88c 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -2,6 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <limits>
+#include <optional>
+#include <utility>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -15,9 +19,49 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 
 namespace {
+
 constexpr OperationCode GetFloatSelector(u64 selector) {
     return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
 }
+
+// Maps a register size to its width in bits; values outside the four known sizes give 0.
+constexpr u32 SizeInBits(Register::Size size) {
+    if (size == Register::Size::Byte) {
+        return 8;
+    }
+    if (size == Register::Size::Short) {
+        return 16;
+    }
+    if (size == Register::Size::Word) {
+        return 32;
+    }
+    return size == Register::Size::Long ? 64 : 0;
+}
+
+// Clamp range [min, max] for a saturating integer conversion, or std::nullopt when no clamping
+// is needed. Destinations of 32 bits or more use explicit limits instead of overflowing shifts.
+constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
+                                                                   Register::Size dst_size,
+                                                                   bool src_signed,
+                                                                   bool dst_signed) {
+    constexpr s32 s32_min = std::numeric_limits<s32>::min();
+    constexpr s32 s32_max = std::numeric_limits<s32>::max();
+    const u32 dst_bits = SizeInBits(dst_size);
+    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
+        if (src_signed == dst_signed) {
+            return std::nullopt;
+        }
+        return std::make_pair(s32(0), s32_max);
+    }
+    if (dst_bits >= 32) {
+        // 1 << 31 and 1 << 32 overflow s32; unsigned keeps the all-ones pattern (-1 as s32)
+        return dst_signed ? std::make_pair(s32_min, s32_max) : std::make_pair(s32(0), s32(-1));
+    }
+    // Narrow destination, e.g. [-128, 127] signed or [0, 255] unsigned for a byte
+    const s32 half = s32(1) << (dst_bits - 1);
+    return dst_signed ? std::make_pair(-half, half - 1) : std::make_pair(s32(0), half + half - 1);
+}
+
 } // Anonymous namespace
 
 u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
@@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
     case OpCode::Id::I2I_R:
     case OpCode::Id::I2I_C:
     case OpCode::Id::I2I_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
-        UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
-        UNIMPLEMENTED_IF(instr.alu.saturate_d);
+        const bool src_signed = instr.conversion.is_input_signed;
+        const bool dst_signed = instr.conversion.is_output_signed;
+        const Register::Size src_size = instr.conversion.src_size;
+        const Register::Size dst_size = instr.conversion.dst_size;
+        const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
 
-        const bool input_signed = instr.conversion.is_input_signed;
-        const bool output_signed = instr.conversion.is_output_signed;
-
-        Node value = [&]() {
+        Node value = [this, instr, opcode] {
             switch (opcode->get().GetId()) {
             case OpCode::Id::I2I_R:
                 return GetRegister(instr.gpr20);
@@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                 return Immediate(0);
             }
         }();
-        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
 
-        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
-                                        input_signed);
-        if (input_signed != output_signed) {
-            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
+        // Ensure the source selector is valid
+        switch (instr.conversion.src_size) {
+        case Register::Size::Byte:
+            break;
+        case Register::Size::Short:
+            ASSERT(selector == 0 || selector == 2);
+            break;
+        default:
+            ASSERT(selector == 0);
+            break;
+        }
+
+        if (src_size != Register::Size::Word || selector != 0) {
+            value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
+                                    Immediate(selector * 8), Immediate(SizeInBits(src_size)));
+        }
+
+        value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
+                                        instr.conversion.negate_a, src_signed);
+
+        if (instr.alu.saturate_d) {
+            if (src_signed && !dst_signed) {
+                Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
+                                             Immediate(1 << (SizeInBits(src_size) - 1)));
+                value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
+                                  std::move(value));
+
+                // Simplify generated expressions, this can be removed without semantic impact
+                SetTemporary(bb, 0, std::move(value));
+                value = GetTemporary(0);
+
+                if (dst_size != Register::Size::Word) {
+                    const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
+                    Node is_large =
+                        Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
+                    value = Operation(OperationCode::Select, std::move(is_large), limit,
+                                      std::move(value));
+                }
+            } else if (const std::optional bounds =
+                           IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
+                value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
+                                        Immediate(bounds->first));
+                value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
+                                        Immediate(bounds->second));
+            }
+        } else if (dst_size != Register::Size::Word) {
+            // No saturation, we only have to mask the result
+            Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
+            value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
         }
 
         SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::I2F_R:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/textures/texture.h"
 
 namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::StoreType;
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TICEntry;
 
 namespace {
+
+ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
+                               std::size_t component) { // Type of one component of the format
+    const TextureFormat format{descriptor.format};
+    switch (format) { // Each group maps component indices 0..3 to descriptor channel types
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1: // Order R, G, B, A; index is not checked against channel count
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.b_type;
+        }
+        if (component == 3) {
+            return descriptor.a_type;
+        }
+        break;
+    case TextureFormat::A8R8G8B8: // Order A, R, G, B
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.b_type;
+        }
+        break;
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5: // Order A, B, G, R
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::R32_B24G8: // Order R, B, G
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        break;
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5: // Order B, G, R
+        if (component == 0) {
+            return descriptor.b_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4: // Order G, R
+        if (component == 0) {
+            return descriptor.g_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        break;
+    }
+    UNIMPLEMENTED_MSG("texture format not implement={}", format); // Unlisted format or index
+    return ComponentType::FLOAT; // Fallback value returned after reporting the unhandled case
+}
+
+// Reports whether bit `component` is set in `component_mask`. Masks of 16 or more and
+// components of 4 or more make std::array::at / std::bitset::test throw std::out_of_range.
+bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
+    enum : u8 { R = 0b0001, G = 0b0010, B = 0b0100, A = 0b1000 };
+    constexpr std::array<u8, 16> enabled_bits = {
+        0,   (R),     (G),     (R | G),     (B),     (R | B),     (G | B),     (R | G | B),
+        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
+    const std::bitset<4> bits{enabled_bits.at(component_mask)};
+    return bits.test(component);
+}
+
+// Returns the width in bits of one component of a texture format.
+// Component indices follow the channel order of the format name, so index 0 is the first
+// channel named (for instance the 2-bit alpha of A2B10G10R10). Formats with fewer than four
+// channels return 0 for a component they lack; the four-channel formats do not validate the
+// index.
+u32 GetComponentSize(TextureFormat format, std::size_t component) {
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+        return 32;
+    case TextureFormat::R16_G16_B16_A16:
+        return 16;
+    case TextureFormat::R32_G32_B32:
+        return component <= 2 ? 32 : 0;
+    case TextureFormat::R32_G32:
+        return component <= 1 ? 32 : 0;
+    case TextureFormat::R16_G16:
+        return component <= 1 ? 16 : 0;
+    case TextureFormat::R32:
+        return component == 0 ? 32 : 0;
+    case TextureFormat::R16:
+        return component == 0 ? 16 : 0;
+    case TextureFormat::R8:
+        return component == 0 ? 8 : 0;
+    case TextureFormat::R1:
+        return component == 0 ? 1 : 0;
+    case TextureFormat::A8R8G8B8:
+        return 8;
+    case TextureFormat::A2B10G10R10:
+        // Alpha (component 0) is the only 2-bit channel
+        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
+    case TextureFormat::A4B4G4R4:
+        return 4;
+    case TextureFormat::A5B5G5R1:
+        // Red (component 3) is the only 1-bit channel
+        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
+    case TextureFormat::A1B5G5R5:
+        // Alpha (component 0) is the only 1-bit channel
+        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return 32;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        if (component == 2) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::B5G6R5:
+        if (component == 0 || component == 2) {
+            return 5;
+        }
+        if (component == 1) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B6G5R5:
+        if (component == 1 || component == 2) {
+            return 5;
+        }
+        if (component == 0) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::G8R24:
+        return component == 0 ? 8 : (component == 1 ? 24 : 0);
+    case TextureFormat::G24R8:
+        // G is named first, so component 0 is the 24-bit channel and component 1 the 8-bit one,
+        // mirroring G8R24 as the name-order convention used by every other case requires.
+        // NOTE(review): inferred from that convention; confirm against the hardware docs.
+        return component == 0 ? 24 : (component == 1 ? 8 : 0);
+    case TextureFormat::G8R8:
+        return (component == 0 || component == 1) ? 8 : 0;
+    case TextureFormat::G4R4:
+        return (component == 0 || component == 1) ? 4 : 0;
+    default:
+        // Formats not listed above are reported as unimplemented and contribute no bits
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return 0;
+    }
+}
+
+// Returns a bitmask with one low bit per component the format stores, for example 0b0111 for
+// a three-component format. Formats not handled here are reported as unimplemented and
+// treated as having all four components.
+std::size_t GetImageComponentMask(TextureFormat format) {
+    const auto low_bits = [](u32 count) { return (std::size_t{1} << count) - 1; };
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::A8R8G8B8:
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5:
+        return low_bits(4);
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_B24G8:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5:
+        return low_bits(3);
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4:
+        return low_bits(2);
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1:
+        return low_bits(1);
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return low_bits(4);
+    }
+}
+
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
     switch (image_type) {
     case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
 }
 } // Anonymous namespace
 
+// Converts one loaded image component into the integer representation that the SULD D_BA path
+// packs into the destination registers. Returns the converted node together with a flag telling
+// the caller whether the value must be handled as signed when it is shifted into place.
+std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
+                                                  Node original_value) {
+    // 64-bit shift: component_size can be 32, and shifting a 32-bit int by 31 or 32 overflows
+    const float range = static_cast<float>(1ULL << component_size);
+    switch (component_type) {
+    case ComponentType::SNORM: { // range [-1.0, 1.0]
+        Node scaled = Operation(OperationCode::FMul, original_value, Immediate(range / 2.f - 1.f));
+        scaled = Operation(OperationCode::ICastFloat, std::move(scaled));
+        return {BitfieldExtract(std::move(scaled), 0, component_size), true};
+    }
+    case ComponentType::SINT:
+    case ComponentType::UNORM: { // range [0.0, 1.0]
+        const bool is_signed = component_type == ComponentType::SINT;
+        Node scaled = Operation(OperationCode::FMul, original_value, Immediate(range - 1.f));
+        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(scaled)),
+                is_signed};
+    }
+    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
+        return {std::move(original_value), false};
+    case ComponentType::FLOAT:
+        if (component_size == 16) {
+            return {Operation(OperationCode::HCastFloat, std::move(original_value)), true};
+        }
+        return {std::move(original_value), true};
+    default:
+        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        return {std::move(original_value), true};
+    }
+}
+
 u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::SULD: {
-        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                          Tegra::Shader::OutOfBoundsStore::Ignore);
 
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                                               : GetBindlessImage(instr.gpr39, type)};
         image.MarkRead();
 
-        u32 indexer = 0;
-        for (u32 element = 0; element < 4; ++element) {
-            if (!instr.suldst.IsComponentEnabled(element)) {
-                continue;
+        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.suldst.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaImage meta{image, {}, element};
+                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+                SetTemporary(bb, indexer++, std::move(value));
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
+            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
+                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
+
+            auto descriptor = [this, instr] {
+                std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+                if (instr.suldst.is_immediate) {
+                    descriptor =
+                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
+                } else {
+                    const Node image_register = GetRegister(instr.gpr39);
+                    const auto [base_image, buffer, offset] = TrackCbuf(
+                        image_register, global_code, static_cast<s64>(global_code.size()));
+                    descriptor = registry.ObtainBindlessSampler(buffer, offset);
+                }
+                if (!descriptor) {
+                    UNREACHABLE_MSG("Failed to obtain image descriptor");
+                }
+                return *descriptor;
+            }();
+
+            const auto comp_mask = GetImageComponentMask(descriptor.format);
+
+            switch (instr.suldst.GetStoreDataLayout()) {
+            case StoreType::Bits32:
+            case StoreType::Bits64: {
+                u32 indexer = 0;
+                u32 shifted_counter = 0;
+                Node value = Immediate(0);
+                for (u32 element = 0; element < 4; ++element) {
+                    if (!IsComponentEnabled(comp_mask, element)) {
+                        continue;
+                    }
+                    const auto component_type = GetComponentType(descriptor, element);
+                    const auto component_size = GetComponentSize(descriptor.format, element);
+                    MetaImage meta{image, {}, element};
+
+                    auto [converted_value, is_signed] = GetComponentValue(
+                        component_type, component_size,
+                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
+
+                    // shift element to correct position
+                    const auto shifted = shifted_counter;
+                    if (shifted > 0) {
+                        converted_value =
+                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+                                            std::move(converted_value), Immediate(shifted));
+                    }
+                    shifted_counter += component_size;
+
+                    // add value into result
+                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
+
+                    // once 32 bits (one register) have been packed, save the word into a temp
+                    if (shifted_counter >= 32) {
+                        SetTemporary(bb, indexer++, std::move(value));
+                        // reset counter and value to start packing the next 32-bit word
+                        value = Immediate(0);
+                        shifted_counter = 0;
+                    }
+                }
+                for (u32 i = 0; i < indexer; ++i) {
+                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+                }
+                break;
+            }
+            default:
+                UNREACHABLE();
+                break;
             }
-            MetaImage meta{image, {}, element};
-            Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
-            SetTemporary(bb, indexer++, std::move(value));
-        }
-        for (u32 i = 0; i < indexer; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
         }
         break;
     }
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4944e9d69..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,12 +11,17 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
+using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using Tegra::Shader::SystemVariable;
 
+using Index = Tegra::Shader::Attribute::Index;
+
 u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         bb.push_back(Operation(OperationCode::Discard));
         break;
     }
-    case OpCode::Id::MOV_SYS: {
+    case OpCode::Id::S2R: {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
                 return Immediate(0U);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorXY:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorZ:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                 return Immediate(0U);
             case SystemVariable::Tid: {
                 Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::IPA: {
         const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
-
         const auto attribute = instr.attribute.fmt28;
-        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-                                                instr.ipa.sample_mode.Value()};
+        const Index index = attribute.index;
 
         Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
-                                 : GetInputAttribute(attribute.index, attribute.element);
-        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
-        const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
-                                index <= Tegra::Shader::Attribute::Index::Attribute_31;
-        if (is_generic || is_physical) {
-            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
-            // In theory by setting them as perspective, OpenGL does the perspective correction.
-            // A way must figured to reverse the last step of it.
-            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
-                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+                                 : GetInputAttribute(index, attribute.element);
+
+        // Code taken from Ryujinx.
+        if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
+            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
+            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
+                Node position_w = GetInputAttribute(Index::Position, 3);
+                value = Operation(OperationCode::FMul, move(value), move(position_w));
             }
         }
-        value = GetSaturatedFloat(value, instr.ipa.saturate);
 
-        SetRegister(bb, instr.gpr0, value);
+        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
+            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
+        }
+
+        value = GetSaturatedFloat(move(value), instr.ipa.saturate);
+
+        SetRegister(bb, instr.gpr0, move(value));
         break;
     }
     case OpCode::Id::OUT_R: {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 48350e042..6c4a1358b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     // When lod is used always is in gpr20
     const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
 
-    // Fill empty entries from the guest sampler
-    const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
-    if (type_coord_count != entry_coord_count) {
-        LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
-
-        // When the size is higher we insert zeroes
-        for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
-            coords.push_back(GetRegister(Register::ZeroIndex));
-        }
-
-        // Then we ensure the size matches the number of entries (dropping unused values)
-        coords.resize(entry_coord_count);
-    }
-
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::VideoType;
 using Tegra::Shader::VmadShr;
+using Tegra::Shader::VmnmxOperation;
+using Tegra::Shader::VmnmxType;
 
 u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
+    if (opcode->get().GetId() == OpCode::Id::VMNMX) {
+        DecodeVMNMX(bb, instr);
+        return pc;
+    }
+
     const Node op_a =
         GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                         instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
     }
 }
 
+// Decodes VMNMX: takes the min or max of gpr8 and gpr20, combines that with gpr39 according to
+// the instruction's operation field and stores the result in gpr0. Non-register B operands,
+// non-32-bit sources, mixed source signedness, saturation and CC generation are unimplemented.
+void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
+    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
+    UNIMPLEMENTED_IF(instr.vmnmx.sat);
+    UNIMPLEMENTED_IF(instr.generates_cc);
+
+    Node src_a = GetRegister(instr.gpr8);
+    Node src_b = GetRegister(instr.gpr20);
+    Node src_c = GetRegister(instr.gpr39);
+
+    const bool first_signed = instr.vmnmx.is_src_a_signed; // Stubbed
+    const bool second_signed = instr.vmnmx.is_dest_signed;
+    const OperationCode first_op = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
+    Node result = SignedOperation(first_op, first_signed, move(src_a), move(src_b));
+
+    switch (instr.vmnmx.operation) {
+    case VmnmxOperation::Nop:
+        break;
+    case VmnmxOperation::Acc:
+        result = Operation(OperationCode::IAdd, move(result), move(src_c));
+        break;
+    case VmnmxOperation::Min:
+        result = SignedOperation(OperationCode::IMin, second_signed, move(result), move(src_c));
+        break;
+    case VmnmxOperation::Max:
+        result = SignedOperation(OperationCode::IMax, second_signed, move(result), move(src_c));
+        break;
+    case VmnmxOperation::Mrg_8B0:
+        result = BitfieldInsert(move(src_c), move(result), 0, 8);
+        break;
+    case VmnmxOperation::Mrg_8B2:
+        result = BitfieldInsert(move(src_c), move(result), 16, 8);
+        break;
+    case VmnmxOperation::Mrg_16L:
+        result = BitfieldInsert(move(src_c), move(result), 0, 16);
+        break;
+    case VmnmxOperation::Mrg_16H:
+        result = BitfieldInsert(move(src_c), move(result), 16, 16);
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+    SetRegister(bb, instr.gpr0, move(result));
+}
+
 } // namespace VideoCommon::Shader