8 files changed, 94 insertions, 48 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..21fb9cb83 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
     return (absolute_offset % SchedPeriod) == 0;
 }
 
-} // namespace
+} // Anonymous namespace
 
 class ASTDecoder {
 public:
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 
     decompiled = false;
-    auto info = ScanFlow(program_code, program_size, main_offset, settings);
+    auto info = ScanFlow(program_code, main_offset, settings, locker);
     auto& shader_info = *info;
     coverage_begin = shader_info.start;
     coverage_end = shader_info.end;
@@ -155,7 +155,7 @@ void ShaderIR::Decode() {
         [[fallthrough]];
     case CompileDepth::BruteForce: {
         coverage_begin = main_offset;
-        const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
+        const std::size_t shader_end = program_code.size();
         coverage_end = shader_end;
         for (u32 label = main_offset; label < shader_end; label++) {
             basic_blocks.insert({label, DecodeRange(label, label + 1)});
@@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
         }
         return result;
     };
-    if (block.branch.address < 0) {
-        if (block.branch.kills) {
-            Node n = Operation(OperationCode::Discard);
-            n = apply_conditions(block.branch.cond, n);
+    if (std::holds_alternative<SingleBranch>(*block.branch)) {
+        auto branch = std::get_if<SingleBranch>(block.branch.get());
+        if (branch->address < 0) {
+            if (branch->kill) {
+                Node n = Operation(OperationCode::Discard);
+                n = apply_conditions(branch->condition, n);
+                bb.push_back(n);
+                global_code.push_back(n);
+                return;
+            }
+            Node n = Operation(OperationCode::Exit);
+            n = apply_conditions(branch->condition, n);
             bb.push_back(n);
             global_code.push_back(n);
             return;
         }
-        Node n = Operation(OperationCode::Exit);
-        n = apply_conditions(block.branch.cond, n);
+        Node n = Operation(OperationCode::Branch, Immediate(branch->address));
+        n = apply_conditions(branch->condition, n);
         bb.push_back(n);
         global_code.push_back(n);
         return;
     }
-    Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
-    n = apply_conditions(block.branch.cond, n);
-    bb.push_back(n);
-    global_code.push_back(n);
+    auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+    Node op_a = GetRegister(multi_branch->gpr);
+    for (auto& branch_case : multi_branch->branches) {
+        Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
+        Node op_b = Immediate(branch_case.cmp_value);
+        Node condition =
+            GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+        auto result = Conditional(condition, {n});
+        bb.push_back(result);
+        global_code.push_back(result);
+    }
 }
 
 u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b73f6536e..a33d242e9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::ICMP_IMM: {
         const Node zero = Immediate(0);
 
-        const auto [op_b, test] = [&]() -> std::pair<Node, Node> {
+        const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
             switch (opcode->get().GetId()) {
             case OpCode::Id::ICMP_CR:
                 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
@@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
                 return {zero, zero};
             }
         }();
-        const Node op_a = GetRegister(instr.gpr8);
+        const Node op_lhs = GetRegister(instr.gpr8);
         const Node comparison =
             GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
-        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b));
+        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
         break;
     }
     case OpCode::Id::LOP_C:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 95ec1cdd9..b02d2cb95 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -144,8 +144,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 
 Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
     const auto offset{static_cast<std::size_t>(image.index.Value())};
-    if (const auto image = TryUseExistingImage(offset, type)) {
-        return *image;
+    if (const auto existing_image = TryUseExistingImage(offset, type)) {
+        return *existing_image;
     }
 
     const std::size_t next_index{used_images.size()};
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46e0f823..116b95f76 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::MOV_SYS: {
-        const Node value = [&]() {
+        const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index f6ee68a54..d419e9c45 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     Node op_a = GetRegister(instr.gpr8);
-    Node op_b = [&]() {
+    Node op_b = [this, instr] {
         if (instr.is_b_imm) {
             return Immediate(instr.alu.GetSignedImm20_20());
         } else if (instr.is_b_gpr) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b934a069..d61e656b7 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
 
         const auto& sampler =
-            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+            GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
 
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
@@ -150,7 +150,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
         }
 
-        WriteTexsInstructionFloat(bb, instr, values);
+        WriteTexsInstructionFloat(bb, instr, values, true);
         break;
     }
     case OpCode::Id::TXQ_B:
@@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         // Sadly, not all texture instructions specify the type of texture their sampler
         // uses. This must be fixed at a later instance.
         const auto& sampler =
-            is_bindless
-                ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
-                                     false)
-                : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+            is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
 
         u32 indexer = 0;
         switch (instr.txq.query_type) {
@@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
         auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
-        const auto& sampler = is_bindless
-                                  ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
-                                  : GetSampler(instr.sampler, texture_type, is_array, false);
+        const auto& sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
+                        : GetSampler(instr.sampler, {{texture_type, is_array, false}});
 
         std::vector<Node> coords;
 
@@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
-                                    bool is_array, bool is_shadow) {
-    const auto offset = static_cast<std::size_t>(sampler.index.Value());
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+                                    std::optional<SamplerInfo> sampler_info) {
+    const auto offset = static_cast<u32>(sampler.index.Value());
+
+    Tegra::Shader::TextureType type;
+    bool is_array;
+    bool is_shadow;
+    if (sampler_info) {
+        type = sampler_info->type;
+        is_array = sampler_info->is_array;
+        is_shadow = sampler_info->is_shadow;
+    } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
+        type = sampler->texture_type.Value();
+        is_array = sampler->is_array.Value() != 0;
+        is_shadow = sampler->is_shadow.Value() != 0;
+    } else {
+        type = Tegra::Shader::TextureType::Texture2D;
+        is_array = false;
+        is_shadow = false;
+    }
 
     // If this sampler has already been used, return the existing mapping.
     const auto itr =
@@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
     const std::size_t next_index = used_samplers.size();
     const Sampler entry{offset, next_index, type, is_array, is_shadow};
     return *used_samplers.emplace(entry).first;
-}
+} // namespace VideoCommon::Shader
 
-const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
-                                            bool is_array, bool is_shadow) {
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+                                            std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
     const auto [base_sampler, cbuf_index, cbuf_offset] =
         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
     ASSERT(base_sampler != nullptr);
     const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
+    Tegra::Shader::TextureType type;
+    bool is_array;
+    bool is_shadow;
+    if (sampler_info) {
+        type = sampler_info->type;
+        is_array = sampler_info->is_array;
+        is_shadow = sampler_info->is_shadow;
+    } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
+        type = sampler->texture_type.Value();
+        is_array = sampler->is_array.Value() != 0;
+        is_shadow = sampler->is_shadow.Value() != 0;
+    } else {
+        type = Tegra::Shader::TextureType::Texture2D;
+        is_array = false;
+        is_shadow = false;
+    }
 
     // If this sampler has already been used, return the existing mapping.
     const auto itr =
@@ -344,14 +374,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
     }
 }
 
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
-                                         const Node4& components) {
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+                                         bool ignore_mask) {
     // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
     // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
     u32 dest_elem = 0;
     for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component))
+        if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
             continue;
         SetTemporary(bb, dest_elem++, components[component]);
     }
@@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                              (texture_type == TextureType::TextureCube && is_array && is_shadow),
                          "This method is not supported.");
 
-    const auto& sampler = is_bindless
-                              ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
-                              : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+    const auto& sampler =
+        is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
+                    : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
 
     const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                             process_mode == TextureProcessMode::LL ||
@@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
         dc = GetRegister(parameter_register++);
     }
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
@@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     // When lod is used always is in gpr20
     const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 97fc6f9b1..b047cf870 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
     const Node op_a =
         GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                         instr.video.type_a, instr.video.byte_height_a);
-    const Node op_b = [&]() {
+    const Node op_b = [this, instr] {
         if (instr.video.use_register_b) {
             return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                    instr.video.signed_b, instr.video.type_b,
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index a8e481b3c..fa8a250cc 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -46,9 +46,10 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::SHFL: {
-        Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
-                                           : GetRegister(instr.gpr39);
-        Node width = [&] {
+        Node width = [this, instr] {
+            Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+                                               : GetRegister(instr.gpr39);
+
             // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
             // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
             // different parameters that don't properly map to GLSL's interface, but it should work