summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/kernel/svc.cpp79
-rw-r--r--src/core/hle/kernel/thread.h21
-rw-r--r--src/core/hle/service/audio/audren_u.cpp348
-rw-r--r--src/core/hle/service/audio/audren_u.h2
-rw-r--r--src/video_core/dma_pusher.cpp7
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h22
-rw-r--r--src/video_core/gpu_thread.h6
-rw-r--r--src/video_core/macro_interpreter.cpp4
-rw-r--r--src/video_core/rasterizer_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_device.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp313
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp54
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h28
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp7
-rw-r--r--src/video_core/shader/decode/memory.cpp30
-rw-r--r--src/video_core/shader/decode/other.cpp13
-rw-r--r--src/video_core/shader/shader_ir.cpp20
-rw-r--r--src/video_core/shader/shader_ir.h86
-rw-r--r--src/video_core/shader/track.cpp12
21 files changed, 755 insertions, 324 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c23106299..5a5851f66 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) {
/// Creates a new thread
static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
VAddr stack_top, u32 priority, s32 processor_id) {
- LOG_TRACE(Kernel_SVC,
+ LOG_DEBUG(Kernel_SVC,
"called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
"threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
entry_point, arg, stack_top, priority, processor_id, *out_handle);
@@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
/// Starts the thread for the provided handle
static ResultCode StartThread(Core::System& system, Handle thread_handle) {
- LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
+ LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
@@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
/// Called when a thread exits
static void ExitThread(Core::System& system) {
- LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+ LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
@@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) {
/// Sleep the current thread
static void SleepThread(Core::System& system, s64 nanoseconds) {
- LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
+ LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
enum class SleepType : s64 {
YieldWithoutLoadBalancing = 0,
@@ -1880,52 +1880,59 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
}
static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
- u64 mask) {
- LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
- mask, core);
+ u64 affinity_mask) {
+ LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
+ thread_handle, core, affinity_mask);
- const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
- const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
- if (!thread) {
- LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
- thread_handle);
- return ERR_INVALID_HANDLE;
- }
+ const auto* const current_process = system.Kernel().CurrentProcess();
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
- const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
+ const u8 ideal_cpu_core = current_process->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
- mask = 1ULL << core;
- }
-
- if (mask == 0) {
- LOG_ERROR(Kernel_SVC, "Mask is 0");
- return ERR_INVALID_COMBINATION;
- }
+ affinity_mask = 1ULL << core;
+ } else {
+ const u64 core_mask = current_process->GetCoreMask();
+
+ if ((core_mask | affinity_mask) != core_mask) {
+ LOG_ERROR(
+ Kernel_SVC,
+ "Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
+ core_mask, affinity_mask);
+ return ERR_INVALID_PROCESSOR_ID;
+ }
- /// This value is used to only change the affinity mask without changing the current ideal core.
- static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
+ if (affinity_mask == 0) {
+ LOG_ERROR(Kernel_SVC, "Specfified affinity mask is zero.");
+ return ERR_INVALID_COMBINATION;
+ }
- if (core == OnlyChangeMask) {
- core = thread->GetIdealCore();
- } else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
- LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
- return ERR_INVALID_PROCESSOR_ID;
+ if (core < Core::NUM_CPU_CORES) {
+ if ((affinity_mask & (1ULL << core)) == 0) {
+ LOG_ERROR(Kernel_SVC,
+ "Core is not enabled for the current mask, core={}, mask={:016X}", core,
+ affinity_mask);
+ return ERR_INVALID_COMBINATION;
+ }
+ } else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
+ core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
+ LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
+ return ERR_INVALID_PROCESSOR_ID;
+ }
}
- // Error out if the input core isn't enabled in the input mask.
- if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
- LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
- core, mask);
- return ERR_INVALID_COMBINATION;
+ const auto& handle_table = current_process->GetHandleTable();
+ const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
+ if (!thread) {
+ LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
+ thread_handle);
+ return ERR_INVALID_HANDLE;
}
- thread->ChangeCore(core, mask);
-
+ thread->ChangeCore(core, affinity_mask);
return RESULT_SUCCESS;
}
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index f07332f02..b4b9cda7c 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -30,12 +30,21 @@ enum ThreadPriority : u32 {
};
enum ThreadProcessorId : s32 {
- THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
- THREADPROCESSORID_0 = 0, ///< Run thread on core 0
- THREADPROCESSORID_1 = 1, ///< Run thread on core 1
- THREADPROCESSORID_2 = 2, ///< Run thread on core 2
- THREADPROCESSORID_3 = 3, ///< Run thread on core 3
- THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
+ /// Indicates that no particular processor core is preferred.
+ THREADPROCESSORID_DONT_CARE = -1,
+
+ /// Run thread on the ideal core specified by the process.
+ THREADPROCESSORID_IDEAL = -2,
+
+ /// Indicates that the preferred processor ID shouldn't be updated in
+ /// a core mask setting operation.
+ THREADPROCESSORID_DONT_UPDATE = -3,
+
+ THREADPROCESSORID_0 = 0, ///< Run thread on core 0
+ THREADPROCESSORID_1 = 1, ///< Run thread on core 1
+ THREADPROCESSORID_2 = 2, ///< Run thread on core 2
+ THREADPROCESSORID_3 = 3, ///< Run thread on core 3
+ THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
/// Allowed CPU mask
THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index e69f6cf7f..75db0c2dc 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -8,6 +8,7 @@
#include "audio_core/audio_renderer.h"
#include "common/alignment.h"
+#include "common/bit_util.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/string_util.h"
@@ -262,64 +263,304 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
OpenAudioRendererImpl(ctx);
}
+static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) {
+ // +1 represents the final mix.
+ return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count +
+ 1;
+}
+
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
LOG_DEBUG(Service_Audio, "called");
- u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
- buffer_sz += params.submix_count * 1024;
- buffer_sz += 0x940 * (params.submix_count + 1);
- buffer_sz += 0x3F0 * params.voice_count;
- buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
- buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
- buffer_sz += Common::AlignUp(
- (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
- (params.mix_buffer_count + 6),
- 0x40);
-
- if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
- const u32 count = params.submix_count + 1;
- u64 node_count = Common::AlignUp(count, 0x40);
- const u64 node_state_buffer_sz =
- 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
- u64 edge_matrix_buffer_sz = 0;
- node_count = Common::AlignUp(count * count, 0x40);
- if (node_count >> 31 != 0) {
- edge_matrix_buffer_sz = (node_count | 7) / 8;
- } else {
- edge_matrix_buffer_sz = node_count / 8;
+ // Several calculations below align the sizes being calculated
+ // onto a 64 byte boundary.
+ static constexpr u64 buffer_alignment_size = 64;
+
+ // Some calculations that calculate portions of the buffer
+ // that will contain information, on the other hand, align
+ // the result of some of their calcularions on a 16 byte boundary.
+ static constexpr u64 info_field_alignment_size = 16;
+
+ // Maximum detail entries that may exist at one time for performance
+ // frame statistics.
+ static constexpr u64 max_perf_detail_entries = 100;
+
+ // Size of the data structure representing the bulk of the voice-related state.
+ static constexpr u64 voice_state_size = 0x100;
+
+ // Size of the upsampler manager data structure
+ constexpr u64 upsampler_manager_size = 0x48;
+
+ // Calculates the part of the size that relates to mix buffers.
+ const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) {
+ // As of 8.0.0 this is the maximum on voice channels.
+ constexpr u64 max_voice_channels = 6;
+
+ // The service expects the sample_count member of the parameters to either be
+ // a value of 160 or 240, so the maximum sample count is assumed in order
+ // to adequately handle all values at runtime.
+ constexpr u64 default_max_sample_count = 240;
+
+ const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels;
+
+ u64 size = 0;
+ size += total_mix_buffers * (sizeof(s32) * params.sample_count);
+ size += total_mix_buffers * (sizeof(s32) * default_max_sample_count);
+ size += u64{params.submix_count} + params.sink_count;
+ size = Common::AlignUp(size, buffer_alignment_size);
+ size += Common::AlignUp(params.unknown_30, buffer_alignment_size);
+ size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size);
+ return size;
+ };
+
+ // Calculates the portion of the size related to the mix data (and the sorting thereof).
+ const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) {
+ // The size of the mixing info data structure.
+ constexpr u64 mix_info_size = 0x940;
+
+ // Consists of total submixes with the final mix included.
+ const u64 total_mix_count = u64{params.submix_count} + 1;
+
+ // The total number of effects that may be available to the audio renderer at any time.
+ constexpr u64 max_effects = 256;
+
+ // Calculates the part of the size related to the audio node state.
+ // This will only be used if the audio revision supports the splitter.
+ const auto calculate_node_state_size = [](std::size_t num_nodes) {
+ // Internally within a nodestate, it appears to use a data structure
+ // similar to a std::bitset<64> twice.
+ constexpr u64 bit_size = Common::BitSize<u64>();
+ constexpr u64 num_bitsets = 2;
+
+ // Node state instances have three states internally for performing
+ // depth-first searches of nodes. Initialized, Found, and Done Sorting.
+ constexpr u64 num_states = 3;
+
+ u64 size = 0;
+ size += (num_nodes * num_nodes) * sizeof(s32);
+ size += num_states * (num_nodes * sizeof(s32));
+ size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>());
+ return size;
+ };
+
+ // Calculates the part of the size related to the adjacency (aka edge) matrix.
+ const auto calculate_edge_matrix_size = [](std::size_t num_nodes) {
+ return (num_nodes * num_nodes) * sizeof(s32);
+ };
+
+ u64 size = 0;
+ size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size);
+ size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size);
+ size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count,
+ info_field_alignment_size);
+
+ if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
+ size += Common::AlignUp(calculate_node_state_size(total_mix_count) +
+ calculate_edge_matrix_size(total_mix_count),
+ info_field_alignment_size);
}
- buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
- }
- buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
- if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
- buffer_sz += 0xE0 * params.num_splitter_send_channels;
- buffer_sz += 0x20 * params.splitter_count;
- buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
- }
- buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
- u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
- ((params.voice_count * 256) | 0x40);
-
- if (params.performance_frame_count >= 1) {
- output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
- 16 * params.voice_count + 16) +
- 0x658) *
- (params.performance_frame_count + 1) +
- 0xc0,
- 0x40) +
- output_sz;
- }
- output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);
+ return size;
+ };
- IPC::ResponseBuilder rb{ctx, 4};
+ // Calculates the part of the size related to voice channel info.
+ const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) {
+ constexpr u64 voice_info_size = 0x220;
+ constexpr u64 voice_resource_size = 0xD0;
+
+ u64 size = 0;
+ size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size);
+ size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size);
+ size +=
+ Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size);
+ size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size);
+ return size;
+ };
+
+ // Calculates the part of the size related to memory pools.
+ const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) {
+ const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count);
+ const u64 memory_pool_info_size = 0x20;
+ return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size);
+ };
+
+ // Calculates the part of the size related to the splitter context.
+ const auto calculate_splitter_context_size =
+ [this](const AudioCore::AudioRendererParameter& params) -> u64 {
+ if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
+ return 0;
+ }
+
+ constexpr u64 splitter_info_size = 0x20;
+ constexpr u64 splitter_destination_data_size = 0xE0;
+
+ u64 size = 0;
+ size += params.num_splitter_send_channels;
+ size +=
+ Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size);
+ size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels,
+ info_field_alignment_size);
+
+ return size;
+ };
+
+ // Calculates the part of the size related to the upsampler info.
+ const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) {
+ constexpr u64 upsampler_info_size = 0x280;
+ // Yes, using the buffer size over info alignment size is intentional here.
+ return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count),
+ buffer_alignment_size);
+ };
+
+ // Calculates the part of the size related to effect info.
+ const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) {
+ constexpr u64 effect_info_size = 0x2B0;
+ return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size);
+ };
+
+ // Calculates the part of the size related to audio sink info.
+ const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) {
+ const u64 sink_info_size = 0x170;
+ return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size);
+ };
+
+ // Calculates the part of the size related to voice state info.
+ const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) {
+ const u64 voice_state_size = 0x100;
+ const u64 additional_size = buffer_alignment_size - 1;
+ return Common::AlignUp(voice_state_size * params.voice_count + additional_size,
+ info_field_alignment_size);
+ };
+
+ // Calculates the part of the size related to performance statistics.
+ const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) {
+ // Extra size value appended to the end of the calculation.
+ constexpr u64 appended = 128;
+
+ // Whether or not we assume the newer version of performance metrics data structures.
+ const bool is_v2 =
+ IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision);
+
+ // Data structure sizes
+ constexpr u64 perf_statistics_size = 0x0C;
+ const u64 header_size = is_v2 ? 0x30 : 0x18;
+ const u64 entry_size = is_v2 ? 0x18 : 0x10;
+ const u64 detail_size = is_v2 ? 0x18 : 0x10;
+
+ const u64 entry_count = CalculateNumPerformanceEntries(params);
+ const u64 size_per_frame =
+ header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries);
+
+ u64 size = 0;
+ size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1,
+ buffer_alignment_size);
+ size += Common::AlignUp(perf_statistics_size, buffer_alignment_size);
+ size += appended;
+ return size;
+ };
+
+ // Calculates the part of the size that relates to the audio command buffer.
+ const auto calculate_command_buffer_size =
+ [this](const AudioCore::AudioRendererParameter& params) {
+ constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
+
+ if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
+ constexpr u64 command_buffer_size = 0x18000;
+
+ return command_buffer_size + alignment;
+ }
+
+ // When the variadic command buffer is supported, this means
+ // the command generator for the audio renderer can issue commands
+ // that are (as one would expect), variable in size. So what we need to do
+ // is determine the maximum possible size for a few command data structures
+ // then multiply them by the amount of present commands indicated by the given
+ // respective audio parameters.
+
+ constexpr u64 max_biquad_filters = 2;
+ constexpr u64 max_mix_buffers = 24;
+
+ constexpr u64 biquad_filter_command_size = 0x2C;
+
+ constexpr u64 depop_mix_command_size = 0x24;
+ constexpr u64 depop_setup_command_size = 0x50;
+
+ constexpr u64 effect_command_max_size = 0x540;
+
+ constexpr u64 mix_command_size = 0x1C;
+ constexpr u64 mix_ramp_command_size = 0x24;
+ constexpr u64 mix_ramp_grouped_command_size = 0x13C;
+
+ constexpr u64 perf_command_size = 0x28;
+
+ constexpr u64 sink_command_size = 0x130;
+
+ constexpr u64 submix_command_max_size =
+ depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
+
+ constexpr u64 volume_command_size = 0x1C;
+ constexpr u64 volume_ramp_command_size = 0x20;
+
+ constexpr u64 voice_biquad_filter_command_size =
+ biquad_filter_command_size * max_biquad_filters;
+ constexpr u64 voice_data_command_size = 0x9C;
+ const u64 voice_command_max_size =
+ (params.splitter_count * depop_setup_command_size) +
+ (voice_data_command_size + voice_biquad_filter_command_size +
+ volume_ramp_command_size + mix_ramp_grouped_command_size);
+
+ // Now calculate the individual elements that comprise the size and add them together.
+ const u64 effect_commands_size = params.effect_count * effect_command_max_size;
+
+ const u64 final_mix_commands_size =
+ depop_mix_command_size + volume_command_size * max_mix_buffers;
+ const u64 perf_commands_size =
+ perf_command_size *
+ (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
+
+ const u64 sink_commands_size = params.sink_count * sink_command_size;
+
+ const u64 splitter_commands_size =
+ params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
+
+ const u64 submix_commands_size = params.submix_count * submix_command_max_size;
+
+ const u64 voice_commands_size = params.voice_count * voice_command_max_size;
+
+ return effect_commands_size + final_mix_commands_size + perf_commands_size +
+ sink_commands_size + splitter_commands_size + submix_commands_size +
+ voice_commands_size + alignment;
+ };
+
+ IPC::RequestParser rp{ctx};
+ const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
+
+ u64 size = 0;
+ size += calculate_mix_buffer_sizes(params);
+ size += calculate_mix_info_size(params);
+ size += calculate_voice_info_size(params);
+ size += upsampler_manager_size;
+ size += calculate_memory_pools_size(params);
+ size += calculate_splitter_context_size(params);
+
+ size = Common::AlignUp(size, buffer_alignment_size);
+
+ size += calculate_upsampler_info_size(params);
+ size += calculate_effect_info_size(params);
+ size += calculate_sink_info_size(params);
+ size += calculate_voice_state_size(params);
+ size += calculate_perf_size(params);
+ size += calculate_command_buffer_size(params);
+
+ // finally, 4KB page align the size, and we're done.
+ size = Common::AlignUp(size, 4096);
+
+ IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
- rb.Push<u64>(output_sz);
+ rb.Push<u64>(size);
- LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
+ LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size);
}
void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
@@ -357,10 +598,15 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
- u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
+ // Byte swap
+ const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
+
switch (feature) {
case AudioFeatures::Splitter:
- return version_num >= 2u;
+ return version_num >= 2U;
+ case AudioFeatures::PerformanceMetricsVersion2:
+ case AudioFeatures::VariadicCommandBuffer:
+ return version_num >= 5U;
default:
return false;
}
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index e55d25973..1d3c8df61 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -28,6 +28,8 @@ private:
enum class AudioFeatures : u32 {
Splitter,
+ PerformanceMetricsVersion2,
+ VariadicCommandBuffer,
};
bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 036e66f05..3175579cc 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -40,6 +40,13 @@ bool DmaPusher::Step() {
}
const CommandList& command_list{dma_pushbuffer.front()};
+ ASSERT_OR_EXECUTE(!command_list.empty(), {
+ // Somehow the command_list is empty, in order to avoid a crash
+ // We ignore it and assume its size is 0.
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ return true;
+ });
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 48e4fec33..f342c78e6 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -59,6 +59,7 @@ public:
static constexpr std::size_t NumCBData = 16;
static constexpr std::size_t NumVertexArrays = 32;
static constexpr std::size_t NumVertexAttributes = 32;
+ static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumTextureSamplers = 32;
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t MaxShaderProgram = 6;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e5b4eadea..7bbc556da 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -98,6 +98,10 @@ union Attribute {
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<47, 3, AttributeSize> size;
+
+ bool IsPhysical() const {
+ return element == 0 && static_cast<u64>(index.Value()) == 0;
+ }
} fmt20;
union {
@@ -499,6 +503,11 @@ enum class SystemVariable : u64 {
CircularQueueEntryAddressHigh = 0x63,
};
+enum class PhysicalAttributeDirection : u64 {
+ Input = 0,
+ Output = 1,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -587,6 +596,7 @@ union Instruction {
} alu;
union {
+ BitField<38, 1, u64> idx;
BitField<51, 1, u64> saturate;
BitField<52, 2, IpaSampleMode> sample_mode;
BitField<54, 2, IpaInterpMode> interp_mode;
@@ -812,6 +822,12 @@ union Instruction {
} stg;
union {
+ BitField<32, 1, PhysicalAttributeDirection> direction;
+ BitField<47, 3, AttributeSize> size;
+ BitField<20, 11, u64> address;
+ } al2p;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<7, 1, u64> abs_a;
@@ -1374,8 +1390,9 @@ public:
ST_A,
ST_L,
ST_S,
- LDG, // Load from global memory
- STG, // Store in global memory
+ LDG, // Load from global memory
+ STG, // Store in global memory
+ AL2P, // Transforms attribute memory into physical memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
@@ -1646,6 +1663,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+ INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cdf86f562..05a168a72 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -81,12 +81,6 @@ struct CommandDataContainer {
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
- CommandDataContainer& operator=(const CommandDataContainer& t) {
- data = std::move(t.data);
- fence = t.fence;
- return *this;
- }
-
CommandData data;
u64 fence{};
};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index fbea107ca..c766ed692 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -120,7 +120,9 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
// An instruction with the Exit flag will not actually
// cause an exit if it's executed inside a delay slot.
- if (opcode.is_exit && !is_delay_slot) {
+ // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
+ // testing on the MME code.
+ if (opcode.is_exit) {
// Exit has a delay slot, execute the next instruction
Step(offset, true);
return false;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index f820f3ed9..0c4ea1494 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -144,8 +144,9 @@ protected:
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
+ const CacheAddr addr = object->GetCacheAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
- map_cache.erase(object->GetCacheAddr());
+ map_cache.erase(addr);
}
/// Returns a ticks counter used for tracking when cached objects were last modified
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b6d9e0ddb..38497678a 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,18 @@ T GetInteger(GLenum pname) {
Device::Device() {
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+ max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
+ max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
}
+Device::Device(std::nullptr_t) {
+ uniform_buffer_alignment = 0;
+ max_vertex_attributes = 16;
+ max_varyings = 15;
+ has_variable_aoffi = true;
+}
+
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
uniform sampler2D tex;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 78ff5ee58..de8490682 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -5,17 +5,27 @@
#pragma once
#include <cstddef>
+#include "common/common_types.h"
namespace OpenGL {
class Device {
public:
- Device();
+ explicit Device();
+ explicit Device(std::nullptr_t);
std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ u32 GetMaxVertexAttributes() const {
+ return max_vertex_attributes;
+ }
+
+ u32 GetMaxVaryings() const {
+ return max_varyings;
+ }
+
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
@@ -24,6 +34,8 @@ private:
static bool TestVariableAoffi();
std::size_t uniform_buffer_alignment{};
+ u32 max_vertex_attributes{};
+ u32 max_varyings{};
bool has_variable_aoffi{};
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1a62795e1..4bff54a59 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -134,6 +134,19 @@ bool IsPrecise(Node node) {
return false;
}
+constexpr bool IsGenericAttribute(Attribute::Index index) {
+ return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
+}
+
+constexpr Attribute::Index ToGenericAttribute(u32 value) {
+ return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0));
+}
+
+u32 GetGenericAttributeIndex(Attribute::Index index) {
+ ASSERT(IsGenericAttribute(index));
+ return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
@@ -152,6 +165,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
+ DeclarePhysicalAttributeReader();
code.AddLine("void execute_" + suffix + "() {");
++code.scope;
@@ -296,76 +310,95 @@ private:
}
std::string GetInputFlags(AttributeUse attribute) {
- std::string out;
-
switch (attribute) {
- case AttributeUse::Constant:
- out += "flat ";
- break;
- case AttributeUse::ScreenLinear:
- out += "noperspective ";
- break;
case AttributeUse::Perspective:
// Default, Smooth
- break;
+ return {};
+ case AttributeUse::Constant:
+ return "flat ";
+ case AttributeUse::ScreenLinear:
+ return "noperspective ";
-        default:
-            LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
-            UNREACHABLE();
+        case AttributeUse::Unused:
+            UNREACHABLE_MSG("Unused attribute being fetched");
+            return {};
+        default:
+            UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
+            return {};
}
- return out;
}
void DeclareInputAttributes() {
- const auto& attributes = ir.GetInputAttributes();
- for (const auto element : attributes) {
- const Attribute::Index index = element.first;
- if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
- // Skip when it's not a generic attribute
- continue;
- }
-
- // TODO(bunnei): Use proper number of elements for these
- u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
- if (stage != ShaderStage::Vertex) {
- // If inputs are varyings, add an offset
- idx += GENERIC_VARYING_START_LOCATION;
+ if (ir.HasPhysicalAttributes()) {
+ const u32 num_inputs{GetNumPhysicalInputAttributes()};
+ for (u32 i = 0; i < num_inputs; ++i) {
+ DeclareInputAttribute(ToGenericAttribute(i), true);
}
+ code.AddNewLine();
+ return;
+ }
- std::string attr = GetInputAttribute(index);
- if (stage == ShaderStage::Geometry) {
- attr = "gs_" + attr + "[]";
- }
- std::string suffix;
- if (stage == ShaderStage::Fragment) {
- const auto input_mode =
- header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
- suffix = GetInputFlags(input_mode);
+ const auto& attributes = ir.GetInputAttributes();
+ for (const auto index : attributes) {
+ if (IsGenericAttribute(index)) {
+ DeclareInputAttribute(index, false);
}
- code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
- attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
}
+ void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
+ const u32 generic_index{GetGenericAttributeIndex(index)};
+
+ std::string name{GetInputAttribute(index)};
+ if (stage == ShaderStage::Geometry) {
+ name = "gs_" + name + "[]";
+ }
+
+ std::string suffix;
+ if (stage == ShaderStage::Fragment) {
+ const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+ if (skip_unused && input_mode == AttributeUse::Unused) {
+ return;
+ }
+ suffix = GetInputFlags(input_mode);
+ }
+
+ u32 location = generic_index;
+ if (stage != ShaderStage::Vertex) {
+ // If inputs are varyings, add an offset
+ location += GENERIC_VARYING_START_LOCATION;
+ }
+
+ code.AddLine("layout (location = " + std::to_string(location) + ") " + suffix + "in vec4 " +
+ name + ';');
+ }
+
void DeclareOutputAttributes() {
+ if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) {
+ for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
+ DeclareOutputAttribute(ToGenericAttribute(i));
+ }
+ code.AddNewLine();
+ return;
+ }
+
const auto& attributes = ir.GetOutputAttributes();
for (const auto index : attributes) {
- if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
- // Skip when it's not a generic attribute
- continue;
+ if (IsGenericAttribute(index)) {
+ DeclareOutputAttribute(index);
}
- // TODO(bunnei): Use proper number of elements for these
- const auto idx = static_cast<u32>(index) -
- static_cast<u32>(Attribute::Index::Attribute_0) +
- GENERIC_VARYING_START_LOCATION;
- code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
- GetOutputAttribute(index) + ';');
}
if (!attributes.empty())
code.AddNewLine();
}
+ void DeclareOutputAttribute(Attribute::Index index) {
+ const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+ code.AddLine("layout (location = " + std::to_string(location) + ") out vec4 " +
+ GetOutputAttribute(index) + ';');
+ }
+
void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
@@ -429,6 +462,39 @@ private:
code.AddNewLine();
}
+ void DeclarePhysicalAttributeReader() {
+ if (!ir.HasPhysicalAttributes()) {
+ return;
+ }
+ code.AddLine("float readPhysicalAttribute(uint physical_address) {");
+ ++code.scope;
+ code.AddLine("switch (physical_address) {");
+
+ // Just declare generic attributes for now.
+ const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
+ for (u32 index = 0; index < num_attributes; ++index) {
+ const auto attribute{ToGenericAttribute(index)};
+ for (u32 element = 0; element < 4; ++element) {
+ constexpr u32 generic_base{0x80};
+ constexpr u32 generic_stride{16};
+ constexpr u32 element_stride{4};
+ const u32 address{generic_base + index * generic_stride + element * element_stride};
+
+ const bool declared{stage != ShaderStage::Fragment ||
+ header.ps.GetAttributeUse(index) != AttributeUse::Unused};
+ const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
+ code.AddLine(fmt::format("case 0x{:x}: return {};", address, value));
+ }
+ }
+
+ code.AddLine("default: return 0;");
+
+ code.AddLine('}');
+ --code.scope;
+ code.AddLine('}');
+ code.AddNewLine();
+ }
+
void VisitBlock(const NodeBlock& bb) {
for (const Node node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
@@ -483,70 +549,12 @@ private:
return value;
} else if (const auto abuf = std::get_if<AbufNode>(node)) {
- const auto attribute = abuf->GetIndex();
- const auto element = abuf->GetElement();
-
- const auto GeometryPass = [&](const std::string& name) {
- if (stage == ShaderStage::Geometry && abuf->GetBuffer()) {
- // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
- // set an 0x80000000 index for those and the shader fails to build. Find out why
- // this happens and what's its intent.
- return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) +
- ") % MAX_VERTEX_INPUT]";
- }
- return name;
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- if (stage != ShaderStage::Fragment) {
- return GeometryPass("position") + GetSwizzle(element);
- } else {
- return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
- }
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "gl_PointCoord.x";
- case 1:
- return "gl_PointCoord.y";
- case 2:
- case 3:
- return "0";
- }
- UNREACHABLE();
- return "0";
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderStage::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return "uintBitsToFloat(config_pack[0])";
- case 3:
- return "uintBitsToFloat(gl_VertexID)";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return "0";
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderStage::Fragment);
- switch (element) {
- case 3:
- return "itof(gl_FrontFacing ? -1 : 0)";
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return "0";
- default:
- if (attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31) {
- return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
- }
- break;
+ UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
+ "Physical attributes in geometry shaders are not implemented");
+ if (abuf->IsPhysicalBuffer()) {
+ return "readPhysicalAttribute(ftou(" + Visit(abuf->GetPhysicalAddress()) + "))";
}
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
} else if (const auto cbuf = std::get_if<CbufNode>(node)) {
const Node offset = cbuf->GetOffset();
@@ -598,6 +606,69 @@ private:
return {};
}
+ std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) {
+ const auto GeometryPass = [&](std::string name) {
+ if (stage == ShaderStage::Geometry && buffer) {
+ // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
+ // set an 0x80000000 index for those and the shader fails to build. Find out why
+ // this happens and what's its intent.
+ return "gs_" + std::move(name) + "[ftou(" + Visit(buffer) + ") % MAX_VERTEX_INPUT]";
+ }
+ return name;
+ };
+
+ switch (attribute) {
+ case Attribute::Index::Position:
+ if (stage != ShaderStage::Fragment) {
+ return GeometryPass("position") + GetSwizzle(element);
+ } else {
+ return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
+ }
+ case Attribute::Index::PointCoord:
+ switch (element) {
+ case 0:
+ return "gl_PointCoord.x";
+ case 1:
+ return "gl_PointCoord.y";
+ case 2:
+ case 3:
+ return "0";
+ }
+ UNREACHABLE();
+ return "0";
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ // TODO(Subv): Find out what the values are for the first two elements when inside a
+ // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+ // shader.
+ ASSERT(stage == ShaderStage::Vertex);
+ switch (element) {
+ case 2:
+ // Config pack's first value is instance_id.
+ return "uintBitsToFloat(config_pack[0])";
+ case 3:
+ return "uintBitsToFloat(gl_VertexID)";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+ return "0";
+ case Attribute::Index::FrontFacing:
+ // TODO(Subv): Find out what the values are for the other elements.
+ ASSERT(stage == ShaderStage::Fragment);
+ switch (element) {
+ case 3:
+ return "itof(gl_FrontFacing ? -1 : 0)";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+ return "0";
+ default:
+ if (IsGenericAttribute(attribute)) {
+ return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+ }
+ break;
+ }
+ UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ return "0";
+ }
+
std::string ApplyPrecise(Operation operation, const std::string& value) {
if (!IsPrecise(operation)) {
return value;
@@ -833,6 +904,8 @@ private:
target = GetRegister(gpr->GetIndex());
} else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+ UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
+
target = [&]() -> std::string {
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
case Attribute::Index::Position:
@@ -844,8 +917,7 @@ private:
case Attribute::Index::ClipDistances4567:
return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']';
default:
- if (attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31) {
+ if (IsGenericAttribute(attribute)) {
return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
@@ -1591,15 +1663,11 @@ private:
}
std::string GetInputAttribute(Attribute::Index attribute) const {
- const auto index{static_cast<u32>(attribute) -
- static_cast<u32>(Attribute::Index::Attribute_0)};
- return GetDeclarationWithSuffix(index, "input_attr");
+ return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
}
std::string GetOutputAttribute(Attribute::Index attribute) const {
- const auto index{static_cast<u32>(attribute) -
- static_cast<u32>(Attribute::Index::Attribute_0)};
- return GetDeclarationWithSuffix(index, "output_attr");
+ return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
}
std::string GetConstBuffer(u32 index) const {
@@ -1640,6 +1708,19 @@ private:
return name + '_' + std::to_string(index) + '_' + suffix;
}
+ u32 GetNumPhysicalInputAttributes() const {
+ return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
+ }
+
+ u32 GetNumPhysicalAttributes() const {
+ return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
+ }
+
+ u32 GetNumPhysicalVaryings() const {
+ return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
+ Maxwell::NumVaryings);
+ }
+
const Device& device;
const ShaderIR& ir;
const ShaderStage stage;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 254c0d499..fba9c594a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
- : system{system}, precompiled_cache_virtual_file_offset{0} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- const auto entry = LoadDecompiledEntry();
+ auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
@@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- std::vector<u8> code(code_size);
+ std::string code(code_size, '\0');
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
- entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+ entry.code = std::move(code);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
@@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
- u8 is_indirect{};
+ bool is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
- entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+ entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
}
u32 samplers_count{};
@@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 offset{};
u64 index{};
u32 type{};
- u8 is_array{};
- u8 is_shadow{};
- u8 is_bindless{};
+ bool is_array{};
+ bool is_shadow{};
+ bool is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
- entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
- static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type),
- is_array != 0, is_shadow != 0, is_bindless != 0);
+ entry.entries.samplers.emplace_back(
+ static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
}
u32 global_memory_count{};
@@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
- u8 is_read{};
- u8 is_written{};
+ bool is_read{};
+ bool is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
- is_written != 0);
+ entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
+ is_written);
}
for (auto& clip_distance : entry.entries.clip_distances) {
- u8 clip_distance_raw{};
- if (!LoadObjectFromPrecompiled(clip_distance_raw))
+ if (!LoadObjectFromPrecompiled(clip_distance)) {
return {};
- clip_distance = clip_distance_raw != 0;
+ }
}
u64 shader_length{};
@@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
return false;
}
}
@@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(sampler.IsArray()) ||
+ !SaveObjectToPrecompiled(sampler.IsShadow()) ||
+ !SaveObjectToPrecompiled(sampler.IsBindless())) {
return false;
}
}
@@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
- if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
+ if (!SaveObjectToPrecompiled(clip_distance)) {
return false;
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 0142b2e3b..2da0a4a23 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -70,14 +70,14 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
- std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+ std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
- std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+ std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
}
@@ -162,6 +162,7 @@ struct ShaderDiskCacheDump {
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
+ ~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -259,20 +260,35 @@ private:
return SaveArrayToPrecompiled(&object, 1);
}
+ bool SaveObjectToPrecompiled(bool object) {
+ const auto value = static_cast<u8>(object);
+ return SaveArrayToPrecompiled(&value, 1);
+ }
+
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
- // Copre system
+ bool LoadObjectFromPrecompiled(bool& object) {
+ u8 value;
+ const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
+ if (!read_ok) {
+ return false;
+ }
+
+ object = value != 0;
+ return true;
+ }
+
+ // Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
- // Stores whole precompiled cache which will be read from or saved to the precompiled chache
- // file
+ // Stores whole precompiled cache which will be read from/saved to the precompiled cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset;
+ std::size_t precompiled_cache_virtual_file_offset = 0;
// The cache has been loaded at boot
bool tried_to_load{};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a11000f6b..b61a6d170 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -194,8 +194,8 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& attr : ir.GetInputAttributes()) {
- entries.attributes.insert(GetGenericAttributeLocation(attr.first));
+ for (const auto& attribute : ir.GetInputAttributes()) {
+ entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -321,8 +321,7 @@ private:
}
void DeclareInputAttributes() {
- for (const auto element : ir.GetInputAttributes()) {
- const Attribute::Index index = element.first;
+ for (const auto index : ir.GetInputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea1092db1..6a992c543 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -12,6 +12,8 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
+
+
namespace VideoCommon::Shader {
using Tegra::Shader::Attribute;
@@ -47,17 +49,20 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
+ UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
+ instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
+ "Non-32 bits PHYS reads are not implemented");
- Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
- Tegra::Shader::IpaSampleMode::Default};
+ const Node buffer{GetRegister(instr.gpr39)};
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
- const Node buffer = GetRegister(instr.gpr39);
- const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
- next_element, input_mode, buffer);
+ const Node attribute{instr.attribute.fmt20.IsPhysical()
+ ? GetPhysicalInputAttribute(instr.gpr8, buffer)
+ : GetInputAttribute(static_cast<Attribute::Index>(next_index),
+ next_element, buffer)};
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
@@ -239,6 +244,21 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::AL2P: {
+ // Ignore al2p.direction since we don't care about it.
+
+ // Calculate emulation fake physical address.
+ const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
+ const Node reg{GetRegister(instr.gpr8)};
+ const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
+
+ // Set the fake address to target register.
+ SetRegister(bb, instr.gpr0, fake_address);
+
+ // Signal the shader IR to declare all possible attributes and varyings
+ uses_physical_attributes = true;
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d750a2936..fa17c45b5 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -130,15 +130,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::IPA: {
- const auto& attribute = instr.attribute.fmt28;
+ const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
+
+ const auto attribute = instr.attribute.fmt28;
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
instr.ipa.sample_mode.Value()};
- const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
- Node value = attr;
+ Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
+ : GetInputAttribute(attribute.index, attribute.element);
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
- if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
- index <= Tegra::Shader::Attribute::Index::Attribute_31) {
+ const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+ index <= Tegra::Shader::Attribute::Index::Attribute_31;
+ if (is_generic || is_physical) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e4eb0dfd9..153ad1fd0 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
+ : program_code{program_code}, main_offset{main_offset} {
+ Decode();
+}
+
+ShaderIR::~ShaderIR() = default;
+
Node ShaderIR::StoreNode(NodeData&& node_data) {
auto store = std::make_unique<NodeData>(node_data);
const Node node = store.get();
@@ -89,13 +96,14 @@ Node ShaderIR::GetPredicate(bool immediate) {
return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
}
-Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
- const Tegra::Shader::IpaMode& input_mode, Node buffer) {
- const auto [entry, is_new] =
- used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
- entry->second.insert(input_mode);
+Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
+ used_input_attributes.emplace(index);
+ return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
+}
- return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
+Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
+ uses_physical_attributes = true;
+ return StoreNode(AbufNode(GetRegister(physical_address), buffer));
}
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 65f1e1de9..0bf124252 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -328,40 +328,31 @@ struct MetaTexture {
u32 element{};
};
-inline constexpr MetaArithmetic PRECISE = {true};
-inline constexpr MetaArithmetic NO_PRECISE = {false};
+constexpr MetaArithmetic PRECISE = {true};
+constexpr MetaArithmetic NO_PRECISE = {false};
using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
public:
- template <typename... T>
- explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
+ explicit OperationNode(OperationCode code) : code{code} {}
- template <typename... T>
- explicit constexpr OperationNode(OperationCode code, Meta&& meta)
- : code{code}, meta{std::move(meta)} {}
+ explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
template <typename... T>
- explicit constexpr OperationNode(OperationCode code, const T*... operands)
+ explicit OperationNode(OperationCode code, const T*... operands)
: OperationNode(code, {}, operands...) {}
template <typename... T>
- explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
- : code{code}, meta{std::move(meta)} {
-
- auto operands_list = {operands_...};
- for (auto& operand : operands_list) {
- operands.push_back(operand);
- }
- }
+ explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
+ : code{code}, meta{std::move(meta)}, operands{operands_...} {}
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
: code{code}, meta{meta}, operands{std::move(operands)} {}
explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
- : code{code}, meta{}, operands{std::move(operands)} {}
+ : code{code}, operands{std::move(operands)} {}
OperationCode GetCode() const {
return code;
@@ -465,17 +456,14 @@ private:
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
class AbufNode final {
public:
- explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
- const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
- : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}
-
+ // Initialize for standard attributes (index is explicit).
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
Node buffer = {})
- : input_mode{}, buffer{buffer}, index{index}, element{element} {}
+ : buffer{buffer}, index{index}, element{element} {}
- Tegra::Shader::IpaMode GetInputMode() const {
- return input_mode;
- }
+ // Initialize for physical attributes (index is a variable value).
+ explicit constexpr AbufNode(Node physical_address, Node buffer = {})
+ : physical_address{physical_address}, buffer{buffer} {}
Tegra::Shader::Attribute::Index GetIndex() const {
return index;
@@ -489,11 +477,19 @@ public:
return buffer;
}
+ bool IsPhysicalBuffer() const {
+ return physical_address != nullptr;
+ }
+
+ Node GetPhysicalAddress() const {
+ return physical_address;
+ }
+
private:
- const Tegra::Shader::IpaMode input_mode;
- const Node buffer;
- const Tegra::Shader::Attribute::Index index;
- const u32 element;
+ Node physical_address{};
+ Node buffer{};
+ Tegra::Shader::Attribute::Index index{};
+ u32 element{};
};
/// Constant buffer node, usually mapped to uniform buffers in GLSL
@@ -567,11 +563,8 @@ private:
class ShaderIR final {
public:
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
- : program_code{program_code}, main_offset{main_offset} {
-
- Decode();
- }
+ explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
+ ~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
return basic_blocks;
@@ -585,8 +578,7 @@ public:
return used_predicates;
}
- const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
- GetInputAttributes() const {
+ const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
return used_input_attributes;
}
@@ -615,6 +607,10 @@ public:
return static_cast<std::size_t>(coverage_end * sizeof(u64));
}
+ bool HasPhysicalAttributes() const {
+ return uses_physical_attributes;
+ }
+
const Tegra::Shader::Header& GetHeader() const {
return header;
}
@@ -696,8 +692,9 @@ private:
/// Generates a predicate node for an immediate true or false value
Node GetPredicate(bool immediate);
/// Generates a node representing an input attribute. Keeps track of used attributes.
- Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
- const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
+ Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
+ /// Generates a node representing a physical input attribute.
+ Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
/// Generates a node representing an output attribute. Keeps track of used attributes.
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
/// Generates a node representing an internal flag
@@ -814,11 +811,12 @@ private:
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut, bool sets_cc);
- Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+ Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+ std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
+ std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
+ s64 cursor) const;
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
Node addr_register,
@@ -835,12 +833,10 @@ private:
return StoreNode(OperationNode(code, std::move(meta), operands...));
}
- template <typename... T>
Node Operation(OperationCode code, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(operands)));
}
- template <typename... T>
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
}
@@ -872,13 +868,13 @@ private:
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
- std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
- used_input_attributes;
+ std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
+ bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
Tegra::Shader::Header header;
};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 4505667ff..19ede1eb9 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
for (; cursor >= 0; --cursor) {
const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
- if (operation->GetCode() == operation_code)
+ if (operation->GetCode() == operation_code) {
return {node, cursor};
+ }
}
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
- if (found)
+ if (found) {
return {found, cursor};
+ }
}
}
return {};
}
} // namespace
-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
// Cbuf found, but it has to be immediate
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
// that it uses as operand
const auto [found, found_cursor] =
@@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) {
+ s64 cursor) const {
for (; cursor >= 0; --cursor) {
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
if (!found_node) {