77 files changed, 1841 insertions, 783 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f30dd49a3..f8ec8fea8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,13 +45,23 @@ if (MSVC)
         /Zc:inline
         /Zc:throwingNew
 
+        # External headers diagnostics
+        /experimental:external  # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later
+        /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers
+        /external:W0            # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers
+
         # Warnings
         /W3
-        /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+        /we4018 # 'expression': signed/unsigned mismatch
+        /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled
         /we4101 # 'identifier': unreferenced local variable
+        /we4189 # 'identifier': local variable is initialized but not referenced
         /we4265 # 'class': class has virtual functions, but destructor is not virtual
-        /we4388 # signed/unsigned mismatch
-        /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+        /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data
+        /we4305 # 'context': truncation from 'type1' to 'type2'
+        /we4388 # 'expression': signed/unsigned mismatch
+        /we4389 # 'operator': signed/unsigned mismatch
+        /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect
         /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
         /we4555 # Expression has no effect; expected expression with side-effect
         /we4715 # 'function': not all control paths return a value
@@ -72,6 +82,7 @@ else()
         -Werror=missing-declarations
         -Werror=missing-field-initializers
         -Werror=reorder
+        -Werror=sign-compare
         -Werror=switch
         -Werror=uninitialized
         -Werror=unused-function
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
     target_compile_options(audio_core PRIVATE
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=reorder
-        -Werror=sign-compare
         -Werror=shadow
         -Werror=unused-parameter
         -Werror=unused-variable
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 80ffddb10..7dba739b4 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -12,6 +12,7 @@
 #include "audio_core/voice_context.h"
 #include "common/logging/log.h"
 #include "common/settings.h"
+#include "core/core_timing.h"
 #include "core/memory.h"
 
 namespace {
@@ -28,10 +29,9 @@ namespace {
                                        (static_cast<float>(r_channel) * r_mix_amount)));
 }
 
-[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel,
-                                                            s16 fc_channel,
-                                                            [[maybe_unused]] s16 lf_channel,
-                                                            s16 bl_channel, s16 br_channel) {
+[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(
+    s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel,
+    s16 br_channel) {
     // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels
     // are mixed to be 36.94%
 
@@ -56,11 +56,11 @@ namespace {
     const std::array<float_le, 4>& coeff) {
     const auto left =
         static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
-        static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0];
+        static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3];
 
     const auto right =
         static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
-        static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0];
+        static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3];
 
     return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))};
 }
@@ -68,7 +68,9 @@ namespace {
 } // namespace
 
 namespace AudioCore {
-AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
+constexpr s32 NUM_BUFFERS = 2;
+
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_,
                              AudioCommon::AudioRendererParameter params,
                              Stream::ReleaseCallback&& release_callback,
                              std::size_t instance_number)
@@ -77,7 +79,8 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
       sink_context(params.sink_count), splitter_context(),
       voices(params.voice_count), memory{memory_},
       command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context,
-                        memory) {
+                        memory),
+      core_timing{core_timing_} {
     behavior_info.SetUserRevision(params.revision);
     splitter_context.Initialize(behavior_info, params.splitter_count,
                                 params.num_splitter_send_channels);
@@ -86,16 +89,27 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
     stream = audio_out->OpenStream(
         core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
         fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
-    audio_out->StartStream(stream);
-
-    QueueMixedBuffer(0);
-    QueueMixedBuffer(1);
-    QueueMixedBuffer(2);
-    QueueMixedBuffer(3);
+    process_event = Core::Timing::CreateEvent(
+        fmt::format("AudioRenderer-Instance{}-Process", instance_number),
+        [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); });
+    for (s32 i = 0; i < NUM_BUFFERS; ++i) {
+        QueueMixedBuffer(i);
+    }
 }
 
 AudioRenderer::~AudioRenderer() = default;
 
+ResultCode AudioRenderer::Start() {
+    audio_out->StartStream(stream);
+    ReleaseAndQueueBuffers(); // if playing: requeues released buffers, schedules process_event
+    return ResultSuccess;
+}
+
+ResultCode AudioRenderer::Stop() {
+    audio_out->StopStream(stream); // NOTE(review): presumably clears IsPlaying() - confirm
+    return ResultSuccess;
+}
+
 u32 AudioRenderer::GetSampleRate() const {
     return worker_params.sample_rate;
 }
@@ -114,7 +128,7 @@ Stream::State AudioRenderer::GetStreamState() const {
 
 ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params,
                                               std::vector<u8>& output_params) {
-
+    std::scoped_lock lock{mutex};
     InfoUpdater info_updater{input_params, output_params, behavior_info};
 
     if (!info_updater.UpdateBehaviorInfo(behavior_info)) {
@@ -194,9 +208,6 @@ ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_param
         LOG_ERROR(Audio, "Audio buffers were not consumed!");
         return AudioCommon::Audren::ERR_INVALID_PARAMETERS;
     }
-
-    ReleaseAndQueueBuffers();
-
     return ResultSuccess;
 }
 
@@ -220,10 +231,8 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
     command_generator.PostCommand();
     // Base sample size
     std::size_t BUFFER_SIZE{worker_params.sample_count};
-    // Samples
-    std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels());
-    // Make sure to clear our samples
-    std::memset(buffer.data(), 0, buffer.size() * sizeof(s16));
+    // Samples, making sure to clear
+    std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels(), 0);
 
     if (sink_context.InUse()) {
         const auto stream_channel_count = stream->GetNumChannels();
@@ -231,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
         const auto channel_count = buffer_offsets.size();
         const auto& final_mix = mix_context.GetFinalMixInfo();
         const auto& in_params = final_mix.GetInParams();
-        std::vector<s32*> mix_buffers(channel_count);
+        std::vector<std::span<s32>> mix_buffers(channel_count);
         for (std::size_t i = 0; i < channel_count; i++) {
             mix_buffers[i] =
                 command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]);
@@ -284,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
                     buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample);
                 } else if (stream_channel_count == 2) {
                     // Mix all channels into 2 channels
-                    if (sink_context.HasDownMixingCoefficients()) {
-                        const auto [left, right] = Mix6To2WithCoefficients(
-                            fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
-                            sink_context.GetDownmixCoefficients());
-                        buffer[i * stream_channel_count + 0] = left;
-                        buffer[i * stream_channel_count + 1] = right;
-                    } else {
-                        const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample,
-                                                           lf_sample, bl_sample, br_sample);
-                        buffer[i * stream_channel_count + 0] = left;
-                        buffer[i * stream_channel_count + 1] = right;
-                    }
+                    const auto [left, right] = Mix6To2WithCoefficients(
+                        fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
+                        sink_context.GetDownmixCoefficients());
+                    buffer[i * stream_channel_count + 0] = left;
+                    buffer[i * stream_channel_count + 1] = right;
                 } else if (stream_channel_count == 6) {
                     // Pass through
                     buffer[i * stream_channel_count + 0] = fl_sample;
@@ -315,10 +317,24 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
 }
 
 void AudioRenderer::ReleaseAndQueueBuffers() {
-    const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
-    for (const auto& tag : released_buffers) {
-        QueueMixedBuffer(tag);
+    if (!stream->IsPlaying()) { // stopped: skip the refill and do not reschedule
+        return;
     }
+
+    {
+        std::scoped_lock lock{mutex}; // also taken by UpdateAudioRenderer()
+        const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
+        for (const auto& tag : released_buffers) {
+            QueueMixedBuffer(tag);
+        }
+    }
+
+    const f32 sample_rate = static_cast<f32>(GetSampleRate());
+    const f32 sample_count = static_cast<f32>(GetSampleCount());
+    const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240));
+    const s32 ms = (1000 / std::max(static_cast<s32>(consume_rate), 1)) - 1; // no div by 0
+    const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1));
+    core_timing.ScheduleEvent(next_event_time, process_event, {}); // re-arms this callback
 }
 
 } // namespace AudioCore
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 18567f618..88fdd13dd 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <memory>
+#include <mutex>
 #include <vector>
 
 #include "audio_core/behavior_info.h"
@@ -45,6 +46,8 @@ public:
 
     [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
                                                  std::vector<u8>& output_params);
+    [[nodiscard]] ResultCode Start();
+    [[nodiscard]] ResultCode Stop();
     void QueueMixedBuffer(Buffer::Tag tag);
     void ReleaseAndQueueBuffers();
     [[nodiscard]] u32 GetSampleRate() const;
@@ -68,6 +71,9 @@ private:
     Core::Memory::Memory& memory;
     CommandGenerator command_generator;
     std::size_t elapsed_frame_count{};
+    Core::Timing::CoreTiming& core_timing;
+    std::shared_ptr<Core::Timing::EventType> process_event;
+    std::mutex mutex;
 };
 
 } // namespace AudioCore
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 437cc5ccd..b99d0fc91 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
     0.72867f, 0.69794f, 0.5464f,  0.24563f, 0.45214f, 0.44042f};
 
 template <std::size_t N>
-void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) {
     for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) {
         for (std::size_t j = 0; j < N; j++) {
             output[i + j] +=
@@ -40,7 +40,8 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
     }
 }
 
-s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) {
+s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta,
+                 s32 sample_count) {
     s32 x = 0;
     for (s32 i = 0; i < sample_count; i++) {
         x = static_cast<s32>(static_cast<float>(input[i]) * gain);
@@ -50,20 +51,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam
     return x;
 }
 
-void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) {
+void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta,
+               s32 sample_count) {
     for (s32 i = 0; i < sample_count; i++) {
         output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
         gain += delta;
     }
 }
 
-void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain,
+                           s32 sample_count) {
     for (s32 i = 0; i < sample_count; i++) {
         output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
     }
 }
 
-s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
+s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) {
     const bool positive = first_sample > 0;
     auto final_sample = std::abs(first_sample);
     for (s32 i = 0; i < sample_count; i++) {
@@ -128,10 +131,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1,
                                                            1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
 
 template <std::size_t CHANNEL_COUNT>
-void ApplyReverbGeneric(I3dl2ReverbState& state,
-                        const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
-                        const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
-                        s32 sample_count) {
+void ApplyReverbGeneric(
+    I3dl2ReverbState& state,
+    const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input,
+    const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) {
 
     auto GetTapLookup = []() {
         if constexpr (CHANNEL_COUNT == 1) {
@@ -400,7 +403,10 @@ void CommandGenerator::GenerateDataSourceCommand(ServerVoiceInfo& voice_info, Vo
         }
     } else {
         switch (in_params.sample_format) {
+        case SampleFormat::Pcm8:
         case SampleFormat::Pcm16:
+        case SampleFormat::Pcm32:
+        case SampleFormat::PcmFloat:
             DecodeFromWaveBuffers(voice_info, GetChannelMixBuffer(channel), dsp_state, channel,
                                   worker_params.sample_rate, worker_params.sample_count,
                                   in_params.node_id);
@@ -454,8 +460,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff
                   "input_mix_buffer={}, output_mix_buffer={}",
                   node_id, input_offset, output_offset);
     }
-    const auto* input = GetMixBuffer(input_offset);
-    auto* output = GetMixBuffer(output_offset);
+    std::span<const s32> input = GetMixBuffer(input_offset);
+    std::span<s32> output = GetMixBuffer(output_offset);
 
     // Biquad filter parameters
     const auto [n0, n1, n2] = params.numerator;
@@ -548,8 +554,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
         return;
     }
 
-    std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
-    std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+    std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{};
+    std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{};
 
     const auto status = params.status;
     for (s32 i = 0; i < channel_count; i++) {
@@ -584,7 +590,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
         for (s32 i = 0; i < channel_count; i++) {
             // Only copy if the buffer input and output do not match!
             if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
-                std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+                std::memcpy(output[i].data(), input[i].data(),
+                            worker_params.sample_count * sizeof(s32));
             }
         }
     }
@@ -600,8 +607,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset,
     for (s32 i = 0; i < channel_count; i++) {
         // TODO(ogniK): Actually implement biquad filter
         if (params.input[i] != params.output[i]) {
-            const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
-            auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
+            std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]);
+            std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]);
             ApplyMix<1>(output, input, 32768, worker_params.sample_count);
         }
     }
@@ -640,14 +647,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf
 
                 if (samples_read != static_cast<int>(worker_params.sample_count) &&
                     samples_read <= params.sample_count) {
-                    std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read);
+                    std::memset(GetMixBuffer(output_index).data(), 0,
+                                params.sample_count - samples_read);
                 }
             } else {
                 AuxInfoDSP empty{};
                 memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP));
                 memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP));
                 if (output_index != input_index) {
-                    std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index),
+                    std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(),
                                 worker_params.sample_count * sizeof(s32));
                 }
             }
@@ -665,7 +673,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter
 }
 
 s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
-                                     const s32* data, u32 sample_count, u32 write_offset,
+                                     std::span<const s32> data, u32 sample_count, u32 write_offset,
                                      u32 write_count) {
     if (max_samples == 0) {
         return 0;
@@ -675,14 +683,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
         return 0;
     }
 
-    std::size_t data_offset{};
+    s32 data_offset{};
     u32 remaining = sample_count;
     while (remaining > 0) {
         // Get position in buffer
         const auto base = send_buffer + (offset * sizeof(u32));
         const auto samples_to_grab = std::min(max_samples - offset, remaining);
         // Write to output
-        memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32));
+        memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32));
         offset = (offset + samples_to_grab) % max_samples;
         remaining -= samples_to_grab;
         data_offset += samples_to_grab;
@@ -695,7 +703,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
 }
 
 s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
-                                    s32* out_data, u32 sample_count, u32 read_offset,
+                                    std::span<s32> out_data, u32 sample_count, u32 read_offset,
                                     u32 read_count) {
     if (max_samples == 0) {
         return 0;
@@ -707,15 +715,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
     }
 
     u32 remaining = sample_count;
+    s32 data_offset{};
     while (remaining > 0) {
         const auto base = recv_buffer + (offset * sizeof(u32));
         const auto samples_to_grab = std::min(max_samples - offset, remaining);
         std::vector<s32> buffer(samples_to_grab);
         memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32));
-        std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32));
-        out_data += samples_to_grab;
+        std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32));
         offset = (offset + samples_to_grab) % max_samples;
         remaining -= samples_to_grab;
+        data_offset += samples_to_grab;
     }
 
     if (read_count != 0) {
@@ -795,7 +804,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
         state.lowpass_1 = 0.0f;
     } else {
         const auto a = 1.0f - hf_gain;
-        const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+        const auto b = 2.0f * (2.0f - hf_gain * CosD(256.0f * info.hf_reference /
                                                      static_cast<f32>(info.sample_rate)));
         const auto c = std::sqrt(b * b - 4.0f * a * a);
 
@@ -843,7 +852,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
     }
 
     const auto max_early_delay = state.early_delay_line.GetMaxDelay();
-    const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+    const auto reflection_time = 1000.0f * (0.9998f * info.reverb_delay + 0.02f);
     for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
         const auto length = AudioCommon::CalculateDelaySamples(
             sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
@@ -962,8 +971,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t
                   node_id, input_offset, output_offset, volume);
     }
 
-    auto* output = GetMixBuffer(output_offset);
-    const auto* input = GetMixBuffer(input_offset);
+    std::span<s32> output = GetMixBuffer(output_offset);
+    std::span<const s32> input = GetMixBuffer(input_offset);
 
     const s32 gain = static_cast<s32>(volume * 32768.0f);
     // Mix with loop unrolling
@@ -1003,8 +1012,10 @@ void CommandGenerator::GenerateFinalMixCommand() {
     }
 }
 
-s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
-                                  s32 sample_count, s32 channel, std::size_t mix_offset) {
+template <typename T> // T: element type of the PCM data read from the wave buffer
+s32 CommandGenerator::DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
+                                s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
+                                s32 channel, std::size_t mix_offset) {
     const auto& in_params = voice_info.GetInParams();
     const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
     if (wave_buffer.buffer_address == 0) {
@@ -1013,39 +1024,50 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s
     if (wave_buffer.buffer_size == 0) {
         return 0;
     }
-    if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) {
+    if (sample_end_offset < sample_start_offset) {
         return 0;
     }
-    const auto samples_remaining =
-        (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
+    const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
     const auto start_offset =
-        ((wave_buffer.start_sample_offset + dsp_state.offset) * in_params.channel_count) *
-        sizeof(s16);
+        ((dsp_state.offset + sample_start_offset) * in_params.channel_count) * sizeof(T);
     const auto buffer_pos = wave_buffer.buffer_address + start_offset;
     const auto samples_processed = std::min(sample_count, samples_remaining);
 
-    if (in_params.channel_count == 1) {
-        std::vector<s16> buffer(samples_processed);
-        memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
-        for (std::size_t i = 0; i < buffer.size(); i++) {
-            sample_buffer[mix_offset + i] = buffer[i];
-        }
-    } else {
-        const auto channel_count = in_params.channel_count;
-        std::vector<s16> buffer(samples_processed * channel_count);
-        memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
+    const auto channel_count = in_params.channel_count;
+    std::vector<T> buffer(samples_processed * channel_count); // all channels, interleaved
+    memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(T));
 
+    if constexpr (std::is_floating_point_v<T>) { // float PCM: scale by s16 max
+        for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+            sample_buffer[mix_offset + i] = static_cast<s32>(buffer[i * channel_count + channel] *
+                                                             std::numeric_limits<s16>::max());
+        }
+    } else if constexpr (sizeof(T) == 1) { // 8-bit PCM: normalize by s8 max, rescale to s16
+        for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+            sample_buffer[mix_offset + i] = // divide in f32; integer '/' gave only -1, 0 or 1
+                static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel]) /
+                                 static_cast<f32>(std::numeric_limits<s8>::max()) *
+                                 static_cast<f32>(std::numeric_limits<s16>::max()));
+        }
+    } else if constexpr (sizeof(T) == 2) { // 16-bit PCM: copied as-is
         for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
             sample_buffer[mix_offset + i] = buffer[i * channel_count + channel];
         }
+    } else {
+        for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+            sample_buffer[mix_offset + i] = // divide in f32; integer '/' gave only -1, 0 or 1
+                static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel]) /
+                                 static_cast<f32>(std::numeric_limits<s32>::max()) *
+                                 static_cast<f32>(std::numeric_limits<s16>::max()));
+        }
     }
 
     return samples_processed;
 }
 
 s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
-                                  s32 sample_count, [[maybe_unused]] s32 channel,
-                                  std::size_t mix_offset) {
+                                  s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
+                                  [[maybe_unused]] s32 channel, std::size_t mix_offset) {
     const auto& in_params = voice_info.GetInParams();
     const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
     if (wave_buffer.buffer_address == 0) {
@@ -1054,7 +1076,7 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
     if (wave_buffer.buffer_size == 0) {
         return 0;
     }
-    if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) {
+    if (sample_end_offset < sample_start_offset) {
         return 0;
     }
 
@@ -1079,10 +1101,9 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
     s32 coef1 = coeffs[idx * 2];
     s32 coef2 = coeffs[idx * 2 + 1];
 
-    const auto samples_remaining =
-        (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
+    const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
     const auto samples_processed = std::min(sample_count, samples_remaining);
-    const auto sample_pos = wave_buffer.start_sample_offset + dsp_state.offset;
+    const auto sample_pos = dsp_state.offset + sample_start_offset;
 
     const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME;
     auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) +
@@ -1157,12 +1178,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
     return samples_processed;
 }
 
-s32* CommandGenerator::GetMixBuffer(std::size_t index) {
-    return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) {
+    const auto first_sample = index * worker_params.sample_count; // start of buffer `index`
+    return std::span<s32>(mix_buffer.data() + first_sample, worker_params.sample_count);
 }
 
-const s32* CommandGenerator::GetMixBuffer(std::size_t index) const {
-    return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const {
+    const auto first_sample = index * worker_params.sample_count; // start of buffer `index`
+    return std::span<const s32>(mix_buffer.data() + first_sample, worker_params.sample_count);
 }
 
 std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const {
@@ -1173,15 +1196,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const {
     return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT;
 }
 
-s32* CommandGenerator::GetChannelMixBuffer(s32 channel) {
+std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) {
     return GetMixBuffer(worker_params.mix_buffer_count + channel);
 }
 
-const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const {
+std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const {
     return GetMixBuffer(worker_params.mix_buffer_count + channel);
 }
 
-void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output,
+void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
                                              VoiceState& dsp_state, s32 channel,
                                              s32 target_sample_rate, s32 sample_count,
                                              s32 node_id) {
@@ -1193,7 +1216,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
                   node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate,
                   in_params.mix_id, in_params.splitter_info_id);
     }
-    ASSERT_OR_EXECUTE(output != nullptr, { return; });
+    ASSERT_OR_EXECUTE(output.data() != nullptr, { return; });
 
     const auto resample_rate = static_cast<s32>(
         static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) *
@@ -1210,9 +1233,9 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
     }
 
     std::size_t temp_mix_offset{};
-    bool is_buffer_completed{false};
+    s32 samples_output{};
     auto samples_remaining = sample_count;
-    while (samples_remaining > 0 && !is_buffer_completed) {
+    while (samples_remaining > 0) {
         const auto samples_to_output = std::min(samples_remaining, min_required_samples);
         const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15;
 
@@ -1229,24 +1252,53 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
             const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
             // No more data can be read
             if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) {
-                is_buffer_completed = true;
                 break;
             }
 
             if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 &&
                 wave_buffer.context_address != 0 && wave_buffer.context_size != 0) {
-                // TODO(ogniK): ADPCM loop context
+                memory.ReadBlock(wave_buffer.context_address, &dsp_state.context,
+                                 sizeof(ADPCMContext));
+            }
+
+            s32 samples_offset_start;
+            s32 samples_offset_end;
+            if (dsp_state.loop_count > 0 && wave_buffer.loop_start_sample != 0 &&
+                wave_buffer.loop_end_sample != 0 &&
+                wave_buffer.loop_start_sample <= wave_buffer.loop_end_sample) {
+                samples_offset_start = wave_buffer.loop_start_sample;
+                samples_offset_end = wave_buffer.loop_end_sample;
+            } else {
+                samples_offset_start = wave_buffer.start_sample_offset;
+                samples_offset_end = wave_buffer.end_sample_offset;
             }
 
             s32 samples_decoded{0};
             switch (in_params.sample_format) {
+            case SampleFormat::Pcm8:
+                samples_decoded =
+                    DecodePcm<s8>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+                                  samples_to_read - samples_read, channel, temp_mix_offset);
+                break;
             case SampleFormat::Pcm16:
-                samples_decoded = DecodePcm16(voice_info, dsp_state, samples_to_read - samples_read,
-                                              channel, temp_mix_offset);
+                samples_decoded =
+                    DecodePcm<s16>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+                                   samples_to_read - samples_read, channel, temp_mix_offset);
+                break;
+            case SampleFormat::Pcm32:
+                samples_decoded =
+                    DecodePcm<s32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+                                   samples_to_read - samples_read, channel, temp_mix_offset);
+                break;
+            case SampleFormat::PcmFloat:
+                samples_decoded =
+                    DecodePcm<f32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+                                   samples_to_read - samples_read, channel, temp_mix_offset);
                 break;
             case SampleFormat::Adpcm:
-                samples_decoded = DecodeAdpcm(voice_info, dsp_state, samples_to_read - samples_read,
-                                              channel, temp_mix_offset);
+                samples_decoded =
+                    DecodeAdpcm(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+                                samples_to_read - samples_read, channel, temp_mix_offset);
                 break;
             default:
                 UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format);
@@ -1257,15 +1309,19 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
             dsp_state.offset += samples_decoded;
             dsp_state.played_sample_count += samples_decoded;
 
-            if (dsp_state.offset >=
-                    (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) ||
+            if (dsp_state.offset >= (samples_offset_end - samples_offset_start) ||
                 samples_decoded == 0) {
                 // Reset our sample offset
                 dsp_state.offset = 0;
                 if (wave_buffer.is_looping) {
-                    if (samples_decoded == 0) {
+                    dsp_state.loop_count++;
+                    if (wave_buffer.loop_count > 0 &&
+                        (dsp_state.loop_count > wave_buffer.loop_count || samples_decoded == 0)) {
                         // End of our buffer
-                        is_buffer_completed = true;
+                        voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
+                    }
+
+                    if (samples_decoded == 0) {
                         break;
                     }
 
@@ -1273,35 +1329,29 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
                         dsp_state.played_sample_count = 0;
                     }
                 } else {
-
                     // Update our wave buffer states
-                    dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
-                    dsp_state.wave_buffer_consumed++;
-                    dsp_state.wave_buffer_index =
-                        (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
-                    if (wave_buffer.end_of_stream) {
-                        dsp_state.played_sample_count = 0;
-                    }
+                    voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
                 }
             }
         }
 
         if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) {
             // No need to resample
-            std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32));
+            std::memcpy(output.data() + samples_output, sample_buffer.data(),
+                        samples_read * sizeof(s32));
         } else {
             std::fill(sample_buffer.begin() + temp_mix_offset,
                       sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read),
                       0);
-            AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction,
-                                samples_to_output);
+            AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate,
+                                dsp_state.fraction, samples_to_output);
             // Resample
             for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) {
                 dsp_state.sample_history[i] = sample_buffer[samples_to_read + i];
             }
         }
-        output += samples_to_output;
         samples_remaining -= samples_to_output;
+        samples_output += samples_to_output;
     }
 }
 
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index 2ebb755b0..59a33ba76 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <span>
 #include "audio_core/common.h"
 #include "audio_core/voice_context.h"
 #include "common/common_types.h"
@@ -41,10 +42,10 @@ public:
     void PreCommand();
     void PostCommand();
 
-    [[nodiscard]] s32* GetChannelMixBuffer(s32 channel);
-    [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const;
-    [[nodiscard]] s32* GetMixBuffer(std::size_t index);
-    [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const;
+    [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel);
+    [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const;
+    [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index);
+    [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const;
     [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const;
 
     [[nodiscard]] std::size_t GetTotalMixBufferCount() const;
@@ -77,21 +78,24 @@ private:
     void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled);
     [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index);
 
-    s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data,
-                       u32 sample_count, u32 write_offset, u32 write_count);
-    s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
-                      u32 sample_count, u32 read_offset, u32 read_count);
+    s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
+                       std::span<const s32> data, u32 sample_count, u32 write_offset,
+                       u32 write_count);
+    s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
+                      std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count);
 
     void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
                                std::vector<u8>& work_buffer);
     void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
     // DSP Code
-    s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
-                    s32 channel, std::size_t mix_offset);
-    s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
-                    s32 channel, std::size_t mix_offset);
-    void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state,
-                               s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id);
+    template <typename T>
+    s32 DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
+                  s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
+    s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
+                    s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
+    void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
+                               VoiceState& dsp_state, s32 channel, s32 target_sample_rate,
+                               s32 sample_count, s32 node_id);
 
     AudioCommon::AudioRendererParameter& worker_params;
     VoiceContext& voice_context;
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index fe546c55d..1ab537588 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -15,7 +15,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
 constexpr ResultCode ERR_SPLITTER_SORT_FAILED{ErrorModule::Audio, 43};
 } // namespace Audren
 
-constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
+constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '9');
 constexpr std::size_t MAX_MIX_BUFFERS = 24;
 constexpr std::size_t MAX_BIQUAD_FILTERS = 2;
 constexpr std::size_t MAX_CHANNEL_COUNT = 6;
diff --git a/src/audio_core/info_updater.cpp b/src/audio_core/info_updater.cpp
index 4a5b1b4ab..9b4ca1851 100644
--- a/src/audio_core/info_updater.cpp
+++ b/src/audio_core/info_updater.cpp
@@ -189,9 +189,6 @@ bool InfoUpdater::UpdateVoices(VoiceContext& voice_context,
         if (voice_in_params.is_new) {
             // Default our values for our voice
             voice_info.Initialize();
-            if (channel_count == 0 || channel_count > AudioCommon::MAX_CHANNEL_COUNT) {
-                continue;
-            }
 
             // Zero out our voice states
             for (std::size_t channel = 0; channel < channel_count; channel++) {
diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp
index a69543696..cc55b290c 100644
--- a/src/audio_core/sink_context.cpp
+++ b/src/audio_core/sink_context.cpp
@@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const {
 void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) {
     ASSERT(in.type == SinkTypes::Device);
 
-    has_downmix_coefs = in.device.down_matrix_enabled;
-    if (has_downmix_coefs) {
+    if (in.device.down_matrix_enabled) {
         downmix_coefficients = in.device.down_matrix_coef;
+    } else {
+        downmix_coefficients = {
+            1.0f,   // front
+            0.707f, // center
+            0.0f,   // lfe
+            0.707f, // back
+        };
     }
+
     in_use = in.in_use;
     use_count = in.device.input_count;
     buffers = in.device.input;
@@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const {
     return buffer_ret;
 }
 
-bool SinkContext::HasDownMixingCoefficients() const {
-    return has_downmix_coefs;
-}
-
 const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const {
     return downmix_coefficients;
 }
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 9e2b69785..254961fe2 100644
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -84,7 +84,6 @@ public:
     [[nodiscard]] bool InUse() const;
     [[nodiscard]] std::vector<u8> OutputBuffers() const;
 
-    [[nodiscard]] bool HasDownMixingCoefficients() const;
     [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const;
 
 private:
@@ -92,7 +91,6 @@ private:
     s32 use_count{};
     std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{};
     std::size_t sink_count{};
-    bool has_downmix_coefs{false};
     DownmixCoefficients downmix_coefficients{};
 };
 } // namespace AudioCore
diff --git a/src/audio_core/voice_context.cpp b/src/audio_core/voice_context.cpp
index 867b8fc6b..d8c954b60 100644
--- a/src/audio_core/voice_context.cpp
+++ b/src/audio_core/voice_context.cpp
@@ -66,7 +66,7 @@ void ServerVoiceInfo::Initialize() {
     in_params.last_volume = 0.0f;
     in_params.biquad_filter.fill({});
     in_params.wave_buffer_count = 0;
-    in_params.wave_bufffer_head = 0;
+    in_params.wave_buffer_head = 0;
     in_params.mix_id = AudioCommon::NO_MIX;
     in_params.splitter_info_id = AudioCommon::NO_SPLITTER;
     in_params.additional_params_address = 0;
@@ -75,7 +75,7 @@ void ServerVoiceInfo::Initialize() {
     out_params.played_sample_count = 0;
     out_params.wave_buffer_consumed = 0;
     in_params.voice_drop_flag = false;
-    in_params.buffer_mapped = false;
+    in_params.buffer_mapped = true;
     in_params.wave_buffer_flush_request_count = 0;
     in_params.was_biquad_filter_enabled.fill(false);
 
@@ -126,7 +126,7 @@ void ServerVoiceInfo::UpdateParameters(const VoiceInfo::InParams& voice_in,
     in_params.volume = voice_in.volume;
     in_params.biquad_filter = voice_in.biquad_filter;
     in_params.wave_buffer_count = voice_in.wave_buffer_count;
-    in_params.wave_bufffer_head = voice_in.wave_buffer_head;
+    in_params.wave_buffer_head = voice_in.wave_buffer_head;
     if (behavior_info.IsFlushVoiceWaveBuffersSupported()) {
         const auto in_request_count = in_params.wave_buffer_flush_request_count;
         const auto voice_request_count = voice_in.wave_buffer_flush_request_count;
@@ -185,14 +185,16 @@ void ServerVoiceInfo::UpdateWaveBuffers(
             wave_buffer.buffer_size = 0;
             wave_buffer.context_address = 0;
             wave_buffer.context_size = 0;
+            wave_buffer.loop_start_sample = 0;
+            wave_buffer.loop_end_sample = 0;
             wave_buffer.sent_to_dsp = true;
         }
 
         // Mark all our wave buffers as invalid
         for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count);
              channel++) {
-            for (auto& is_valid : voice_states[channel]->is_wave_buffer_valid) {
-                is_valid = false;
+            for (std::size_t i = 0; i < AudioCommon::MAX_WAVE_BUFFERS; ++i) {
+                voice_states[channel]->is_wave_buffer_valid[i] = false;
             }
         }
     }
@@ -211,7 +213,7 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
                                        const WaveBuffer& in_wave_buffer, SampleFormat sample_format,
                                        bool is_buffer_valid,
                                        [[maybe_unused]] BehaviorInfo& behavior_info) {
-    if (!is_buffer_valid && out_wavebuffer.sent_to_dsp) {
+    if (!is_buffer_valid && out_wavebuffer.sent_to_dsp && out_wavebuffer.buffer_address != 0) {
         out_wavebuffer.buffer_address = 0;
         out_wavebuffer.buffer_size = 0;
     }
@@ -219,11 +221,40 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
     if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) {
         // Validate sample offset sizings
         if (sample_format == SampleFormat::Pcm16) {
-            const auto buffer_size = in_wave_buffer.buffer_size;
-            if (in_wave_buffer.start_sample_offset < 0 || in_wave_buffer.end_sample_offset < 0 ||
-                (buffer_size < (sizeof(s16) * in_wave_buffer.start_sample_offset)) ||
-                (buffer_size < (sizeof(s16) * in_wave_buffer.end_sample_offset))) {
+            const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
+            const s64 start = sizeof(s16) * in_wave_buffer.start_sample_offset;
+            const s64 end = sizeof(s16) * in_wave_buffer.end_sample_offset;
+            if (0 > start || start > buffer_size || 0 > end || end > buffer_size) {
                 // TODO(ogniK): Write error info
+                LOG_ERROR(Audio,
+                          "PCM16 wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
+                          "offsets were "
+                          "0x{:08X} - 0x{:08X}",
+                          buffer_size, sizeof(s16) * in_wave_buffer.start_sample_offset,
+                          sizeof(s16) * in_wave_buffer.end_sample_offset);
+                return;
+            }
+        } else if (sample_format == SampleFormat::Adpcm) {
+            const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
+            const s64 start_frames = in_wave_buffer.start_sample_offset / 14;
+            const s64 start_extra = in_wave_buffer.start_sample_offset % 14 == 0
+                                        ? 0
+                                        : (in_wave_buffer.start_sample_offset % 14) / 2 + 1 +
+                                              (in_wave_buffer.start_sample_offset % 2);
+            const s64 start = start_frames * 8 + start_extra;
+            const s64 end_frames = in_wave_buffer.end_sample_offset / 14;
+            const s64 end_extra = in_wave_buffer.end_sample_offset % 14 == 0
+                                      ? 0
+                                      : (in_wave_buffer.end_sample_offset % 14) / 2 + 1 +
+                                            (in_wave_buffer.end_sample_offset % 2);
+            const s64 end = end_frames * 8 + end_extra;
+            if (in_wave_buffer.start_sample_offset < 0 || start > buffer_size ||
+                in_wave_buffer.end_sample_offset < 0 || end > buffer_size) {
+                LOG_ERROR(Audio,
+                          "ADPCM wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
+                          "offsets were "
+                          "0x{:08X} - 0x{:08X}",
+                          in_wave_buffer.buffer_size, start, end);
                 return;
             }
         }
@@ -239,29 +270,34 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
         out_wavebuffer.buffer_size = in_wave_buffer.buffer_size;
         out_wavebuffer.context_address = in_wave_buffer.context_address;
         out_wavebuffer.context_size = in_wave_buffer.context_size;
+        out_wavebuffer.loop_start_sample = in_wave_buffer.loop_start_sample;
+        out_wavebuffer.loop_end_sample = in_wave_buffer.loop_end_sample;
         in_params.buffer_mapped =
             in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0;
         // TODO(ogniK): Pool mapper attachment
         // TODO(ogniK): IsAdpcmLoopContextBugFixed
+        if (sample_format == SampleFormat::Adpcm && in_wave_buffer.context_address != 0 &&
+            in_wave_buffer.context_size != 0 && behavior_info.IsAdpcmLoopContextBugFixed()) {
+        } else {
+            out_wavebuffer.context_address = 0;
+            out_wavebuffer.context_size = 0;
+        }
     }
 }
 
 void ServerVoiceInfo::WriteOutStatus(
     VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in,
     std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) {
-    if (voice_in.is_new) {
+    if (voice_in.is_new || in_params.is_new) {
         in_params.is_new = true;
         voice_out.wave_buffer_consumed = 0;
         voice_out.played_sample_count = 0;
         voice_out.voice_dropped = false;
-    } else if (!in_params.is_new) {
-        voice_out.wave_buffer_consumed = voice_states[0]->wave_buffer_consumed;
-        voice_out.played_sample_count = voice_states[0]->played_sample_count;
-        voice_out.voice_dropped = in_params.voice_drop_flag;
     } else {
-        voice_out.wave_buffer_consumed = 0;
-        voice_out.played_sample_count = 0;
-        voice_out.voice_dropped = false;
+        const auto& state = voice_states[0];
+        voice_out.wave_buffer_consumed = state->wave_buffer_consumed;
+        voice_out.played_sample_count = state->played_sample_count;
+        voice_out.voice_dropped = state->voice_dropped;
     }
 }
 
@@ -283,7 +319,8 @@ ServerVoiceInfo::OutParams& ServerVoiceInfo::GetOutParams() {
 
 bool ServerVoiceInfo::ShouldSkip() const {
     // TODO(ogniK): Handle unmapped wave buffers or parameters
-    return !in_params.in_use || (in_params.wave_buffer_count == 0) || in_params.voice_drop_flag;
+    return !in_params.in_use || in_params.wave_buffer_count == 0 || !in_params.buffer_mapped ||
+           in_params.voice_drop_flag;
 }
 
 bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) {
@@ -381,7 +418,7 @@ bool ServerVoiceInfo::UpdateParametersForCommandGeneration(
 void ServerVoiceInfo::FlushWaveBuffers(
     u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
     s32 channel_count) {
-    auto wave_head = in_params.wave_bufffer_head;
+    auto wave_head = in_params.wave_buffer_head;
 
     for (u8 i = 0; i < flush_count; i++) {
         in_params.wave_buffer[wave_head].sent_to_dsp = true;
@@ -401,6 +438,17 @@ bool ServerVoiceInfo::HasValidWaveBuffer(const VoiceState* state) const {
     return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end();
 }
 
+void ServerVoiceInfo::SetWaveBufferCompleted(VoiceState& dsp_state,
+                                             const ServerWaveBuffer& wave_buffer) {
+    dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
+    dsp_state.wave_buffer_consumed++;
+    dsp_state.wave_buffer_index = (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
+    dsp_state.loop_count = 0;
+    if (wave_buffer.end_of_stream) {
+        dsp_state.played_sample_count = 0;
+    }
+}
+
 VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} {
     for (std::size_t i = 0; i < voice_count; i++) {
         voice_channel_resources.emplace_back(static_cast<s32>(i));
diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h
index 70359cadb..e1050897b 100644
--- a/src/audio_core/voice_context.h
+++ b/src/audio_core/voice_context.h
@@ -60,10 +60,12 @@ struct WaveBuffer {
     u8 is_looping{};
     u8 end_of_stream{};
     u8 sent_to_server{};
-    INSERT_PADDING_BYTES(5);
+    INSERT_PADDING_BYTES(1);
+    s32 loop_count{};
     u64 context_address{};
     u64 context_size{};
-    INSERT_PADDING_BYTES(8);
+    u32 loop_start_sample{};
+    u32 loop_end_sample{};
 };
 static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size");
 
@@ -76,6 +78,9 @@ struct ServerWaveBuffer {
     bool end_of_stream{};
     VAddr context_address{};
     std::size_t context_size{};
+    s32 loop_count{};
+    u32 loop_start_sample{};
+    u32 loop_end_sample{};
     bool sent_to_dsp{true};
 };
 
@@ -108,6 +113,7 @@ struct VoiceState {
     u32 external_context_size;
     bool is_external_context_used;
     bool voice_dropped;
+    s32 loop_count;
 };
 
 class VoiceChannelResource {
@@ -206,7 +212,7 @@ public:
         float last_volume{};
         std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{};
         s32 wave_buffer_count{};
-        s16 wave_bufffer_head{};
+        s16 wave_buffer_head{};
         INSERT_PADDING_BYTES(2);
         BehaviorFlags behavior_flags{};
         VAddr additional_params_address{};
@@ -252,6 +258,7 @@ public:
     void FlushWaveBuffers(u8 flush_count,
                           std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
                           s32 channel_count);
+    void SetWaveBufferCompleted(VoiceState& dsp_state, const ServerWaveBuffer& wave_buffer);
 
 private:
     std::vector<s16> stored_samples;
diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp
index 077f34995..274f57659 100644
--- a/src/common/fs/file.cpp
+++ b/src/common/fs/file.cpp
@@ -306,9 +306,9 @@ bool IOFile::Flush() const {
     errno = 0;
 
 #ifdef _WIN32
-    const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+    const auto flush_result = std::fflush(file) == 0;
 #else
-    const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+    const auto flush_result = std::fflush(file) == 0;
 #endif
 
     if (!flush_result) {
@@ -320,6 +320,28 @@ bool IOFile::Flush() const {
     return flush_result;
 }
 
+bool IOFile::Commit() const {
+    if (!IsOpen()) {
+        return false;
+    }
+
+    errno = 0;
+
+#ifdef _WIN32
+    const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+#else
+    const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+#endif
+
+    if (!commit_result) {
+        const auto ec = std::error_code{errno, std::generic_category()};
+        LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}",
+                  PathToUTF8String(file_path), ec.message());
+    }
+
+    return commit_result;
+}
+
 bool IOFile::SetSize(u64 size) const {
     if (!IsOpen()) {
         return false;
@@ -347,6 +369,9 @@ u64 IOFile::GetSize() const {
         return 0;
     }
 
+    // Flush any unwritten buffered data into the file prior to retrieving the file size.
+    std::fflush(file);
+
     std::error_code ec;
 
     const auto file_size = fs::file_size(file_path, ec);
diff --git a/src/common/fs/file.h b/src/common/fs/file.h
index 588fe619d..2c4ab4332 100644
--- a/src/common/fs/file.h
+++ b/src/common/fs/file.h
@@ -396,13 +396,22 @@ public:
     [[nodiscard]] size_t WriteString(std::span<const char> string) const;
 
     /**
-     * Attempts to flush any unwritten buffered data into the file and flush the file into the disk.
+     * Attempts to flush any unwritten buffered data into the file.
      *
      * @returns True if the flush was successful, false otherwise.
      */
     bool Flush() const;
 
     /**
+     * Attempts to flush any unwritten buffered data and commit the file to the disk.
+     * Note that this is an expensive operation, as it forces the operating system to write
+     * the contents of the file associated with the file descriptor to the disk.
+     *
+     * @returns True if the commit was successful, false otherwise.
+     */
+    bool Commit() const;
+
+    /**
      * Resizes the file to a given size.
      * If the file is resized to a smaller size, the remainder of the file is discarded.
      * If the file is resized to a larger size, the new area appears as if zero-filled.
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b6fa4affb..61dddab3f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
 FileBackend::~FileBackend() = default;
 
 void FileBackend::Write(const Entry& entry) {
+    if (!file->IsOpen()) {
+        return;
+    }
+
     using namespace Common::Literals;
-    // prevent logs from going over the maximum size (in case its spamming and the user doesn't
-    // know)
+    // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
     constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
     constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
 
-    if (!file->IsOpen()) {
-        return;
-    }
+    const bool write_limit_exceeded =
+        bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
+        (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
 
-    if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) {
-        return;
-    } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) {
+    // Close the file after the write limit is exceeded.
+    if (write_limit_exceeded) {
+        file->Close();
         return;
     }
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 83b5b7676..b2b0dbe05 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -654,24 +654,19 @@ endif()
 
 if (MSVC)
     target_compile_options(core PRIVATE
-        /we4018 # 'expression' : signed/unsigned mismatch
-        /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
-        /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
+        /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+        /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
+        /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
         /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
-        /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
-        /we4305 # 'context' : truncation from 'type1' to 'type2'
         /we4456 # Declaration of 'identifier' hides previous local declaration
         /we4457 # Declaration of 'identifier' hides function parameter
         /we4458 # Declaration of 'identifier' hides class member
         /we4459 # Declaration of 'identifier' hides global declaration
-        /we4715 # 'function' : not all control paths return a value
     )
 else()
     target_compile_options(core PRIVATE
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=sign-compare
         -Werror=shadow
 
         $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 53b8b7ca0..7c0950bb0 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -345,8 +345,10 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
 static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type,
                            const Service::FileSystem::FileSystemController& fs_controller) {
     const auto load_dir = fs_controller.GetModificationLoadRoot(title_id);
+    const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
     if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
-        load_dir == nullptr || load_dir->GetSize() <= 0) {
+        ((load_dir == nullptr || load_dir->GetSize() <= 0) &&
+         (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) {
         return;
     }
 
@@ -356,7 +358,15 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
     }
 
     const auto& disabled = Settings::values.disabled_addons[title_id];
-    auto patch_dirs = load_dir->GetSubdirectories();
+    // The early-out above only requires one usable root, so either pointer may still be null.
+    std::vector<VirtualDir> patch_dirs;
+    if (load_dir != nullptr) {
+        patch_dirs = load_dir->GetSubdirectories();
+    }
+    if (sdmc_load_dir != nullptr &&
+        std::find(disabled.cbegin(), disabled.cend(), "SDMC") == disabled.cend()) {
+        patch_dirs.push_back(sdmc_load_dir);
+    }
     std::sort(patch_dirs.begin(), patch_dirs.end(),
               [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
 
@@ -402,7 +412,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
 }
 
 VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type,
-                                     VirtualFile update_raw) const {
+                                     VirtualFile update_raw, bool apply_layeredfs) const {
     const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}",
                                         title_id, static_cast<u8>(type));
 
@@ -442,7 +452,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
     }
 
     // LayeredFS
-    ApplyLayeredFS(romfs, title_id, type, fs_controller);
+    if (apply_layeredfs) {
+        ApplyLayeredFS(romfs, title_id, type, fs_controller);
+    }
 
     return romfs;
 }
@@ -524,6 +536,15 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
         }
     }
 
+    // SDMC mod directory (RomFS LayeredFS)
+    const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
+    if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0 &&
+        IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "romfs"))) {
+        const auto mod_disabled =
+            std::find(disabled.begin(), disabled.end(), "SDMC") != disabled.end();
+        out.insert_or_assign(mod_disabled ? "[D] SDMC" : "SDMC", "LayeredFS");
+    }
+
     // DLC
     const auto dlc_entries =
         content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index fb1853035..3be871f35 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -64,7 +64,8 @@ public:
     // - LayeredFS
     [[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset,
                                          ContentRecordType type = ContentRecordType::Program,
-                                         VirtualFile update_raw = nullptr) const;
+                                         VirtualFile update_raw = nullptr,
+                                         bool apply_layeredfs = true) const;
 
     // Returns a vector of pairs between patch names and patch versions.
     // i.e. Update 3.2.2 will return {"Update", "3.2.2"}
diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp
index cb56d8f2d..e5c72cd4d 100644
--- a/src/core/file_sys/sdmc_factory.cpp
+++ b/src/core/file_sys/sdmc_factory.cpp
@@ -12,23 +12,32 @@ namespace FileSys {
 
 constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB
 
-SDMCFactory::SDMCFactory(VirtualDir dir_)
-    : dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>(
-                                GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"),
-                                [](const VirtualFile& file, const NcaID& id) {
-                                    return NAX{file, id}.GetDecrypted();
-                                })),
+SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_)
+    : sd_dir(std::move(sd_dir_)), sd_mod_dir(std::move(sd_mod_dir_)),
+      contents(std::make_unique<RegisteredCache>(
+          GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/registered"),
+          [](const VirtualFile& file, const NcaID& id) {
+              return NAX{file, id}.GetDecrypted();
+          })),
       placeholder(std::make_unique<PlaceholderCache>(
-          GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/placehld"))) {}
+          GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/placehld"))) {}
 
 SDMCFactory::~SDMCFactory() = default;
 
 ResultVal<VirtualDir> SDMCFactory::Open() const {
-    return MakeResult<VirtualDir>(dir);
+    return MakeResult<VirtualDir>(sd_dir);
+}
+
+VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const {
+    // LayeredFS doesn't work on updates and title id-less homebrew
+    if (title_id == 0 || (title_id & 0xFFF) == 0x800) {
+        return nullptr;
+    }
+    return GetOrCreateDirectoryRelative(sd_mod_dir, fmt::format("/{:016X}", title_id));
 }
 
 VirtualDir SDMCFactory::GetSDMCContentDirectory() const {
-    return GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents");
+    return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents");
 }
 
 RegisteredCache* SDMCFactory::GetSDMCContents() const {
@@ -40,11 +49,11 @@ PlaceholderCache* SDMCFactory::GetSDMCPlaceholder() const {
 }
 
 VirtualDir SDMCFactory::GetImageDirectory() const {
-    return GetOrCreateDirectoryRelative(dir, "/Nintendo/Album");
+    return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Album");
 }
 
 u64 SDMCFactory::GetSDMCFreeSpace() const {
-    return GetSDMCTotalSpace() - dir->GetSize();
+    return GetSDMCTotalSpace() - sd_dir->GetSize();
 }
 
 u64 SDMCFactory::GetSDMCTotalSpace() const {
diff --git a/src/core/file_sys/sdmc_factory.h b/src/core/file_sys/sdmc_factory.h
index 2bb92ba93..3a3d11f3a 100644
--- a/src/core/file_sys/sdmc_factory.h
+++ b/src/core/file_sys/sdmc_factory.h
@@ -16,11 +16,12 @@ class PlaceholderCache;
 /// File system interface to the SDCard archive
 class SDMCFactory {
 public:
-    explicit SDMCFactory(VirtualDir dir);
+    explicit SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_);
     ~SDMCFactory();
 
     ResultVal<VirtualDir> Open() const;
 
+    VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
     VirtualDir GetSDMCContentDirectory() const;
 
     RegisteredCache* GetSDMCContents() const;
@@ -32,7 +33,8 @@ public:
     u64 GetSDMCTotalSpace() const;
 
 private:
-    VirtualDir dir;
+    VirtualDir sd_dir;
+    VirtualDir sd_mod_dir;
 
     std::unique_ptr<RegisteredCache> contents;
     std::unique_ptr<PlaceholderCache> placeholder;
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 61bda3786..ceff2532d 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -345,8 +345,12 @@ public:
     explicit RequestParser(u32* command_buffer) : RequestHelperBase(command_buffer) {}
 
     explicit RequestParser(Kernel::HLERequestContext& ctx) : RequestHelperBase(ctx) {
-        ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
-        Skip(ctx.GetDataPayloadOffset(), false);
+        // TIPC does not have data payload offset
+        if (!ctx.IsTipc()) {
+            ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
+            Skip(ctx.GetDataPayloadOffset(), false);
+        }
+
         // Skip the u64 command id, it's already stored in the context
         static constexpr u32 CommandIdSize = 2;
         Skip(CommandIdSize, false);
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index fec704c65..dd945e058 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -117,7 +117,7 @@ AOC_U::AOC_U(Core::System& system_)
         {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
         {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
         {9, nullptr, "GetAddOnContentLostErrorCode"},
-        {10, nullptr, "GetAddOnContentListChangedEventWithProcessId"},
+        {10, &AOC_U::GetAddOnContentListChangedEventWithProcessId, "GetAddOnContentListChangedEventWithProcessId"},
         {100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"},
         {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"},
         {110, nullptr, "CreateContentsServiceManager"},
@@ -257,6 +257,14 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) {
     rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
 }
 
+void AOC_U::GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AOC, "(STUBBED) called");
+
+    IPC::ResponseBuilder rb{ctx, 2, 1};
+    rb.Push(ResultSuccess);
+    rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
+}
+
 void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AOC, "(STUBBED) called");
 
diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h
index 65095baa2..bb6ffb8eb 100644
--- a/src/core/hle/service/aoc/aoc_u.h
+++ b/src/core/hle/service/aoc/aoc_u.h
@@ -28,6 +28,7 @@ private:
     void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx);
     void PrepareAddOnContent(Kernel::HLERequestContext& ctx);
     void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx);
+    void GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx);
     void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
     void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
 
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 800feba6e..b769fe959 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -96,7 +96,7 @@ private:
     void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Audio, "(STUBBED) called");
 
-        std::vector<u8> output_params(ctx.GetWriteBufferSize());
+        std::vector<u8> output_params(ctx.GetWriteBufferSize(), 0);
         auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params);
 
         if (result.IsSuccess()) {
@@ -110,17 +110,19 @@ private:
     void Start(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_Audio, "(STUBBED) called");
 
-        IPC::ResponseBuilder rb{ctx, 2};
+        const auto result = renderer->Start();
 
-        rb.Push(ResultSuccess);
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(result);
     }
 
     void Stop(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_Audio, "(STUBBED) called");
 
-        IPC::ResponseBuilder rb{ctx, 2};
+        const auto result = renderer->Stop();
 
-        rb.Push(ResultSuccess);
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(result);
     }
 
     void QuerySystemEvent(Kernel::HLERequestContext& ctx) {
@@ -288,7 +290,7 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(ResultSuccess);
-        rb.Push<u32>(1);
+        rb.Push<u32>(2);
     }
 
     // Should be similar to QueryAudioDeviceOutputEvent
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 10e6f7a64..33a6dbbb6 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -253,7 +253,11 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
     rb.Push<u32>(worker_buffer_sz);
 }
 
-void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
+void HwOpus::GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx) {
+    GetWorkBufferSize(ctx);
+}
+
+void HwOpus::OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     const auto sample_rate = rp.Pop<u32>();
     const auto channel_count = rp.Pop<u32>();
@@ -291,14 +295,49 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
         system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
 }
 
+// Pops sample_rate and channel_count from the request, creates a multistream Opus decoder for
+// them and returns it to the guest wrapped in an IHardwareOpusDecoderManager interface.
+void HwOpus::OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto sample_rate = rp.Pop<u32>();
+    const auto channel_count = rp.Pop<u32>();
+
+    LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}", sample_rate, channel_count);
+
+    ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
+                   sample_rate == 12000 || sample_rate == 8000,
+               "Invalid sample rate");
+    ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
+
+    const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+    const auto mapping_table = CreateMappingTable(channel_count);
+
+    int error = 0;
+    OpusDecoderPtr decoder{
+        opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
+                                        num_stereo_streams, mapping_table.data(), &error)};
+    if (error != OPUS_OK || decoder == nullptr) {
+        LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
+        IPC::ResponseBuilder rb{ctx, 2};
+        // TODO(ogniK): Use correct error code
+        rb.Push(ResultUnknown);
+        return;
+    }
+
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+    rb.Push(ResultSuccess);
+    rb.PushIpcInterface<IHardwareOpusDecoderManager>(
+        system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
+}
+
 HwOpus::HwOpus(Core::System& system_) : ServiceFramework{system_, "hwopus"} {
     static const FunctionInfo functions[] = {
-        {0, &HwOpus::OpenOpusDecoder, "OpenOpusDecoder"},
+        {0, &HwOpus::OpenHardwareOpusDecoder, "OpenHardwareOpusDecoder"},
         {1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"},
         {2, nullptr, "OpenOpusDecoderForMultiStream"},
         {3, nullptr, "GetWorkBufferSizeForMultiStream"},
-        {4, nullptr, "OpenHardwareOpusDecoderEx"},
-        {5, nullptr, "GetWorkBufferSizeEx"},
+        {4, &HwOpus::OpenHardwareOpusDecoderEx, "OpenHardwareOpusDecoderEx"},
+        {5, &HwOpus::GetWorkBufferSizeEx, "GetWorkBufferSizeEx"},
         {6, nullptr, "OpenHardwareOpusDecoderForMultiStreamEx"},
         {7, nullptr, "GetWorkBufferSizeForMultiStreamEx"},
     };
diff --git a/src/core/hle/service/audio/hwopus.h b/src/core/hle/service/audio/hwopus.h
index 4f921f18e..b74824ff3 100644
--- a/src/core/hle/service/audio/hwopus.h
+++ b/src/core/hle/service/audio/hwopus.h
@@ -18,8 +18,10 @@ public:
     ~HwOpus() override;
 
 private:
-    void OpenOpusDecoder(Kernel::HLERequestContext& ctx);
+    void OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx);
+    void OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx);
     void GetWorkBufferSize(Kernel::HLERequestContext& ctx);
+    void GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx);
 };
 
 } // namespace Service::Audio
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 3c16fe6c7..4a9b13e45 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -703,6 +703,16 @@ FileSys::VirtualDir FileSystemController::GetModificationLoadRoot(u64 title_id)
     return bis_factory->GetModificationLoadRoot(title_id);
 }
 
+FileSys::VirtualDir FileSystemController::GetSDMCModificationLoadRoot(u64 title_id) const {
+    LOG_TRACE(Service_FS, "Opening SDMC mod load root for tid={:016X}", title_id);
+
+    if (sdmc_factory == nullptr) {
+        return nullptr;
+    }
+
+    return sdmc_factory->GetSDMCModificationLoadRoot(title_id);
+}
+
 FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const {
     LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id);
 
@@ -733,20 +743,23 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
     }
 
     using YuzuPath = Common::FS::YuzuPath;
+    const auto sdmc_dir_path = Common::FS::GetYuzuPath(YuzuPath::SDMCDir);
+    const auto sdmc_load_dir_path = sdmc_dir_path / "atmosphere/contents";
     const auto rw_mode = FileSys::Mode::ReadWrite;
 
     auto nand_directory =
         vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode);
-    auto sd_directory =
-        vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::SDMCDir), rw_mode);
+    auto sd_directory = vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_dir_path), rw_mode);
     auto load_directory =
         vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read);
+    auto sd_load_directory =
+        vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_load_dir_path), FileSys::Mode::Read);
     auto dump_directory =
         vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode);
 
     if (bis_factory == nullptr) {
-        bis_factory =
-            std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory);
+        bis_factory = std::make_unique<FileSys::BISFactory>(
+            nand_directory, std::move(load_directory), std::move(dump_directory));
         system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND,
                                        bis_factory->GetSystemNANDContents());
         system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND,
@@ -759,7 +772,8 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
     }
 
     if (sdmc_factory == nullptr) {
-        sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory));
+        sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory),
+                                                              std::move(sd_load_directory));
         system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
                                        sdmc_factory->GetSDMCContents());
     }
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index b6b1b9220..d387af3cb 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -115,6 +115,7 @@ public:
     FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const;
     FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const;
 
+    FileSys::VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
     FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const;
     FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const;
 
diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp
index 114aff31c..869d2763f 100644
--- a/src/core/hle/service/mii/manager.cpp
+++ b/src/core/hle/service/mii/manager.cpp
@@ -20,6 +20,7 @@ namespace {
 
 constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4};
 
+constexpr std::size_t BaseMiiCount{2};
 constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()};
 
 constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'};
@@ -415,7 +416,7 @@ u32 MiiManager::GetCount(SourceFlag source_flag) const {
         count += 0;
     }
     if ((source_flag & SourceFlag::Default) != SourceFlag::None) {
-        count += DefaultMiiCount;
+        count += (DefaultMiiCount - BaseMiiCount);
     }
     return static_cast<u32>(count);
 }
@@ -445,7 +446,7 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_
         return MakeResult(std::move(result));
     }
 
-    for (std::size_t index = 0; index < DefaultMiiCount; index++) {
+    for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) {
         result.emplace_back(BuildDefault(index), Source::Default);
     }
 
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 7c5763f9c..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -34,28 +34,17 @@ if (MSVC)
         /W4
         /WX
 
-        # 'expression' : signed/unsigned mismatch
-        /we4018
-        # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
-        /we4244
-        # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
-        /we4245
-        # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
-        /we4254
-        # 'var' : conversion from 'size_t' to 'type', possible loss of data
-        /we4267
-        # 'context' : truncation from 'type1' to 'type2'
-        /we4305
+        /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+        /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
+        /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
+        /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
     )
 else()
     target_compile_options(input_common PRIVATE
         -Werror
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=reorder
         -Werror=shadow
-        -Werror=sign-compare
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
         -Werror=unused-variable
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index 100138d11..2fafd077f 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -27,6 +27,7 @@ public:
         down->SetCallback(callbacks);
         left->SetCallback(callbacks);
         right->SetCallback(callbacks);
+        modifier->SetCallback(callbacks);
     }
 
     bool IsAngleGreater(float old_angle, float new_angle) const {
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index 320f51ee6..a2f1bb67c 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -5,14 +5,7 @@
 #include <chrono>
 #include <thread>
 
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4200) // nonstandard extension used : zero-sized array in struct/union
-#endif
 #include <libusb.h>
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
 
 #include "common/logging/log.h"
 #include "common/param_package.h"
diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h
index a3d276697..1bdc9209e 100644
--- a/src/input_common/udp/protocol.h
+++ b/src/input_common/udp/protocol.h
@@ -8,14 +8,7 @@
 #include <optional>
 #include <type_traits>
 
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4701)
-#endif
 #include <boost/crc.hpp>
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
 
 #include "common/bit_field.h"
 #include "common/swap.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f9454bbaa..e4de55f4d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -292,12 +292,12 @@ endif()
 
 if (MSVC)
     target_compile_options(video_core PRIVATE
-        /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+        /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+        /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
         /we4456 # Declaration of 'identifier' hides previous local declaration
         /we4457 # Declaration of 'identifier' hides function parameter
         /we4458 # Declaration of 'identifier' hides class member
         /we4459 # Declaration of 'identifier' hides global declaration
-        /we4715 # 'function' : not all control paths return a value
     )
 else()
     target_compile_options(video_core PRIVATE
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9d726a6fb..cad7f902d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -99,7 +99,7 @@ class BufferCache {
     };
 
 public:
-    static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB;
+    static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
 
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                          Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -109,8 +109,6 @@ public:
 
     void TickFrame();
 
-    void RunGarbageCollector();
-
     void WriteMemory(VAddr cpu_addr, u64 size);
 
     void CachedWriteMemory(VAddr cpu_addr, u64 size);
@@ -197,6 +195,8 @@ private:
                ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
     }
 
+    void RunGarbageCollector();
+
     void BindHostIndexBuffer();
 
     void BindHostVertexBuffers();
@@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
 
 template <class P>
 void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
-    ForEachBufferInRange(cpu_addr, size,
-                         [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
+    ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+        DownloadBufferMemory(buffer, cpu_addr, size);
+    });
 }
 
 template <class P>
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a3fda1094..8b86ad050 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
         case ThiMethod::SetMethod1:
             LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
                       static_cast<u32>(nvdec_thi_state.method_0));
-            nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
-                                           data);
+            nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
             break;
         default:
             break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index d02dc6260..1b4bbc8ac 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
     av_free(ptr);
 }
 
-Codec::Codec(GPU& gpu_)
-    : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+    : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
       vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
 
 Codec::~Codec() {
@@ -43,46 +43,53 @@ Codec::~Codec() {
     avcodec_close(av_codec_ctx);
 }
 
+// Creates and opens the libavcodec decoder matching current_codec. `initialized` is set only
+// when avcodec_open2() succeeds; for any other codec, or on failure, it stays false.
+void Codec::Initialize() {
+    AVCodecID codec{AV_CODEC_ID_NONE};
+    switch (current_codec) {
+    case NvdecCommon::VideoCodec::H264:
+        codec = AV_CODEC_ID_H264;
+        break;
+    case NvdecCommon::VideoCodec::Vp9:
+        codec = AV_CODEC_ID_VP9;
+        break;
+    default:
+        return;
+    }
+    av_codec = avcodec_find_decoder(codec);
+    av_codec_ctx = avcodec_alloc_context3(av_codec);
+    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+
+    // TODO(ameerj): libavcodec gpu hw acceleration
+
+    const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
+    if (av_error < 0) {
+        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
+        avcodec_close(av_codec_ctx);
+        return;
+    }
+    initialized = true;
+}
+
 void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
     if (current_codec != codec) {
-        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
         current_codec = codec;
+        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
     }
 }
 
-void Codec::StateWrite(u32 offset, u64 arguments) {
-    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
-    std::memcpy(state_offset, &arguments, sizeof(u64));
-}
-
 void Codec::Decode() {
-    bool is_first_frame = false;
+    const bool is_first_frame = !initialized;
     if (!initialized) {
-        if (current_codec == NvdecCommon::VideoCodec::H264) {
-            av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
-        } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
-            av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
-        } else {
-            LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
-            return;
-        }
-
-        av_codec_ctx = avcodec_alloc_context3(av_codec);
-        av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-
-        // TODO(ameerj): libavcodec gpu hw acceleration
-
-        const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
-        if (av_error < 0) {
-            LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
-            avcodec_close(av_codec_ctx);
-            return;
-        }
-        initialized = true;
-        is_first_frame = true;
+        Initialize();
     }
-    bool vp9_hidden_frame = false;
+    if (!initialized) {
+        // Initialize() bailed out (unsupported codec or avcodec_open2 failure); nothing to decode.
+        return;
+    }
 
+    bool vp9_hidden_frame = false;
     AVPacket packet{};
     av_init_packet(&packet);
     std::vector<u8> frame_data;
@@ -95,7 +102,7 @@ void Codec::Decode() {
     }
 
     packet.data = frame_data.data();
-    packet.size = static_cast<int>(frame_data.size());
+    packet.size = static_cast<s32>(frame_data.size());
 
     avcodec_send_packet(av_codec_ctx, &packet);
 
@@ -127,4 +134,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
     return current_codec;
 }
 
+std::string_view Codec::GetCurrentCodecName() const {
+    switch (current_codec) {
+    case NvdecCommon::VideoCodec::None:
+        return "None";
+    case NvdecCommon::VideoCodec::H264:
+        return "H264";
+    case NvdecCommon::VideoCodec::Vp8:
+        return "VP8";
+    case NvdecCommon::VideoCodec::H265:
+        return "H265";
+    case NvdecCommon::VideoCodec::Vp9:
+        return "VP9";
+    default:
+        return "Unknown";
+    }
+}
+
 } // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 8a2a6c360..96c823c76 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -34,15 +34,15 @@ class VP9;
 
 class Codec {
 public:
-    explicit Codec(GPU& gpu);
+    explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
     ~Codec();
 
+    /// Initialize the codec; the outcome is recorded in `initialized` rather than returned
+    void Initialize();
+
     /// Sets NVDEC video stream codec
     void SetTargetCodec(NvdecCommon::VideoCodec codec);
 
-    /// Populate NvdecRegisters state with argument value at the provided offset
-    void StateWrite(u32 offset, u64 arguments);
-
     /// Call decoders to construct headers, decode AVFrame with ffmpeg
     void Decode();
 
@@ -51,6 +51,8 @@ public:
 
     /// Returns the value of current_codec
     [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+    /// Return name of the current codec
+    [[nodiscard]] std::string_view GetCurrentCodecName() const;
 
 private:
     bool initialized{};
@@ -60,10 +62,10 @@ private:
     AVCodecContext* av_codec_ctx{nullptr};
 
     GPU& gpu;
+    const NvdecCommon::NvdecRegisters& state;
     std::unique_ptr<Decoder::H264> h264_decoder;
     std::unique_ptr<Decoder::VP9> vp9_decoder;
 
-    NvdecCommon::NvdecRegisters state{};
     std::queue<AVFramePtr> av_frames{};
 };
 
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index fea6aed98..5fb6d45ee 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -45,134 +45,129 @@ H264::~H264() = default;
 
 const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
                                                 bool is_first_frame) {
-    H264DecoderContext context{};
+    H264DecoderContext context;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
 
-    const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
+    const s64 frame_number = context.h264_parameter_set.frame_number.Value();
     if (!is_first_frame && frame_number != 0) {
-        frame.resize(context.frame_data_size);
-
+        frame.resize(context.stream_len);
         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
-    } else {
-        /// Encode header
-        H264BitWriter writer{};
-        writer.WriteU(1, 24);
-        writer.WriteU(0, 1);
-        writer.WriteU(3, 2);
-        writer.WriteU(7, 5);
-        writer.WriteU(100, 8);
-        writer.WriteU(0, 8);
-        writer.WriteU(31, 8);
-        writer.WriteUe(0);
-        const auto chroma_format_idc =
-            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
-        writer.WriteUe(chroma_format_idc);
-        if (chroma_format_idc == 3) {
-            writer.WriteBit(false);
-        }
-
-        writer.WriteUe(0);
-        writer.WriteUe(0);
-        writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
-        writer.WriteBit(false); // Scaling matrix present flag
-
-        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
-        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
-        writer.WriteUe(order_cnt_type);
-        if (order_cnt_type == 0) {
-            writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
-        } else if (order_cnt_type == 1) {
-            writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
-
-            writer.WriteSe(0);
-            writer.WriteSe(0);
-            writer.WriteUe(0);
-        }
-
-        const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
-                               (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+        return frame;
+    }
 
-        writer.WriteUe(16);
+    // Encode header
+    H264BitWriter writer{};
+    writer.WriteU(1, 24);
+    writer.WriteU(0, 1);
+    writer.WriteU(3, 2);
+    writer.WriteU(7, 5);
+    writer.WriteU(100, 8);
+    writer.WriteU(0, 8);
+    writer.WriteU(31, 8);
+    writer.WriteUe(0);
+    const u32 chroma_format_idc =
+        static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
+    writer.WriteUe(chroma_format_idc);
+    if (chroma_format_idc == 3) {
         writer.WriteBit(false);
-        writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
-        writer.WriteUe(pic_height - 1);
-        writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
-
-        if (!context.h264_parameter_set.frame_mbs_only_flag) {
-            writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
-        }
+    }
 
-        writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
-        writer.WriteBit(false); // Frame cropping flag
-        writer.WriteBit(false); // VUI parameter present flag
+    writer.WriteUe(0);
+    writer.WriteUe(0);
+    writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
+    writer.WriteBit(false); // Scaling matrix present flag
 
-        writer.End();
+    writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
 
-        // H264 PPS
-        writer.WriteU(1, 24);
-        writer.WriteU(0, 1);
-        writer.WriteU(3, 2);
-        writer.WriteU(8, 5);
+    const auto order_cnt_type =
+        static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
+    writer.WriteUe(order_cnt_type);
+    if (order_cnt_type == 0) {
+        writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
+    } else if (order_cnt_type == 1) {
+        writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
 
+        writer.WriteSe(0);
+        writer.WriteSe(0);
         writer.WriteUe(0);
-        writer.WriteUe(0);
+    }
 
-        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
-        writer.WriteBit(false);
-        writer.WriteUe(0);
-        writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
-        writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
-        writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
-        writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
-        s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
-        pic_init_qp = (pic_init_qp << 26) >> 26;
-        writer.WriteSe(pic_init_qp);
-        writer.WriteSe(0);
-        s32 chroma_qp_index_offset =
-            static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
-        chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
+    const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
+                           (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+
+    writer.WriteUe(16);
+    writer.WriteBit(false);
+    writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
+    writer.WriteUe(pic_height - 1);
+    writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
 
-        writer.WriteSe(chroma_qp_index_offset);
-        writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
-        writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
-        writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
-        writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+    if (!context.h264_parameter_set.frame_mbs_only_flag) {
+        writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
+    }
 
+    writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
+    writer.WriteBit(false); // Frame cropping flag
+    writer.WriteBit(false); // VUI parameter present flag
+
+    writer.End();
+
+    // H264 PPS
+    writer.WriteU(1, 24);
+    writer.WriteU(0, 1);
+    writer.WriteU(3, 2);
+    writer.WriteU(8, 5);
+
+    writer.WriteUe(0);
+    writer.WriteUe(0);
+
+    writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
+    writer.WriteBit(false);
+    writer.WriteUe(0);
+    writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
+    writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
+    writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
+    writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
+    s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
+    writer.WriteSe(pic_init_qp);
+    writer.WriteSe(0);
+    s32 chroma_qp_index_offset =
+        static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
+
+    writer.WriteSe(chroma_qp_index_offset);
+    writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
+    writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
+    writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
+    writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+
+    writer.WriteBit(true);
+
+    for (s32 index = 0; index < 6; index++) {
         writer.WriteBit(true);
+        std::span<const u8> matrix{context.weight_scale};
+        writer.WriteScalingList(matrix, index * 16, 16);
+    }
 
-        for (s32 index = 0; index < 6; index++) {
+    if (context.h264_parameter_set.transform_8x8_mode_flag) {
+        for (s32 index = 0; index < 2; index++) {
             writer.WriteBit(true);
-            const auto matrix_x4 =
-                std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
-            writer.WriteScalingList(matrix_x4, index * 16, 16);
-        }
-
-        if (context.h264_parameter_set.transform_8x8_mode_flag) {
-            for (s32 index = 0; index < 2; index++) {
-                writer.WriteBit(true);
-                const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
-                                                       context.scaling_matrix_8.end());
-
-                writer.WriteScalingList(matrix_x8, index * 64, 64);
-            }
+            std::span<const u8> matrix{context.weight_scale_8x8};
+            writer.WriteScalingList(matrix, index * 64, 64);
         }
+    }
 
-        s32 chroma_qp_index_offset2 =
-            static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
-        chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
+    s32 chroma_qp_index_offset2 =
+        static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
 
-        writer.WriteSe(chroma_qp_index_offset2);
+    writer.WriteSe(chroma_qp_index_offset2);
 
-        writer.End();
+    writer.End();
 
-        const auto& encoded_header = writer.GetByteArray();
-        frame.resize(encoded_header.size() + context.frame_data_size);
-        std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
+    const auto& encoded_header = writer.GetByteArray();
+    frame.resize(encoded_header.size() + context.stream_len);
+    std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
 
-        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
-                                      frame.data() + encoded_header.size(),
-                                      context.frame_data_size);
-    }
+    gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+                                  frame.data() + encoded_header.size(), context.stream_len);
 
     return frame;
 }
@@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
     WriteBits(state ? 1 : 0, 1);
 }
 
-void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
+void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
     std::vector<u8> scan(count);
     if (count == 16) {
         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 0f3a1d9f3..bfe84a472 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -20,7 +20,9 @@
 
 #pragma once
 
+#include <span>
 #include <vector>
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/command_classes/nvdec_common.h"
@@ -48,7 +50,7 @@ public:
 
     /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
     /// Writes the scaling matrices of the sream
-    void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
+    void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
 
     /// Return the bitstream as a vector.
     [[nodiscard]] std::vector<u8>& GetByteArray();
@@ -78,40 +80,110 @@ public:
         const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
 
 private:
+    std::vector<u8> frame;
+    GPU& gpu;
+
     struct H264ParameterSet {
-        u32 log2_max_pic_order_cnt{};
-        u32 delta_pic_order_always_zero_flag{};
-        u32 frame_mbs_only_flag{};
-        u32 pic_width_in_mbs{};
-        u32 pic_height_in_map_units{};
-        INSERT_PADDING_WORDS(1);
-        u32 entropy_coding_mode_flag{};
-        u32 bottom_field_pic_order_flag{};
-        u32 num_refidx_l0_default_active{};
-        u32 num_refidx_l1_default_active{};
-        u32 deblocking_filter_control_flag{};
-        u32 redundant_pic_count_flag{};
-        u32 transform_8x8_mode_flag{};
-        INSERT_PADDING_WORDS(9);
-        u64 flags{};
-        u32 frame_number{};
-        u32 frame_number2{};
+        s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
+        s32 delta_pic_order_always_zero_flag;  ///< 0x04
+        s32 frame_mbs_only_flag;               ///< 0x08
+        u32 pic_width_in_mbs;                  ///< 0x0C
+        u32 frame_height_in_map_units;         ///< 0x10
+        union {                                ///< 0x14
+            BitField<0, 2, u32> tile_format;
+            BitField<2, 3, u32> gob_height;
+        };
+        u32 entropy_coding_mode_flag;               ///< 0x18
+        s32 pic_order_present_flag;                 ///< 0x1C
+        s32 num_refidx_l0_default_active;           ///< 0x20
+        s32 num_refidx_l1_default_active;           ///< 0x24
+        s32 deblocking_filter_control_present_flag; ///< 0x28
+        s32 redundant_pic_cnt_present_flag;         ///< 0x2C
+        u32 transform_8x8_mode_flag;                ///< 0x30
+        u32 pitch_luma;                             ///< 0x34
+        u32 pitch_chroma;                           ///< 0x38
+        u32 luma_top_offset;                        ///< 0x3C
+        u32 luma_bot_offset;                        ///< 0x40
+        u32 luma_frame_offset;                      ///< 0x44
+        u32 chroma_top_offset;                      ///< 0x48
+        u32 chroma_bot_offset;                      ///< 0x4C
+        u32 chroma_frame_offset;                    ///< 0x50
+        u32 hist_buffer_size;                       ///< 0x54
+        union {                                     ///< 0x58
+            union {
+                BitField<0, 1, u64> mbaff_frame;
+                BitField<1, 1, u64> direct_8x8_inference;
+                BitField<2, 1, u64> weighted_pred;
+                BitField<3, 1, u64> constrained_intra_pred;
+                BitField<4, 1, u64> ref_pic;
+                BitField<5, 1, u64> field_pic;
+                BitField<6, 1, u64> bottom_field;
+                BitField<7, 1, u64> second_field;
+            } flags;
+            BitField<8, 4, u64> log2_max_frame_num_minus4;
+            BitField<12, 2, u64> chroma_format_idc;
+            BitField<14, 2, u64> pic_order_cnt_type;
+            BitField<16, 6, s64> pic_init_qp_minus26;
+            BitField<22, 5, s64> chroma_qp_index_offset;
+            BitField<27, 5, s64> second_chroma_qp_index_offset;
+            BitField<32, 2, u64> weighted_bipred_idc;
+            BitField<34, 7, u64> curr_pic_idx;
+            BitField<41, 5, u64> curr_col_idx;
+            BitField<46, 16, u64> frame_number;
+            BitField<62, 1, u64> frame_surfaces;
+            BitField<63, 1, u64> output_memory_layout;
+        };
     };
-    static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
+    static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
 
     struct H264DecoderContext {
-        INSERT_PADDING_BYTES(0x48);
-        u32 frame_data_size{};
-        INSERT_PADDING_BYTES(0xc);
-        H264ParameterSet h264_parameter_set{};
-        INSERT_PADDING_BYTES(0x100);
-        std::array<u8, 0x60> scaling_matrix_4;
-        std::array<u8, 0x80> scaling_matrix_8;
+        INSERT_PADDING_WORDS_NOINIT(18);       ///< 0x0000
+        u32 stream_len;                        ///< 0x0048
+        INSERT_PADDING_WORDS_NOINIT(3);        ///< 0x004C
+        H264ParameterSet h264_parameter_set;   ///< 0x0058
+        INSERT_PADDING_WORDS_NOINIT(66);       ///< 0x00B8
+        std::array<u8, 0x60> weight_scale;     ///< 0x01C0
+        std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
     };
-    static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
-
-    std::vector<u8> frame;
-    GPU& gpu;
+    static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(H264ParameterSet, field_name) == position,                              \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
+    ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
+    ASSERT_POSITION(frame_mbs_only_flag, 0x08);
+    ASSERT_POSITION(pic_width_in_mbs, 0x0C);
+    ASSERT_POSITION(frame_height_in_map_units, 0x10);
+    ASSERT_POSITION(tile_format, 0x14);
+    ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
+    ASSERT_POSITION(pic_order_present_flag, 0x1C);
+    ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
+    ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
+    ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
+    ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
+    ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
+    ASSERT_POSITION(pitch_luma, 0x34);
+    ASSERT_POSITION(pitch_chroma, 0x38);
+    ASSERT_POSITION(luma_top_offset, 0x3C);
+    ASSERT_POSITION(luma_bot_offset, 0x40);
+    ASSERT_POSITION(luma_frame_offset, 0x44);
+    ASSERT_POSITION(chroma_top_offset, 0x48);
+    ASSERT_POSITION(chroma_bot_offset, 0x4C);
+    ASSERT_POSITION(chroma_frame_offset, 0x50);
+    ASSERT_POSITION(hist_buffer_size, 0x54);
+    ASSERT_POSITION(flags, 0x58);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(H264DecoderContext, field_name) == position,                            \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_POSITION(stream_len, 0x48);
+    ASSERT_POSITION(h264_parameter_set, 0x58);
+    ASSERT_POSITION(weight_scale, 0x1C0);
+#undef ASSERT_POSITION
 };
 
 } // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 29bb31418..902bc2a98 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
 }
 
 Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
-    PictureInfo picture_info{};
+    PictureInfo picture_info;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
     Vp9PictureInfo vp9_info = picture_info.Convert();
 
@@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
 }
 
 void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
-    EntropyProbs entropy{};
+    EntropyProbs entropy;
     gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
     entropy.Convert(dst);
 }
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 139501a1c..2da14f3ca 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -15,10 +15,10 @@ class GPU;
 
 namespace Decoder {
 struct Vp9FrameDimensions {
-    s16 width{};
-    s16 height{};
-    s16 luma_pitch{};
-    s16 chroma_pitch{};
+    s16 width;
+    s16 height;
+    s16 luma_pitch;
+    s16 chroma_pitch;
 };
 static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
 
@@ -49,87 +49,87 @@ enum class TxMode {
 };
 
 struct Segmentation {
-    u8 enabled{};
-    u8 update_map{};
-    u8 temporal_update{};
-    u8 abs_delta{};
-    std::array<u32, 8> feature_mask{};
-    std::array<std::array<s16, 4>, 8> feature_data{};
+    u8 enabled;
+    u8 update_map;
+    u8 temporal_update;
+    u8 abs_delta;
+    std::array<u32, 8> feature_mask;
+    std::array<std::array<s16, 4>, 8> feature_data;
 };
 static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
 
 struct LoopFilter {
-    u8 mode_ref_delta_enabled{};
-    std::array<s8, 4> ref_deltas{};
-    std::array<s8, 2> mode_deltas{};
+    u8 mode_ref_delta_enabled;
+    std::array<s8, 4> ref_deltas;
+    std::array<s8, 2> mode_deltas;
 };
 static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
 
 struct Vp9EntropyProbs {
-    std::array<u8, 36> y_mode_prob{};
-    std::array<u8, 64> partition_prob{};
-    std::array<u8, 1728> coef_probs{};
-    std::array<u8, 8> switchable_interp_prob{};
-    std::array<u8, 28> inter_mode_prob{};
-    std::array<u8, 4> intra_inter_prob{};
-    std::array<u8, 5> comp_inter_prob{};
-    std::array<u8, 10> single_ref_prob{};
-    std::array<u8, 5> comp_ref_prob{};
-    std::array<u8, 6> tx_32x32_prob{};
-    std::array<u8, 4> tx_16x16_prob{};
-    std::array<u8, 2> tx_8x8_prob{};
-    std::array<u8, 3> skip_probs{};
-    std::array<u8, 3> joints{};
-    std::array<u8, 2> sign{};
-    std::array<u8, 20> classes{};
-    std::array<u8, 2> class_0{};
-    std::array<u8, 20> prob_bits{};
-    std::array<u8, 12> class_0_fr{};
-    std::array<u8, 6> fr{};
-    std::array<u8, 2> class_0_hp{};
-    std::array<u8, 2> high_precision{};
+    std::array<u8, 36> y_mode_prob;           ///< 0x0000
+    std::array<u8, 64> partition_prob;        ///< 0x0024
+    std::array<u8, 1728> coef_probs;          ///< 0x0064
+    std::array<u8, 8> switchable_interp_prob; ///< 0x0724
+    std::array<u8, 28> inter_mode_prob;       ///< 0x072C
+    std::array<u8, 4> intra_inter_prob;       ///< 0x0748
+    std::array<u8, 5> comp_inter_prob;        ///< 0x074C
+    std::array<u8, 10> single_ref_prob;       ///< 0x0751
+    std::array<u8, 5> comp_ref_prob;          ///< 0x075B
+    std::array<u8, 6> tx_32x32_prob;          ///< 0x0760
+    std::array<u8, 4> tx_16x16_prob;          ///< 0x0766
+    std::array<u8, 2> tx_8x8_prob;            ///< 0x076A
+    std::array<u8, 3> skip_probs;             ///< 0x076C
+    std::array<u8, 3> joints;                 ///< 0x076F
+    std::array<u8, 2> sign;                   ///< 0x0772
+    std::array<u8, 20> classes;               ///< 0x0774
+    std::array<u8, 2> class_0;                ///< 0x0788
+    std::array<u8, 20> prob_bits;             ///< 0x078A
+    std::array<u8, 12> class_0_fr;            ///< 0x079E
+    std::array<u8, 6> fr;                     ///< 0x07AA
+    std::array<u8, 2> class_0_hp;             ///< 0x07B0
+    std::array<u8, 2> high_precision;         ///< 0x07B2
 };
 static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
 
 struct Vp9PictureInfo {
-    bool is_key_frame{};
-    bool intra_only{};
-    bool last_frame_was_key{};
-    bool frame_size_changed{};
-    bool error_resilient_mode{};
-    bool last_frame_shown{};
-    bool show_frame{};
-    std::array<s8, 4> ref_frame_sign_bias{};
-    s32 base_q_index{};
-    s32 y_dc_delta_q{};
-    s32 uv_dc_delta_q{};
-    s32 uv_ac_delta_q{};
-    bool lossless{};
-    s32 transform_mode{};
-    bool allow_high_precision_mv{};
-    s32 interp_filter{};
-    s32 reference_mode{};
-    s8 comp_fixed_ref{};
-    std::array<s8, 2> comp_var_ref{};
-    s32 log2_tile_cols{};
-    s32 log2_tile_rows{};
-    bool segment_enabled{};
-    bool segment_map_update{};
-    bool segment_map_temporal_update{};
-    s32 segment_abs_delta{};
-    std::array<u32, 8> segment_feature_enable{};
-    std::array<std::array<s16, 4>, 8> segment_feature_data{};
-    bool mode_ref_delta_enabled{};
-    bool use_prev_in_find_mv_refs{};
-    std::array<s8, 4> ref_deltas{};
-    std::array<s8, 2> mode_deltas{};
-    Vp9EntropyProbs entropy{};
-    Vp9FrameDimensions frame_size{};
-    u8 first_level{};
-    u8 sharpness_level{};
-    u32 bitstream_size{};
-    std::array<u64, 4> frame_offsets{};
-    std::array<bool, 4> refresh_frame{};
+    bool is_key_frame;
+    bool intra_only;
+    bool last_frame_was_key;
+    bool frame_size_changed;
+    bool error_resilient_mode;
+    bool last_frame_shown;
+    bool show_frame;
+    std::array<s8, 4> ref_frame_sign_bias;
+    s32 base_q_index;
+    s32 y_dc_delta_q;
+    s32 uv_dc_delta_q;
+    s32 uv_ac_delta_q;
+    bool lossless;
+    s32 transform_mode;
+    bool allow_high_precision_mv;
+    s32 interp_filter;
+    s32 reference_mode;
+    s8 comp_fixed_ref;
+    std::array<s8, 2> comp_var_ref;
+    s32 log2_tile_cols;
+    s32 log2_tile_rows;
+    bool segment_enabled;
+    bool segment_map_update;
+    bool segment_map_temporal_update;
+    s32 segment_abs_delta;
+    std::array<u32, 8> segment_feature_enable;
+    std::array<std::array<s16, 4>, 8> segment_feature_data;
+    bool mode_ref_delta_enabled;
+    bool use_prev_in_find_mv_refs;
+    std::array<s8, 4> ref_deltas;
+    std::array<s8, 2> mode_deltas;
+    Vp9EntropyProbs entropy;
+    Vp9FrameDimensions frame_size;
+    u8 first_level;
+    u8 sharpness_level;
+    u32 bitstream_size;
+    std::array<u64, 4> frame_offsets;
+    std::array<bool, 4> refresh_frame;
 };
 
 struct Vp9FrameContainer {
@@ -138,35 +138,35 @@ struct Vp9FrameContainer {
 };
 
 struct PictureInfo {
-    INSERT_PADDING_WORDS(12);
-    u32 bitstream_size{};
-    INSERT_PADDING_WORDS(5);
-    Vp9FrameDimensions last_frame_size{};
-    Vp9FrameDimensions golden_frame_size{};
-    Vp9FrameDimensions alt_frame_size{};
-    Vp9FrameDimensions current_frame_size{};
-    u32 vp9_flags{};
-    std::array<s8, 4> ref_frame_sign_bias{};
-    u8 first_level{};
-    u8 sharpness_level{};
-    u8 base_q_index{};
-    u8 y_dc_delta_q{};
-    u8 uv_ac_delta_q{};
-    u8 uv_dc_delta_q{};
-    u8 lossless{};
-    u8 tx_mode{};
-    u8 allow_high_precision_mv{};
-    u8 interp_filter{};
-    u8 reference_mode{};
-    s8 comp_fixed_ref{};
-    std::array<s8, 2> comp_var_ref{};
-    u8 log2_tile_cols{};
-    u8 log2_tile_rows{};
-    Segmentation segmentation{};
-    LoopFilter loop_filter{};
-    INSERT_PADDING_BYTES(5);
-    u32 surface_params{};
-    INSERT_PADDING_WORDS(3);
+    INSERT_PADDING_WORDS_NOINIT(12);       ///< 0x00
+    u32 bitstream_size;                    ///< 0x30
+    INSERT_PADDING_WORDS_NOINIT(5);        ///< 0x34
+    Vp9FrameDimensions last_frame_size;    ///< 0x48
+    Vp9FrameDimensions golden_frame_size;  ///< 0x50
+    Vp9FrameDimensions alt_frame_size;     ///< 0x58
+    Vp9FrameDimensions current_frame_size; ///< 0x60
+    u32 vp9_flags;                         ///< 0x68
+    std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
+    u8 first_level;                        ///< 0x70
+    u8 sharpness_level;                    ///< 0x71
+    u8 base_q_index;                       ///< 0x72
+    u8 y_dc_delta_q;                       ///< 0x73
+    u8 uv_ac_delta_q;                      ///< 0x74
+    u8 uv_dc_delta_q;                      ///< 0x75
+    u8 lossless;                           ///< 0x76
+    u8 tx_mode;                            ///< 0x77
+    u8 allow_high_precision_mv;            ///< 0x78
+    u8 interp_filter;                      ///< 0x79
+    u8 reference_mode;                     ///< 0x7A
+    s8 comp_fixed_ref;                     ///< 0x7B
+    std::array<s8, 2> comp_var_ref;        ///< 0x7C
+    u8 log2_tile_cols;                     ///< 0x7E
+    u8 log2_tile_rows;                     ///< 0x7F
+    Segmentation segmentation;             ///< 0x80
+    LoopFilter loop_filter;                ///< 0xE4
+    INSERT_PADDING_BYTES_NOINIT(5);        ///< 0xEB
+    u32 surface_params;                    ///< 0xF0
+    INSERT_PADDING_WORDS_NOINIT(3);        ///< 0xF4
 
     [[nodiscard]] Vp9PictureInfo Convert() const {
         return {
@@ -176,6 +176,7 @@ struct PictureInfo {
             .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
             .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
             .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
+            .show_frame = false,
             .ref_frame_sign_bias = ref_frame_sign_bias,
             .base_q_index = base_q_index,
             .y_dc_delta_q = y_dc_delta_q,
@@ -204,45 +205,48 @@ struct PictureInfo {
                                         !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
             .ref_deltas = loop_filter.ref_deltas,
             .mode_deltas = loop_filter.mode_deltas,
+            .entropy{},
             .frame_size = current_frame_size,
             .first_level = first_level,
             .sharpness_level = sharpness_level,
             .bitstream_size = bitstream_size,
+            .frame_offsets{},
+            .refresh_frame{},
         };
     }
 };
 static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
 
 struct EntropyProbs {
-    INSERT_PADDING_BYTES(1024);
-    std::array<u8, 28> inter_mode_prob{};
-    std::array<u8, 4> intra_inter_prob{};
-    INSERT_PADDING_BYTES(80);
-    std::array<u8, 2> tx_8x8_prob{};
-    std::array<u8, 4> tx_16x16_prob{};
-    std::array<u8, 6> tx_32x32_prob{};
-    std::array<u8, 4> y_mode_prob_e8{};
-    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
-    INSERT_PADDING_BYTES(64);
-    std::array<u8, 64> partition_prob{};
-    INSERT_PADDING_BYTES(10);
-    std::array<u8, 8> switchable_interp_prob{};
-    std::array<u8, 5> comp_inter_prob{};
-    std::array<u8, 3> skip_probs{};
-    INSERT_PADDING_BYTES(1);
-    std::array<u8, 3> joints{};
-    std::array<u8, 2> sign{};
-    std::array<u8, 2> class_0{};
-    std::array<u8, 6> fr{};
-    std::array<u8, 2> class_0_hp{};
-    std::array<u8, 2> high_precision{};
-    std::array<u8, 20> classes{};
-    std::array<u8, 12> class_0_fr{};
-    std::array<u8, 20> pred_bits{};
-    std::array<u8, 10> single_ref_prob{};
-    std::array<u8, 5> comp_ref_prob{};
-    INSERT_PADDING_BYTES(17);
-    std::array<u8, 2304> coef_probs{};
+    INSERT_PADDING_BYTES_NOINIT(1024);                 ///< 0x0000
+    std::array<u8, 28> inter_mode_prob;                ///< 0x0400
+    std::array<u8, 4> intra_inter_prob;                ///< 0x041C
+    INSERT_PADDING_BYTES_NOINIT(80);                   ///< 0x0420
+    std::array<u8, 2> tx_8x8_prob;                     ///< 0x0470
+    std::array<u8, 4> tx_16x16_prob;                   ///< 0x0472
+    std::array<u8, 6> tx_32x32_prob;                   ///< 0x0476
+    std::array<u8, 4> y_mode_prob_e8;                  ///< 0x047C
+    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
+    INSERT_PADDING_BYTES_NOINIT(64);                   ///< 0x04A0
+    std::array<u8, 64> partition_prob;                 ///< 0x04E0
+    INSERT_PADDING_BYTES_NOINIT(10);                   ///< 0x0520
+    std::array<u8, 8> switchable_interp_prob;          ///< 0x052A
+    std::array<u8, 5> comp_inter_prob;                 ///< 0x0532
+    std::array<u8, 3> skip_probs;                      ///< 0x0537
+    INSERT_PADDING_BYTES_NOINIT(1);                    ///< 0x053A
+    std::array<u8, 3> joints;                          ///< 0x053B
+    std::array<u8, 2> sign;                            ///< 0x053E
+    std::array<u8, 2> class_0;                         ///< 0x0540
+    std::array<u8, 6> fr;                              ///< 0x0542
+    std::array<u8, 2> class_0_hp;                      ///< 0x0548
+    std::array<u8, 2> high_precision;                  ///< 0x054A
+    std::array<u8, 20> classes;                        ///< 0x054C
+    std::array<u8, 12> class_0_fr;                     ///< 0x0560
+    std::array<u8, 20> pred_bits;                      ///< 0x056C
+    std::array<u8, 10> single_ref_prob;                ///< 0x0580
+    std::array<u8, 5> comp_ref_prob;                   ///< 0x058A
+    INSERT_PADDING_BYTES_NOINIT(17);                   ///< 0x058F
+    std::array<u8, 2304> coef_probs;                   ///< 0x05A0
 
     void Convert(Vp9EntropyProbs& fc) {
         fc.inter_mode_prob = inter_mode_prob;
@@ -293,10 +297,45 @@ struct RefPoolElement {
 };
 
 struct FrameContexts {
-    s64 from{};
-    bool adapted{};
-    Vp9EntropyProbs probs{};
+    s64 from;
+    bool adapted;
+    Vp9EntropyProbs probs;
 };
 
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(Vp9EntropyProbs, field_name) == position,                               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(partition_prob, 0x0024);
+ASSERT_POSITION(switchable_interp_prob, 0x0724);
+ASSERT_POSITION(sign, 0x0772);
+ASSERT_POSITION(class_0_fr, 0x079E);
+ASSERT_POSITION(high_precision, 0x07B2);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(PictureInfo, field_name) == position,                                   \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(bitstream_size, 0x30);
+ASSERT_POSITION(last_frame_size, 0x48);
+ASSERT_POSITION(first_level, 0x70);
+ASSERT_POSITION(segmentation, 0x80);
+ASSERT_POSITION(loop_filter, 0xE4);
+ASSERT_POSITION(surface_params, 0xF0);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(EntropyProbs, field_name) == position,                                  \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(inter_mode_prob, 0x400);
+ASSERT_POSITION(tx_8x8_prob, 0x470);
+ASSERT_POSITION(partition_prob, 0x4E0);
+ASSERT_POSITION(class_0, 0x540);
+ASSERT_POSITION(class_0_fr, 0x560);
+ASSERT_POSITION(coef_probs, 0x5A0);
+#undef ASSERT_POSITION
+
 }; // namespace Decoder
 }; // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index e4f919afd..b5e3b70fc 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -8,22 +8,21 @@
 
 namespace Tegra {
 
-Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
+#define NVDEC_REG_INDEX(field_name)                                                                \
+    (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
+
+Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
 
 Nvdec::~Nvdec() = default;
 
-void Nvdec::ProcessMethod(Method method, u32 argument) {
-    if (method == Method::SetVideoCodec) {
-        codec->StateWrite(static_cast<u32>(method), argument);
-    } else {
-        codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
-    }
+void Nvdec::ProcessMethod(u32 method, u32 argument) {
+    state.reg_array[method] = static_cast<u64>(argument) << 8;
 
     switch (method) {
-    case Method::SetVideoCodec:
+    case NVDEC_REG_INDEX(set_codec_id):
         codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
         break;
-    case Method::Execute:
+    case NVDEC_REG_INDEX(execute):
         Execute();
         break;
     }
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e66be80b8..6e1da0b04 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -14,16 +14,11 @@ class GPU;
 
 class Nvdec {
 public:
-    enum class Method : u32 {
-        SetVideoCodec = 0x80,
-        Execute = 0xc0,
-    };
-
     explicit Nvdec(GPU& gpu);
     ~Nvdec();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Method method, u32 argument);
+    void ProcessMethod(u32 method, u32 argument);
 
     /// Return most recently decoded frame
     [[nodiscard]] AVFramePtr GetFrame();
@@ -33,6 +28,7 @@ private:
     void Execute();
 
     GPU& gpu;
+    NvdecCommon::NvdecRegisters state;
     std::unique_ptr<Codec> codec;
 };
 } // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
index 01b5e086d..6a24e00a0 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -4,40 +4,13 @@
 
 #pragma once
 
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 
 namespace Tegra::NvdecCommon {
 
-struct NvdecRegisters {
-    INSERT_PADDING_WORDS(256);
-    u64 set_codec_id{};
-    INSERT_PADDING_WORDS(254);
-    u64 set_platform_id{};
-    u64 picture_info_offset{};
-    u64 frame_bitstream_offset{};
-    u64 frame_number{};
-    u64 h264_slice_data_offsets{};
-    u64 h264_mv_dump_offset{};
-    INSERT_PADDING_WORDS(6);
-    u64 frame_stats_offset{};
-    u64 h264_last_surface_luma_offset{};
-    u64 h264_last_surface_chroma_offset{};
-    std::array<u64, 17> surface_luma_offset{};
-    std::array<u64, 17> surface_chroma_offset{};
-    INSERT_PADDING_WORDS(132);
-    u64 vp9_entropy_probs_offset{};
-    u64 vp9_backward_updates_offset{};
-    u64 vp9_last_frame_segmap_offset{};
-    u64 vp9_curr_frame_segmap_offset{};
-    INSERT_PADDING_WORDS(2);
-    u64 vp9_last_frame_mvs_offset{};
-    u64 vp9_curr_frame_mvs_offset{};
-    INSERT_PADDING_WORDS(2);
-};
-static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
-
-enum class VideoCodec : u32 {
+enum class VideoCodec : u64 {
     None = 0x0,
     H264 = 0x3,
     Vp8 = 0x5,
@@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
     Vp9 = 0x9,
 };
 
+// NVDEC should use a 32-bit address space, but it is mapped to 64-bit;
+// the sizes here are doubled to compensate for that.
+struct NvdecRegisters {
+    static constexpr std::size_t NUM_REGS = 0x178;
+
+    union {
+        struct {
+            INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
+            VideoCodec set_codec_id;          ///< 0x0400
+            INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
+            u64 execute;                      ///< 0x0600
+            INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
+            struct {                          ///< 0x0800
+                union {
+                    BitField<0, 3, VideoCodec> codec;
+                    BitField<4, 1, u64> gp_timer_on;
+                    BitField<13, 1, u64> mb_timer_on;
+                    BitField<14, 1, u64> intra_frame_pslc;
+                    BitField<17, 1, u64> all_intra_frame;
+                };
+            } control_params;
+            u64 picture_info_offset;                   ///< 0x0808
+            u64 frame_bitstream_offset;                ///< 0x0810
+            u64 frame_number;                          ///< 0x0818
+            u64 h264_slice_data_offsets;               ///< 0x0820
+            u64 h264_mv_dump_offset;                   ///< 0x0828
+            INSERT_PADDING_WORDS_NOINIT(6);            ///< 0x0830
+            u64 frame_stats_offset;                    ///< 0x0848
+            u64 h264_last_surface_luma_offset;         ///< 0x0850
+            u64 h264_last_surface_chroma_offset;       ///< 0x0858
+            std::array<u64, 17> surface_luma_offset;   ///< 0x0860
+            std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
+            INSERT_PADDING_WORDS_NOINIT(132);          ///< 0x0970
+            u64 vp9_entropy_probs_offset;              ///< 0x0B80
+            u64 vp9_backward_updates_offset;           ///< 0x0B88
+            u64 vp9_last_frame_segmap_offset;          ///< 0x0B90
+            u64 vp9_curr_frame_segmap_offset;          ///< 0x0B98
+            INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BA0
+            u64 vp9_last_frame_mvs_offset;             ///< 0x0BA8
+            u64 vp9_curr_frame_mvs_offset;             ///< 0x0BB0
+            INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BB8
+        };
+        std::array<u64, NUM_REGS> reg_array;
+    };
+};
+static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64),                  \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(set_codec_id, 0x80);
+ASSERT_REG_POSITION(execute, 0xC0);
+ASSERT_REG_POSITION(control_params, 0x100);
+ASSERT_REG_POSITION(picture_info_offset, 0x101);
+ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
+ASSERT_REG_POSITION(frame_number, 0x103);
+ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
+ASSERT_REG_POSITION(frame_stats_offset, 0x109);
+ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
+ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
+ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
+ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
+ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
+ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
+ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
+ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
+ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
+ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
+
+#undef ASSERT_REG_POSITION
+
 } // namespace Tegra::NvdecCommon
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0a8b82f2b..ff3db0aee 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -3,7 +3,21 @@
 // Refer to the license.txt file included.
 
 #include <array>
+
+extern "C" {
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include <libswscale/swscale.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
 #include "common/assert.h"
+#include "common/logging/log.h"
+
 #include "video_core/command_classes/nvdec.h"
 #include "video_core/command_classes/vic.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -11,10 +25,6 @@
 #include "video_core/memory_manager.h"
 #include "video_core/textures/decoders.h"
 
-extern "C" {
-#include <libswscale/swscale.h>
-}
-
 namespace Tegra {
 
 Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0f640fdae..f26530ede 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -7,6 +7,10 @@
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 
 namespace Tegra::Engines {
 
@@ -49,7 +53,7 @@ void Fermi2D::Blit() {
     UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
 
     const auto& args = regs.pixels_from_memory;
-    const Config config{
+    Config config{
         .operation = regs.operation,
         .filter = args.sample_mode.filter,
         .dst_x0 = args.dst_x0,
@@ -61,7 +65,21 @@ void Fermi2D::Blit() {
         .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
         .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
     };
-    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
+    Surface src = regs.src;
+    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+    const auto need_align_to_pitch =
+        src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
+        static_cast<s32>(src.width) == config.src_x1 &&
+        config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
+    if (need_align_to_pitch) {
+        auto address = src.Address() + config.src_x0 * bytes_per_pixel;
+        src.addr_upper = static_cast<u32>(address >> 32);
+        src.addr_lower = static_cast<u32>(address);
+        src.width -= config.src_x0;
+        config.src_x1 -= config.src_x0;
+        config.src_x0 = 0;
+    }
+    if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
         UNIMPLEMENTED();
     }
 }
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2208e1922..c9cff7450 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,7 +18,10 @@ set(SHADER_FILES
     vulkan_uint8.comp
 )
 
-find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
+find_program(GLSLANGVALIDATOR "glslangValidator")
+if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND")
+    message(FATAL_ERROR "Required program `glslangValidator` not found.")
+endif()
 
 set(GLSL_FLAGS "")
 set(QUIET_FLAG "--quiet")
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..d2b9d5f2b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     } else {
         UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
     }
-    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
-    ASSERT(cpu_addr);
 
-    rasterizer->UnmapMemory(*cpu_addr, size);
+    const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
+
+    for (const auto& map : submapped_ranges) {
+        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+        ASSERT(cpu_addr);
+
+        rasterizer->UnmapMemory(*cpu_addr, map.second);
+    }
 
     UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
 }
@@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
 
     //// Lock the new page
     // TryLockPage(page_entry, size);
+    auto& current_page = page_table[PageEntryIndex(gpu_addr)];
 
-    page_table[PageEntryIndex(gpu_addr)] = page_entry;
+    if ((!current_page.IsValid() && page_entry.IsValid()) ||
+        current_page.ToAddress() != page_entry.ToAddress()) {
+        rasterizer->ModifyGPUMemory(gpu_addr, size);
+    }
+
+    current_page = page_entry;
 }
 
 std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
     return page_entry.ToAddress() + (gpu_addr & page_mask);
 }
 
+/// Returns the CPU address of the first page touched by [addr, addr + size) that resolves to a
+/// non-zero CPU address, or std::nullopt when no page in the range does.
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+    const size_t end_page{(addr + size + page_size - 1) >> page_bits};
+    for (size_t page{addr >> page_bits}; page < end_page; ++page) {
+        const std::optional<VAddr> cpu_addr{GpuToCpuAddress(page << page_bits)};
+        if (cpu_addr.value_or(0) != 0) {
+            return cpu_addr;
+        }
+    }
+    return std::nullopt;
+}
+
 template <typename T>
 T MemoryManager::Read(GPUVAddr addr) const {
     if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::PAGE_SIZE;
 }
 
+/// Reports whether every page touched by [gpu_addr, gpu_addr + size) resolves to a non-zero CPU
+/// address and each page's CPU address is exactly page_size past the previous page's.
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+    const size_t end_page{(gpu_addr + size + page_size - 1) >> page_bits};
+    std::optional<VAddr> previous{};
+    for (size_t page{gpu_addr >> page_bits}; page != end_page; ++page) {
+        const std::optional<VAddr> current{GpuToCpuAddress(page << page_bits)};
+        // An unmapped page, or one resolving to address zero, breaks the range.
+        if (current.value_or(0) == 0) {
+            return false;
+        }
+        // Past the first page, the CPU address must advance by exactly one page.
+        if (previous && *previous + page_size != *current) {
+            return false;
+        }
+        previous = current;
+    }
+    return true;
+}
+
+/// Reports whether every page in the range has a valid page-table entry with a non-zero address.
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+    const size_t end_page{(gpu_addr + size + page_size - 1) >> page_bits};
+    for (size_t page{gpu_addr >> page_bits}; page < end_page; ++page) {
+        const auto& entry = page_table[page];
+        if (!entry.IsValid() || entry.ToAddress() == 0) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/// Splits [gpu_addr, gpu_addr + size) into its mapped pieces, also breaking a piece wherever the
+/// backing CPU pages stop being consecutive. Each pair is {GPU base address, size in bytes}.
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+    GPUVAddr gpu_addr, std::size_t size) const {
+    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+    size_t page_index{gpu_addr >> page_bits};
+    size_t remaining_size{size};
+    size_t page_offset{gpu_addr & page_mask};
+    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+    std::optional<VAddr> old_page_addr{};
+    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
+            last_segment = {new_base_addr, bytes};
+        } else {
+            last_segment->second += bytes;
+        }
+    };
+    const auto split = [&last_segment, &result] {
+        if (last_segment) {
+            result.push_back(*last_segment);
+            last_segment = std::nullopt;
+        }
+    };
+    while (remaining_size > 0) {
+        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        // Close the open segment on an unmapped page or a jump in the backing CPU addresses.
+        if (!page_addr || (old_page_addr && *old_page_addr + page_size != *page_addr)) {
+            split();
+        }
+        if (page_addr) {
+            extend_size(num_bytes);
+        }
+        old_page_addr = page_addr; // Remember this page (or the gap) for the next iteration.
+        ++page_index;
+        page_offset = 0;
+        remaining_size -= num_bytes;
+    }
+    split();
+    return result;
+}
+
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
 
     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
+    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
     template <typename T>
     [[nodiscard]] T Read(GPUVAddr addr) const;
 
@@ -112,10 +114,28 @@ public:
     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
     /**
-     * IsGranularRange checks if a gpu region can be simply read with a pointer.
+     * Checks if a gpu region can be simply read with a pointer.
      */
     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
+    /**
+     * Checks if a gpu region is mapped by a single range of cpu addresses.
+     */
+    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Checks if a gpu region is mapped entirely.
+     */
+    [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Returns a vector with all the subranges of cpu addresses mapped beneath.
+     * If the region is continuous, a single pair will be returned. If it's unmapped, an empty
+     * vector will be returned.
+     */
+    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
+                                                                    std::size_t size) const;
+
     [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
     [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
     [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..0cec4225b 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
     /// Unmap memory range
     virtual void UnmapMemory(VAddr addr, u64 size) = 0;
 
+    /// Remap GPU memory range. This means underneath backing memory changed
+    virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 320ee8d30..63d8ad42a 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -42,6 +42,8 @@ public:
 
     [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
 
+    [[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
+
     // Getter/setter functions:
     // ------------------------
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3f4532ca7..3b00614e7 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -202,13 +202,13 @@ Device::Device() {
         LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
         throw std::runtime_error{"Insufficient version"};
     }
-    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
     const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
     const std::vector extensions = GetExtensions();
 
-    const bool is_nvidia = vendor == "NVIDIA Corporation";
-    const bool is_amd = vendor == "ATI Technologies Inc.";
-    const bool is_intel = vendor == "Intel";
+    const bool is_nvidia = vendor_name == "NVIDIA Corporation";
+    const bool is_amd = vendor_name == "ATI Technologies Inc.";
+    const bool is_intel = vendor_name == "Intel";
 
 #ifdef __unix__
     const bool is_linux = true;
@@ -275,6 +275,56 @@ Device::Device() {
     }
 }
 
+std::string Device::GetVendorName() const {
+    if (vendor_name == "NVIDIA Corporation") {
+        return "NVIDIA";
+    }
+    if (vendor_name == "ATI Technologies Inc.") {
+        return "AMD";
+    }
+    if (vendor_name == "Intel") {
+        // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
+        // Simply return `INTEL` for those as well as the Windows driver.
+        return "INTEL";
+    }
+    if (vendor_name == "Intel Open Source Technology Center") {
+        return "I965";
+    }
+    if (vendor_name == "Mesa Project") {
+        return "I915";
+    }
+    if (vendor_name == "Mesa/X.org") {
+        // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
+        // MESA instead of one of those driver names.
+        return "MESA";
+    }
+    if (vendor_name == "AMD") {
+        return "RADEONSI";
+    }
+    if (vendor_name == "nouveau") {
+        return "NOUVEAU";
+    }
+    if (vendor_name == "X.Org") {
+        return "R600";
+    }
+    if (vendor_name == "Collabora Ltd") {
+        return "ZINK";
+    }
+    if (vendor_name == "Intel Corporation") {
+        return "OPENSWR";
+    }
+    if (vendor_name == "Microsoft Corporation") {
+        return "D3D12";
+    }
+    if (vendor_name == "NVIDIA") {
+        // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
+        // strategy would have returned `NVIDIA` here for this driver, the same result as the
+        // proprietary driver.
+        return "TEGRA";
+    }
+    return vendor_name;
+}
+
 Device::Device(std::nullptr_t) {
     max_uniform_buffers.fill(std::numeric_limits<u32>::max());
     uniform_buffer_alignment = 4;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index f24bd0c7b..2c2b13767 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -22,6 +22,8 @@ public:
     explicit Device();
     explicit Device(std::nullptr_t);
 
+    [[nodiscard]] std::string GetVendorName() const;
+
     u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
         return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
     }
@@ -130,6 +132,7 @@ private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
 
+    std::string vendor_name;
     std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
     std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
     size_t uniform_buffer_alignment{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
 }
 
+/// Forwards the changed GPU range to the texture cache while holding the cache's mutex.
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    // The guard lives until the function returns, which is right after the forwarded call.
+    const std::scoped_lock guard{texture_cache.mutex};
+    texture_cache.UnmapGPUMemory(addr, size);
+}
+
 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
+    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 23948feed..ff0f03e99 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -327,7 +327,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
     if (format_info.is_compressed) {
         return false;
     }
-    if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+    if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) ==
+        ACCELERATED_FORMATS.end()) {
         return false;
     }
     if (format_info.compatibility_by_size) {
@@ -341,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
                                         VideoCommon::SubresourceLayers subresource, GLenum target) {
     switch (target) {
+    case GL_TEXTURE_1D:
+        return CopyOrigin{
+            .level = static_cast<GLint>(subresource.base_level),
+            .x = static_cast<GLint>(offset.x),
+            .y = static_cast<GLint>(0),
+            .z = static_cast<GLint>(0),
+        };
+    case GL_TEXTURE_1D_ARRAY:
+        return CopyOrigin{
+            .level = static_cast<GLint>(subresource.base_level),
+            .x = static_cast<GLint>(offset.x),
+            .y = static_cast<GLint>(0),
+            .z = static_cast<GLint>(subresource.base_layer),
+        };
     case GL_TEXTURE_2D_ARRAY:
     case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
         return CopyOrigin{
@@ -366,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
                                         VideoCommon::SubresourceLayers dst_subresource,
                                         GLenum target) {
     switch (target) {
+    case GL_TEXTURE_1D:
+        return CopyRegion{
+            .width = static_cast<GLsizei>(extent.width),
+            .height = static_cast<GLsizei>(1),
+            .depth = static_cast<GLsizei>(1),
+        };
+    case GL_TEXTURE_1D_ARRAY:
+        return CopyRegion{
+            .width = static_cast<GLsizei>(extent.width),
+            .height = static_cast<GLsizei>(1),
+            .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+        };
     case GL_TEXTURE_2D_ARRAY:
     case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
         return CopyRegion{
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc19a110f..0b66f8332 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -70,6 +70,10 @@ public:
         return &rasterizer;
     }
 
+    [[nodiscard]] std::string GetDeviceVendor() const override {
+        return device.GetVendorName();
+    }
+
 private:
     /// Initializes the OpenGL state and creates persistent objects.
     void InitOpenGLObjects();
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index abaf1ee6a..8fb5be393 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
         glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
         glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
         glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
-                           copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+                           copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
         glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
-                           copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+                           copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
         glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
     }
     program_manager.RestoreGuestCompute();
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 72071316c..d7d17e110 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -47,6 +47,10 @@ public:
         return &rasterizer;
     }
 
+    [[nodiscard]] std::string GetDeviceVendor() const override {
+        return device.GetDriverName();
+    }
+
 private:
     void Report() const;
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8cb65e588..0df4e1a1c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) {
 template <typename T>
 std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
     std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
-    std::ranges::transform(indices, indices.begin(),
-                           [quad, first](u32 index) { return first + index + quad * 4; });
+    for (T& index : indices) {
+        index = static_cast<T>(first + index + quad * 4);
+    }
     return indices;
 }
 } // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
+void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    // Hold the texture cache mutex for the whole call; it is released when the function returns.
+    // UnmapGPUMemory flags the images overlapping the range as remapped and untracks them.
+    std::scoped_lock lock{texture_cache.mutex};
+    texture_cache.UnmapGPUMemory(addr, size);
+}
+
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
+    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index ad69d32d1..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
     }
 }
 
+ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
+    : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} // plain copies
+
 std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
     if (other_addr < gpu_addr) {
         // Subresource address can't be lower than the base
@@ -82,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const
     if (info.type != ImageType::e3D) {
         const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
         const auto end = mip_level_offsets.begin() + info.resources.levels;
-        const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
+        const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset));
         if (layer > info.resources.layers || it == end) {
             return std::nullopt;
         }
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 {
     Strong = 1 << 5,      ///< Exists in the image table, the dimensions are can be trusted
     Registered = 1 << 6,  ///< True when the image is registered
     Picked = 1 << 7,      ///< Temporary flag to mark the image as picked
+    Remapped = 1 << 8,    ///< Image has been remapped.
+    Sparse = 1 << 9,      ///< Image has non-contiguous submemory.
 
     // Garbage Collection Flags
-    BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
-                         ///< garbage collection priority
-    Alias = 1 << 9,      ///< This image has aliases and has priority on garbage
-                         ///< collection
+    BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+                          ///< garbage collection priority
+    Alias = 1 << 11,      ///< This image has aliases and has priority on garbage
+                          ///< collection
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 
@@ -57,6 +59,12 @@ struct ImageBase {
         return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
     }
 
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size; // end of the queried range
+        const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;    // end of this image
+        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; // ranges are half-open
+    }
+
     void CheckBadOverlapState();
     void CheckAliasState();
 
@@ -84,6 +92,29 @@ struct ImageBase {
 
     std::vector<AliasedImage> aliased_images;
     std::vector<ImageId> overlapping_images;
+    ImageMapId map_view_id{};
+};
+
+struct ImageMapView {
+    explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
+
+    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+        // Half-open intersection of [cpu_addr, cpu_addr + size) with the queried CPU range
+        const bool begins_before_query_end = cpu_addr < overlap_cpu_addr + overlap_size;
+        return begins_before_query_end && overlap_cpu_addr < cpu_addr + size;
+    }
+
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        // Same test as Overlaps, expressed in GPU virtual addresses
+        const bool begins_before_query_end = gpu_addr < overlap_gpu_addr + overlap_size;
+        return begins_before_query_end && overlap_gpu_addr < gpu_addr + size;
+    }
+
+    GPUVAddr gpu_addr; ///< GPU virtual address where this mapping starts
+    VAddr cpu_addr;    ///< CPU address backing gpu_addr
+    size_t size;       ///< Length of the mapping in bytes
+    ImageId image_id;  ///< Image this mapping belongs to
+    bool picked{};     ///< Temporary marker set while iterating mappings in a region
+};
 
 struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 84530a179..01de2d498 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
 #include <span>
 #include <type_traits>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -110,9 +111,6 @@ public:
     /// Notify the cache that a new frame has been queued
     void TickFrame();
 
-    /// Runs the Garbage Collector.
-    void RunGarbageCollector();
-
     /// Return a constant reference to the given image view id
     [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
 
@@ -155,12 +153,13 @@ public:
     /// Remove images in a region
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
+    /// Flag images in a GPU region as remapped and stop tracking them
+    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src,
-                   const Tegra::Engines::Fermi2D::Config& copy,
-                   std::optional<Region2D> src_region_override = {},
-                   std::optional<Region2D> dst_region_override = {});
+                   const Tegra::Engines::Fermi2D::Config& copy);
 
     /// Invalidate the contents of the color buffer index
     /// These contents become unspecified, the cache can assume aggressive optimizations.
@@ -193,7 +192,22 @@ public:
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
-    static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS; // inclusive; assumes size > 0
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) { // func's result type (invoke_result_t) is bool
+                if (func(page)) {
+                    break; // func returned true: stop visiting pages
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    template <typename Func>
+    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
         const u64 page_end = (addr + size - 1) >> PAGE_BITS;
         for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -207,6 +221,9 @@ private:
         }
     }
 
+    /// Runs the Garbage Collector.
+    void RunGarbageCollector();
+
     /// Fills image_view_ids in the image views in indices
     void FillImageViews(DescriptorTable<TICEntry>& table,
                         std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
@@ -220,7 +237,7 @@ private:
     FramebufferId GetFramebufferId(const RenderTargets& key);
 
     /// Refresh the contents (pixel data) of an image
-    void RefreshContents(Image& image);
+    void RefreshContents(Image& image, ImageId image_id);
 
     /// Upload data from guest to an image
     template <typename StagingBuffer>
@@ -269,6 +286,16 @@ private:
     template <typename Func>
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
 
+    template <typename Func>
+    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    template <typename Func>
+    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    /// Iterates over all the submapped segments of an image calling func
+    template <typename Func>
+    void ForEachSparseSegment(ImageBase& image, Func&& func);
+
     /// Find or create an image view in the given image with the passed parameters
     [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
 
@@ -279,10 +306,10 @@ private:
     void UnregisterImage(ImageId image);
 
     /// Track CPU reads and writes for image
-    void TrackImage(ImageBase& image);
+    void TrackImage(ImageBase& image, ImageId image_id);
 
     /// Stop tracking CPU reads and writes for image
-    void UntrackImage(ImageBase& image);
+    void UntrackImage(ImageBase& image, ImageId image_id);
 
     /// Delete image from the cache
     void DeleteImage(ImageId image);
@@ -340,7 +367,13 @@ private:
     std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+    VAddr virtual_invalid_space{};
 
     bool has_deleted_images = false;
     u64 total_used_memory = 0;
@@ -349,6 +382,7 @@ private:
     u64 critical_memory;
 
     SlotVector<Image> slot_images;
+    SlotVector<ImageMapView> slot_map_views;
     SlotVector<ImageView> slot_image_views;
     SlotVector<ImageAlloc> slot_image_allocs;
     SlotVector<Sampler> slot_samplers;
@@ -459,7 +493,7 @@ void TextureCache<P>::RunGarbageCollector() {
                 }
             }
             if (True(image->flags & ImageFlagBits::Tracked)) {
-                UntrackImage(*image);
+                UntrackImage(*image, image_id);
             }
             UnregisterImage(image_id);
             DeleteImage(image_id);
@@ -658,7 +692,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
             return;
         }
         image.flags |= ImageFlagBits::CpuModified;
-        UntrackImage(image);
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, image_id);
+        }
     });
 }
 
@@ -695,7 +731,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
         if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image);
+            UntrackImage(image, id);
         }
         UnregisterImage(id);
         DeleteImage(id);
@@ -703,11 +739,26 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
 }
 
 template <class P>
+void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+    // Gather the overlapping image ids first, then flag and untrack them once the walk is done
+    std::vector<ImageId> remapped_ids;
+    ForEachImageInRegionGPU(gpu_addr, size,
+                            [&remapped_ids](ImageId id, Image&) { remapped_ids.push_back(id); });
+    for (const ImageId id : remapped_ids) {
+        Image& image = slot_images[id];
+        if (False(image.flags & ImageFlagBits::Remapped)) {
+            image.flags |= ImageFlagBits::Remapped;
+            if (True(image.flags & ImageFlagBits::Tracked)) {
+                UntrackImage(image, id);
+            }
+        }
+    }
+}
+
+template <class P>
 void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                                 const Tegra::Engines::Fermi2D::Surface& src,
-                                const Tegra::Engines::Fermi2D::Config& copy,
-                                std::optional<Region2D> src_override,
-                                std::optional<Region2D> dst_override) {
+                                const Tegra::Engines::Fermi2D::Config& copy) {
     const BlitImages images = GetBlitImages(dst, src);
     const ImageId dst_id = images.dst_id;
     const ImageId src_id = images.src_id;
@@ -718,47 +769,25 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
     const ImageBase& src_image = slot_images[src_id];
 
     // TODO: Deduplicate
-    const std::optional dst_base = dst_image.TryFindBase(dst.Address());
-    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
-    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
-    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
-    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
-
-    // out of bounds texture blit checking
-    const bool use_override = src_override.has_value();
-    const s32 src_x0 = copy.src_x0 >> src_samples_x;
-    s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
-    const s32 src_y0 = copy.src_y0 >> src_samples_y;
-    const s32 src_y1 = copy.src_y1 >> src_samples_y;
-
-    const auto src_width = static_cast<s32>(src_image.info.size.width);
-    const bool width_oob = src_x1 > src_width;
-    const auto width_diff = width_oob ? src_x1 - src_width : 0;
-    if (width_oob) {
-        src_x1 = src_width;
-    }
-
-    const Region2D src_dimensions{
-        Offset2D{.x = src_x0, .y = src_y0},
-        Offset2D{.x = src_x1, .y = src_y1},
-    };
-    const auto src_region = use_override ? *src_override : src_dimensions;
-
     const std::optional src_base = src_image.TryFindBase(src.Address());
     const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
     const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
     const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
-    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+    const Region2D src_region{
+        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+    };
 
-    const s32 dst_x0 = copy.dst_x0 >> dst_samples_x;
-    const s32 dst_x1 = copy.dst_x1 >> dst_samples_x;
-    const s32 dst_y0 = copy.dst_y0 >> dst_samples_y;
-    const s32 dst_y1 = copy.dst_y1 >> dst_samples_y;
-    const Region2D dst_dimensions{
-        Offset2D{.x = dst_x0, .y = dst_y0},
-        Offset2D{.x = dst_x1 - width_diff, .y = dst_y1},
+    const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+    const Region2D dst_region{
+        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
     };
-    const auto dst_region = use_override ? *dst_override : dst_dimensions;
 
     // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
     Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -775,21 +804,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
         runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
                           copy.operation);
     }
-
-    if (width_oob) {
-        // Continue copy of the oob region of the texture on the next row
-        auto oob_src = src;
-        oob_src.height++;
-        const Region2D src_region_override{
-            Offset2D{.x = 0, .y = src_y0 + 1},
-            Offset2D{.x = width_diff, .y = src_y1 + 1},
-        };
-        const Region2D dst_region_override{
-            Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
-            Offset2D{.x = dst_x1, .y = dst_y1},
-        };
-        BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
-    }
 }
 
 template <class P>
@@ -833,9 +847,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
     if (it == page_table.end()) {
         return nullptr;
     }
-    const auto& image_ids = it->second;
-    for (const ImageId image_id : image_ids) {
-        const ImageBase& image = slot_images[image_id];
+    const auto& image_map_ids = it->second;
+    for (const ImageMapId map_id : image_map_ids) {
+        const ImageMapView& map = slot_map_views[map_id];
+        const ImageBase& image = slot_images[map.image_id];
         if (image.cpu_addr != cpu_addr) {
             continue;
         }
@@ -915,13 +930,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
     if (False(image.flags & ImageFlagBits::CpuModified)) {
         // Only upload modified images
         return;
     }
     image.flags &= ~ImageFlagBits::CpuModified;
-    TrackImage(image);
+    TrackImage(image, image_id);
 
     if (image.info.num_samples > 1) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -958,7 +973,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidAddress(gpu_memory, config)) {
+    if (!IsValidEntry(gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
     const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1000,14 +1015,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        return ImageId{};
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        if (!cpu_addr) {
+            return ImageId{};
+        }
     }
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+            return false;
+        }
         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
             const bool strict_size = False(options & RelaxedOptions::Size) &&
                                      True(existing_image.flags & ImageFlagBits::Strong);
@@ -1033,7 +1054,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        const auto size = CalculateGuestSizeInBytes(info);
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        if (!cpu_addr) {
+            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+            virtual_invalid_space += Common::AlignUp(size, 32);
+            cpu_addr = std::optional<VAddr>(fake_addr);
+        }
+    }
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
     const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
     const Image& image = slot_images[image_id];
@@ -1053,11 +1083,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     std::vector<ImageId> overlap_ids;
+    std::unordered_set<ImageId> overlaps_found;
     std::vector<ImageId> left_aliased_ids;
     std::vector<ImageId> right_aliased_ids;
+    std::unordered_set<ImageId> ignore_textures;
     std::vector<ImageId> bad_overlap_ids;
-    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
-        if (info.type != overlap.info.type) {
+    const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (True(overlap.flags & ImageFlagBits::Remapped)) {
+            ignore_textures.insert(overlap_id);
             return;
         }
         if (info.type == ImageType::Linear) {
@@ -1067,6 +1100,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             }
             return;
         }
+        overlaps_found.insert(overlap_id);
         static constexpr bool strict_size = true;
         const std::optional<OverlapResult> solution = ResolveOverlap(
             new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1090,12 +1124,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             bad_overlap_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::BadOverlap;
         }
-    });
+    };
+    ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+    const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (!overlaps_found.contains(overlap_id)) {
+            if (True(overlap.flags & ImageFlagBits::Remapped)) {
+                ignore_textures.insert(overlap_id);
+            }
+            if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
+                ignore_textures.insert(overlap_id);
+            }
+        }
+    };
+    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
+    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+        new_image.flags |= ImageFlagBits::Sparse;
+    }
+
+    for (const ImageId overlap_id : ignore_textures) {
+        Image& overlap = slot_images[overlap_id];
+        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+            UNIMPLEMENTED();
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap, overlap_id);
+        }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
+    }
+
     // TODO: Only upload what we need
-    RefreshContents(new_image);
+    RefreshContents(new_image, new_image_id);
 
     for (const ImageId overlap_id : overlap_ids) {
         Image& overlap = slot_images[overlap_id];
@@ -1107,7 +1169,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             runtime.CopyImage(new_image, overlap, copies);
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(overlap);
+            UntrackImage(overlap, overlap_id);
         }
         UnregisterImage(overlap_id);
         DeleteImage(overlap_id);
@@ -1242,7 +1304,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 32> images;
-    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+    boost::container::small_vector<ImageMapId, 32> maps;
+    ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
         const auto it = page_table.find(page);
         if (it == page_table.end()) {
             if constexpr (BOOL_BREAK) {
@@ -1251,12 +1314,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
                 return;
             }
         }
+        for (const ImageMapId map_id : it->second) {
+            ImageMapView& map = slot_map_views[map_id];
+            if (map.picked) {
+                continue;
+            }
+            if (!map.Overlaps(cpu_addr, size)) {
+                continue;
+            }
+            map.picked = true;
+            maps.push_back(map_id);
+            Image& image = slot_images[map.image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(map.image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(map.image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(map.image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+    for (const ImageMapId map_id : maps) {
+        slot_map_views[map_id].picked = false;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    // Calls func once for each image in gpu_page_table overlapping [gpu_addr, gpu_addr + size)
+    using Result = std::invoke_result_t<Func, ImageId, Image&>;
+    static constexpr bool BOOL_BREAK = std::is_same_v<Result, bool>;
+    boost::container::small_vector<ImageId, 8> visited;
+    ForEachGPUPage(gpu_addr, size, [this, &visited, gpu_addr, size, func](u64 page) {
+        const auto page_it = gpu_page_table.find(page);
+        if (page_it == gpu_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
+        for (const ImageId image_id : page_it->second) {
+            Image& image = slot_images[image_id];
+            // Skip images that are already picked or that do not overlap the range
+            if (True(image.flags & ImageFlagBits::Picked) || !image.OverlapsGPU(gpu_addr, size)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            visited.push_back(image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    // Drop the temporary Picked marker from every image that was visited
+    for (const ImageId image_id : visited) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 8> images;
+    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+        const auto it = sparse_page_table.find(page);
+        if (it == sparse_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
         for (const ImageId image_id : it->second) {
             Image& image = slot_images[image_id];
             if (True(image.flags & ImageFlagBits::Picked)) {
                 continue;
             }
-            if (!image.Overlaps(cpu_addr, size)) {
+            if (!image.OverlapsGPU(gpu_addr, size)) {
                 continue;
             }
             image.flags |= ImageFlagBits::Picked;
@@ -1279,6 +1435,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
 }
 
 template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
+    // Calls func(gpu_addr, cpu_addr, size) for each segment gpu_memory.GetSubmappedRange reports
+    // for the image's GPU range; a func returning true ends the walk early.
+    using Result = std::invoke_result_t<Func, GPUVAddr, VAddr, size_t>;
+    static constexpr bool RETURNS_BOOL = std::is_same_v<Result, bool>;
+    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    for (const auto& segment : segments) {
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(segment.first);
+        ASSERT(cpu_addr);
+        if constexpr (RETURNS_BOOL) {
+            if (func(segment.first, *cpu_addr, segment.second)) {
+                break;
+            }
+        } else {
+            func(segment.first, *cpu_addr, segment.second);
+        }
+    }
+}
+
+template <class P>
 ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
     Image& image = slot_images[image_id];
     if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1295,8 +1472,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
                "Trying to register an already registered image");
     image.flags |= ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.guest_size_bytes,
-                [this, image_id](u64 page) { page_table[page].push_back(image_id); });
     u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
     if ((IsPixelFormatASTC(image.info.format) &&
          True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1304,6 +1479,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory += Common::AlignUp(tentative_size, 1024);
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        auto map_id =
+            slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
+        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
+                       [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+        image.map_view_id = map_id;
+        return;
+    }
+    std::vector<ImageViewId> sparse_maps{};
+    ForEachSparseSegment(
+        image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+            auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
+            ForEachCPUPage(cpu_addr, size,
+                           [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+            sparse_maps.push_back(map_id);
+        });
+    sparse_views.emplace(image_id, std::move(sparse_maps));
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
 }
 
 template <class P>
@@ -1320,34 +1516,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
-    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
-        const auto page_it = page_table.find(page);
-        if (page_it == page_table.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-            return;
-        }
-        std::vector<ImageId>& image_ids = page_it->second;
-        const auto vector_it = std::ranges::find(image_ids, image_id);
-        if (vector_it == image_ids.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
-            return;
-        }
-        image_ids.erase(vector_it);
+    const auto& clear_page_table =
+        [this, image_id](
+            u64 page,
+            std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+            const auto page_it = selected_page_table.find(page);
+            if (page_it == selected_page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageId>& image_ids = page_it->second;
+            const auto vector_it = std::ranges::find(image_ids, image_id);
+            if (vector_it == image_ids.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+                                page << PAGE_BITS);
+                return;
+            }
+            image_ids.erase(vector_it);
+        };
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        const auto map_id = image.map_view_id;
+        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
+            const auto page_it = page_table.find(page);
+            if (page_it == page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageMapId>& image_map_ids = page_it->second;
+            const auto vector_it = std::ranges::find(image_map_ids, map_id);
+            if (vector_it == image_map_ids.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+                                page << PAGE_BITS);
+                return;
+            }
+            image_map_ids.erase(vector_it);
+        });
+        slot_map_views.erase(map_id);
+        return;
+    }
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, sparse_page_table);
     });
+    auto it = sparse_views.find(image_id);
+    ASSERT(it != sparse_views.end());
+    auto& sparse_maps = it->second;
+    for (auto& map_view_id : sparse_maps) {
+        const auto& map_range = slot_map_views[map_view_id];
+        const VAddr cpu_addr = map_range.cpu_addr;
+        const std::size_t size = map_range.size;
+        ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
+            const auto page_it = page_table.find(page);
+            if (page_it == page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageMapId>& image_map_ids = page_it->second;
+            auto vector_it = image_map_ids.begin();
+            while (vector_it != image_map_ids.end()) {
+                ImageMapView& map = slot_map_views[*vector_it];
+                if (map.image_id != image_id) {
+                    vector_it++;
+                    continue;
+                }
+                if (!map.picked) {
+                    map.picked = true;
+                }
+                vector_it = image_map_ids.erase(vector_it);
+            }
+        });
+        slot_map_views.erase(map_view_id);
+    }
+    sparse_views.erase(it);
 }
 
 template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image) {
+void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(False(image.flags & ImageFlagBits::Tracked));
     image.flags |= ImageFlagBits::Tracked;
-    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        // Non-sparse images are tracked as one CPU range of guest_size_bytes
+        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+        return;
+    }
+    if (False(image.flags & ImageFlagBits::Registered)) {
+        // Not registered: walk the sparse segments of the image directly
+        ForEachSparseSegment(image, [this](GPUVAddr, VAddr cpu_addr, size_t size) {
+            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        });
+        return;
+    }
+    // Registered: reuse the map views that RegisterImage stored in sparse_views,
+    // one per sparse segment
+    const auto views_it = sparse_views.find(image_id);
+    ASSERT(views_it != sparse_views.end());
+    for (const ImageMapId map_view_id : views_it->second) {
+        const auto& map_view = slot_map_views[map_view_id];
+        rasterizer.UpdatePagesCachedCount(map_view.cpu_addr, map_view.size, 1);
+    }
 }
 
 template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image) {
+void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(True(image.flags & ImageFlagBits::Tracked));
     image.flags &= ~ImageFlagBits::Tracked;
-    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        // Non-sparse images are untracked as one CPU range of guest_size_bytes
+        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+        return;
+    }
+    // Sparse images are untracked through the map views kept in sparse_views, so the
+    // image is expected to still be registered here
+    ASSERT(True(image.flags & ImageFlagBits::Registered));
+    const auto views_it = sparse_views.find(image_id);
+    ASSERT(views_it != sparse_views.end());
+    for (const ImageMapId map_view_id : views_it->second) {
+        const auto& map_view = slot_map_views[map_view_id];
+        rasterizer.UpdatePagesCachedCount(map_view.cpu_addr, map_view.size, -1);
+    }
 }
 
 template <class P>
@@ -1489,10 +1776,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
     if (invalidate) {
         image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
         if (False(image.flags & ImageFlagBits::Tracked)) {
-            TrackImage(image);
+            TrackImage(image, image_id);
         }
     } else {
-        RefreshContents(image);
+        RefreshContents(image, image_id);
         SynchronizeAliases(image_id);
     }
     if (is_modification) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..9fbdc1ac6 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
 constexpr SlotId CORRUPT_ID{0xfffffffe};
 
 using ImageId = SlotId;
+using ImageMapId = SlotId;
 using ImageViewId = SlotId;
 using ImageAllocId = SlotId;
 using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 4efe042b6..c872517b8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -394,7 +394,7 @@ template <u32 GOB_EXTENT>
     const s32 mip_offset = diff % layer_stride;
     const std::array offsets = CalculateMipLevelOffsets(new_info);
     const auto end = offsets.begin() + new_info.resources.levels;
-    const auto it = std::find(offsets.begin(), end, mip_offset);
+    const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
     if (it == end) {
         // Mipmap is not aligned to any valid size
         return std::nullopt;
@@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
     return offsets;
 }
 
+LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
+    const LevelInfo mip_info = MakeLevelInfo(info);
+    const u32 level_count = info.resources.levels; // Kept unsigned to match the loop index
+    LevelArray level_sizes{}; // Entries past level_count stay value-initialized
+    for (u32 mip = 0; mip < level_count; ++mip) {
+        level_sizes[mip] = CalculateLevelSize(mip_info, mip);
+    }
+    return level_sizes;
+}
+
 std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
     ASSERT(info.type == ImageType::e3D);
     std::vector<u32> offsets;
@@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
     return copies;
 }
 
-bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
-    if (config.Address() == 0) {
+bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+    const GPUVAddr address = config.Address(); // Read once, reused by every check below
+    if (address == 0) {
         return false;
     }
-    if (config.Address() > (u64(1) << 48)) {
+    if (address > (1ULL << 48)) {
         return false;
     }
-    return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
+    if (gpu_memory.GpuToCpuAddress(address).has_value()) {
+        return true; // The entry's base address translates to a CPU address
+    }
+    const ImageInfo info{config}; // Otherwise retry the lookup with the image's guest size
+    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+    return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
 }
 
 std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
 
 [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
 
+[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
+
 [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
 
 [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,7 @@ struct OverlapResult {
                                                            const ImageInfo& src,
                                                            SubresourceBase base);
 
-[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
 
 [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
                                                           GPUVAddr gpu_addr, const ImageInfo& info,
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 7b756ba41..3ab500760 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1365,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
     // each partition.
 
     // Determine partitions, partition index, and color endpoint modes
-    s32 planeIdx = -1;
-    u32 partitionIndex;
+    u32 planeIdx{UINT32_MAX};
+    u32 partitionIndex{};
     u32 colorEndpointMode[4] = {0, 0, 0, 0};
 
     // Define color data.
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 23814afd2..f214510da 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
+std::string Device::GetDriverName() const {
+    switch (driver_id) { // Short names for the driver ids listed below
+    case VK_DRIVER_ID_AMD_PROPRIETARY:
+        return "AMD";
+    case VK_DRIVER_ID_AMD_OPEN_SOURCE:
+        return "AMDVLK";
+    case VK_DRIVER_ID_MESA_RADV:
+        return "RADV";
+    case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
+        return "NVIDIA";
+    case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
+        return "INTEL";
+    case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
+        return "ANV";
+    case VK_DRIVER_ID_MESA_LLVMPIPE:
+        return "LAVAPIPE";
+    default: // Any other driver id reports the vendor_name member instead
+        return vendor_name;
+    }
+}
+
 void Device::CheckSuitability(bool requires_swapchain) const {
     std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
     bool has_swapchain = false;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 88b298196..96c0f8c60 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -45,6 +45,9 @@ public:
     /// Reports a shader to Nsight Aftermath.
     void SaveShader(const std::vector<u32>& spirv) const;
 
+    /// Returns the name of the VkDriverId reported from Vulkan.
+    std::string GetDriverName() const;
+
     /// Returns the dispatch loader with direct function pointers of the device.
     const vk::DeviceDispatch& GetDispatchLoader() const {
         return dld;
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index efdc6aa50..7a6f84d96 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
 }
 
 void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
-    MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+    MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
     ev->accept();
 }
 
 void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
-    MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+    MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
     MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
     ev->accept();
 }
 
 void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
-    MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+    MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
     MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
     ev->accept();
 }
 
 void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
-    MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120);
+    MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale,
+                              ev->angleDelta().y() / 120); // 120 units per typical wheel step
     ev->accept();
 }
 
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 9c5aeb833..218b4782b 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -522,7 +522,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
     QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
     remove_menu->addSeparator();
     QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
-    QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
+    QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
+    QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
+    QAction* dump_romfs_sdmc = dump_romfs_menu->addAction(tr("Dump RomFS to SDMC"));
     QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
     QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
     context_menu.addSeparator();
@@ -571,8 +573,12 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
     connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() {
         emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path);
     });
-    connect(dump_romfs, &QAction::triggered,
-            [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); });
+    connect(dump_romfs, &QAction::triggered, [this, program_id, path]() {
+        emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal);
+    });
+    connect(dump_romfs_sdmc, &QAction::triggered, [this, program_id, path]() {
+        emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::SDMC);
+    });
     connect(copy_tid, &QAction::triggered,
             [this, program_id]() { emit CopyTIDRequested(program_id); });
     connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() {
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b630e34ff..50402da51 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -45,6 +45,11 @@ enum class GameListRemoveTarget {
     CustomConfiguration,
 };
 
+enum class DumpRomFSTarget {
+    Normal, // Dump below the yuzu dump directory
+    SDMC,   // Dump below atmosphere/contents in the SDMC directory
+};
+
 enum class InstalledEntryType {
     Game,
     Update,
@@ -92,7 +97,7 @@ signals:
     void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type);
     void RemoveFileRequested(u64 program_id, GameListRemoveTarget target,
                              const std::string& game_path);
-    void DumpRomFSRequested(u64 program_id, const std::string& game_path);
+    void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
     void CopyTIDRequested(u64 program_id);
     void NavigateToGamedbEntryRequested(u64 program_id,
                                         const CompatibilityList& compatibility_list);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index cb9d7a863..5ed3b90b8 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -104,6 +104,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "input_common/main.h"
 #include "util/overlay_dialog.h"
 #include "video_core/gpu.h"
+#include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
 #include "yuzu/about_dialog.h"
 #include "yuzu/bootmanager.h"
@@ -1426,8 +1427,12 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
         title_name = Common::FS::PathToUTF8String(
             std::filesystem::path{filename.toStdU16String()}.filename());
     }
+    const bool is_64bit = system.Kernel().CurrentProcess()->Is64BitProcess();
+    const auto instruction_set_suffix = is_64bit ? " (64-bit)" : " (32-bit)";
+    title_name += instruction_set_suffix;
     LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version);
-    UpdateWindowTitle(title_name, title_version);
+    const auto gpu_vendor = system.GPU().Renderer().GetDeviceVendor();
+    UpdateWindowTitle(title_name, title_version, gpu_vendor);
 
     loading_screen->Prepare(system.GetAppLoader());
     loading_screen->show();
@@ -1881,7 +1886,8 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g
     }
 }
 
-void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) {
+void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path,
+                                      DumpRomFSTarget target) {
     const auto failed = [this] {
         QMessageBox::warning(this, tr("RomFS Extraction Failed!"),
                              tr("There was an error copying the RomFS files or the user "
@@ -1909,7 +1915,10 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
         return;
     }
 
-    const auto dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir);
+    const auto dump_dir =
+        target == DumpRomFSTarget::Normal
+            ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)
+            : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents";
     const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id);
 
     const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir);
@@ -1919,7 +1928,8 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
     if (*romfs_title_id == program_id) {
         const u64 ivfc_offset = loader->ReadRomFSIVFCOffset();
         const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed};
-        romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program);
+        romfs =
+            pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program, nullptr, false);
     } else {
         romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS();
     }
@@ -2858,8 +2868,8 @@ void GMainWindow::MigrateConfigFiles() {
     }
 }
 
-void GMainWindow::UpdateWindowTitle(const std::string& title_name,
-                                    const std::string& title_version) {
+void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version,
+                                    std::string_view gpu_vendor) {
     const auto branch_name = std::string(Common::g_scm_branch);
     const auto description = std::string(Common::g_scm_desc);
     const auto build_id = std::string(Common::g_build_id);
@@ -2872,7 +2882,8 @@ void GMainWindow::UpdateWindowTitle(const std::string& title_name,
     if (title_name.empty()) {
         setWindowTitle(QString::fromStdString(window_title));
     } else {
-        const auto run_title = fmt::format("{} | {} | {}", window_title, title_name, title_version);
+        const auto run_title =
+            fmt::format("{} | {} | {} | {}", window_title, title_name, title_version, gpu_vendor);
         setWindowTitle(QString::fromStdString(run_title));
     }
 }
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 11f152cbe..45c8310e1 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -34,6 +34,7 @@ class QProgressDialog;
 class WaitTreeWidget;
 enum class GameListOpenTarget;
 enum class GameListRemoveTarget;
+enum class DumpRomFSTarget;
 enum class InstalledEntryType;
 class GameListPlaceholder;
 
@@ -244,7 +245,7 @@ private slots:
     void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type);
     void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target,
                               const std::string& game_path);
-    void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
+    void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
     void OnGameListCopyTID(u64 program_id);
     void OnGameListNavigateToGamedbEntry(u64 program_id,
                                          const CompatibilityList& compatibility_list);
@@ -287,8 +288,8 @@ private:
     InstallResult InstallNSPXCI(const QString& filename);
     InstallResult InstallNCA(const QString& filename);
     void MigrateConfigFiles();
-    void UpdateWindowTitle(const std::string& title_name = {},
-                           const std::string& title_version = {});
+    void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {},
+                           std::string_view gpu_vendor = {});
     void UpdateStatusBar();
     void UpdateStatusButtons();
     void UpdateUISettings();