20 files changed, 415 insertions, 242 deletions
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 2db28dcf0..ab05788d7 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -284,13 +284,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
 
 std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
     std::vector<u8> buffer;
-    const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
+    const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
+                           BufferDescriptorA()[buffer_index].Size()};
     auto& memory = Core::System::GetInstance().Memory();
 
     if (is_buffer_a) {
+        ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
+                   "BufferDescriptorA invalid buffer_index {}", buffer_index);
         buffer.resize(BufferDescriptorA()[buffer_index].Size());
         memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
     } else {
+        ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
+                   "BufferDescriptorX invalid buffer_index {}", buffer_index);
         buffer.resize(BufferDescriptorX()[buffer_index].Size());
         memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
     }
@@ -305,7 +310,8 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
         return 0;
     }
 
-    const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
+    const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
+                           BufferDescriptorB()[buffer_index].Size()};
     const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
     if (size > buffer_size) {
         LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
@@ -315,8 +321,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
 
     auto& memory = Core::System::GetInstance().Memory();
     if (is_buffer_b) {
+        ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
+                   "BufferDescriptorB invalid buffer_index {}", buffer_index);
+        ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size,
+                   "BufferDescriptorB buffer_index {} is not large enough", buffer_index);
         memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
     } else {
+        ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
+                   "BufferDescriptorC invalid buffer_index {}", buffer_index);
+        ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size,
+                   "BufferDescriptorC buffer_index {} is not large enough", buffer_index);
         memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
     }
 
@@ -324,15 +338,35 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
 }
 
 std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
-    const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
-    return is_buffer_a ? BufferDescriptorA()[buffer_index].Size()
-                       : BufferDescriptorX()[buffer_index].Size();
+    // Make the int -> std::size_t conversion explicit; every comparison below is unsigned.
+    const auto index{static_cast<std::size_t>(buffer_index)};
+    // An A descriptor is used when one exists at this index and is non-empty.
+    const auto& descriptors_a{BufferDescriptorA()};
+    if (index < descriptors_a.size() && descriptors_a[index].Size() != 0) {
+        return descriptors_a[index].Size();
+    }
+
+    // Otherwise the X descriptor at the same index must exist and be non-empty.
+    const auto& descriptors_x{BufferDescriptorX()};
+    ASSERT_MSG(index < descriptors_x.size(), "BufferDescriptorX invalid buffer_index {}",
+               buffer_index);
+    ASSERT_MSG(descriptors_x[index].Size() > 0, "BufferDescriptorX buffer_index {} is empty",
+               buffer_index);
+    return descriptors_x[index].Size();
 }
 
 std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
-    const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
-    return is_buffer_b ? BufferDescriptorB()[buffer_index].Size()
-                       : BufferDescriptorC()[buffer_index].Size();
+    // Make the int -> std::size_t conversion explicit; every comparison below is unsigned.
+    const auto index{static_cast<std::size_t>(buffer_index)};
+    const auto& descriptors_b{BufferDescriptorB()};
+    if (index < descriptors_b.size() && descriptors_b[index].Size() != 0) {
+        return descriptors_b[index].Size();
+    }
+    // No usable B descriptor: fall back to the C descriptor at the same index.
+    const auto& descriptors_c{BufferDescriptorC()};
+    ASSERT_MSG(index < descriptors_c.size(), "BufferDescriptorC invalid buffer_index {}",
+               buffer_index);
+    return descriptors_c[index].Size();
 }
 
 std::string HLERequestContext::Description() const {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 1d99bf7a2..9cae5c73d 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1863,10 +1863,14 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
     }
 
     auto& kernel = system.Kernel();
-    auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
+    auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
+
+    if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
+        return reserve_result;
+    }
 
     auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
-    const auto result = handle_table.Create(std::move(transfer_mem_handle));
+    const auto result{handle_table.Create(std::move(transfer_mem_handle))};
     if (result.Failed()) {
         return result.Code();
     }
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index f0e73f57b..f2d3f8b49 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -8,15 +8,23 @@
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/result.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
-TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
-TransferMemory::~TransferMemory() = default;
+TransferMemory::TransferMemory(KernelCore& kernel, Memory::Memory& memory)
+    : Object{kernel}, memory{memory} {}
 
-std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address,
-                                                       u64 size, MemoryPermission permissions) {
-    std::shared_ptr<TransferMemory> transfer_memory{std::make_shared<TransferMemory>(kernel)};
+TransferMemory::~TransferMemory() {
+    // Release the memory region on destruction; the ResultCode from Reset() is discarded.
+    Reset();
+}
+
+std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, Memory::Memory& memory,
+                                                       VAddr base_address, u64 size,
+                                                       MemoryPermission permissions) {
+    std::shared_ptr<TransferMemory> transfer_memory{
+        std::make_shared<TransferMemory>(kernel, memory)};
 
     transfer_memory->base_address = base_address;
     transfer_memory->memory_size = size;
@@ -27,7 +35,7 @@ std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr
 }
 
 const u8* TransferMemory::GetPointer() const {
-    return backing_block.get()->data();
+    return memory.GetPointer(base_address);
 }
 
 u64 TransferMemory::GetSize() const {
@@ -62,6 +70,52 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
     return RESULT_SUCCESS;
 }
 
+ResultCode TransferMemory::Reserve() {
+    // Require state FlagTransfer | FlagMemoryPoolAllocated, ReadWrite permissions, no attributes.
+    const auto wanted_state{MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated};
+    auto& manager{owner_process->VMManager()};
+    const auto range_check{manager.CheckRangeState(
+        base_address, memory_size, wanted_state, wanted_state, VMAPermission::All,
+        VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None,
+        MemoryAttribute::IpcAndDeviceMapped)};
+    if (range_check.Failed()) {
+        return range_check.Code();
+    }
+    auto [state_, permissions_, attribute] = *range_check;
+
+    // Reprotect the range with the owner permissions converted to VMA permissions.
+    const auto new_permissions{SharedMemory::ConvertPermissions(owner_permissions)};
+    const auto reprotect{manager.ReprotectRange(base_address, memory_size, new_permissions)};
+    if (reprotect.IsError()) {
+        return reprotect;
+    }
+    // Keep the attributes reported by the check and additionally set Locked.
+    return manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask,
+                                      attribute | MemoryAttribute::Locked);
+}
+
+ResultCode TransferMemory::Reset() {
+    // Require state FlagTransfer | FlagMemoryPoolAllocated and the Locked attribute.
+    const auto wanted_state{MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated};
+    auto& manager{owner_process->VMManager()};
+    const auto range_check{manager.CheckRangeState(
+        base_address, memory_size, wanted_state, wanted_state, VMAPermission::None,
+        VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked,
+        MemoryAttribute::IpcAndDeviceMapped)};
+    if (range_check.Failed()) {
+        return range_check.Code();
+    }
+    // Reprotect the range as ReadWrite.
+    const auto reprotect{
+        manager.ReprotectRange(base_address, memory_size, VMAPermission::ReadWrite)};
+    if (reprotect.IsError()) {
+        return reprotect;
+    }
+    // Set the attributes selected by MemoryAttribute::Mask to None.
+    return manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask,
+                                      MemoryAttribute::None);
+}
+
 ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) {
     if (memory_size != size) {
         return ERR_INVALID_SIZE;
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index 0a6e15d18..6e388536a 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -11,6 +11,10 @@
 
 union ResultCode;
 
+namespace Memory {
+class Memory;
+}
+
 namespace Kernel {
 
 class KernelCore;
@@ -26,12 +30,13 @@ enum class MemoryPermission : u32;
 ///
 class TransferMemory final : public Object {
 public:
-    explicit TransferMemory(KernelCore& kernel);
+    explicit TransferMemory(KernelCore& kernel, Memory::Memory& memory);
     ~TransferMemory() override;
 
     static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
 
-    static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size,
+    static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Memory::Memory& memory,
+                                                  VAddr base_address, u64 size,
                                                   MemoryPermission permissions);
 
     TransferMemory(const TransferMemory&) = delete;
@@ -80,6 +85,14 @@ public:
     ///
     ResultCode UnmapMemory(VAddr address, u64 size);
 
+    /// Reserves the region to be used for the transfer memory, called after the transfer memory is
+    /// created.
+    ResultCode Reserve();
+
+    /// Resets the region previously used for the transfer memory, called after the transfer memory
+    /// is closed.
+    ResultCode Reset();
+
 private:
     /// Memory block backing this instance.
     std::shared_ptr<PhysicalMemory> backing_block;
@@ -98,6 +111,8 @@ private:
 
     /// Whether or not this transfer memory instance has mapped memory.
     bool is_mapped = false;
+
+    Memory::Memory& memory;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 0b3500fce..024c22901 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -544,7 +544,8 @@ MemoryInfo VMManager::QueryMemory(VAddr address) const {
 
 ResultCode VMManager::SetMemoryAttribute(VAddr address, u64 size, MemoryAttribute mask,
                                          MemoryAttribute attribute) {
-    constexpr auto ignore_mask = MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped;
+    constexpr auto ignore_mask =
+        MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped | MemoryAttribute::Locked;
     constexpr auto attribute_mask = ~ignore_mask;
 
     const auto result = CheckRangeState(
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 850a7ebc3..90b4b006a 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -98,6 +98,8 @@ enum class MemoryAttribute : u32 {
     DeviceMapped = 4,
     /// Uncached memory
     Uncached = 8,
+
+    IpcAndDeviceMapped = LockedForIPC | DeviceMapped,
 };
 
 constexpr MemoryAttribute operator|(MemoryAttribute lhs, MemoryAttribute rhs) {
@@ -654,6 +656,35 @@ public:
     /// is scheduled.
     Common::PageTable page_table{Memory::PAGE_BITS};
 
+    using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>;
+
+    /// Checks if an address range adheres to the specified states provided.
+    ///
+    /// @param address         The starting address of the address range.
+    /// @param size            The size of the address range.
+    /// @param state_mask      The memory state mask.
+    /// @param state           The state to compare the individual VMA states against,
+    ///                        which is done in the form of: (vma.state & state_mask) != state.
+    /// @param permission_mask The memory permissions mask.
+    /// @param permissions     The permission to compare the individual VMA permissions against,
+    ///                        which is done in the form of:
+    ///                        (vma.permission & permission_mask) != permission.
+    /// @param attribute_mask  The memory attribute mask.
+    /// @param attribute       The memory attributes to compare the individual VMA attributes
+    ///                        against, which is done in the form of:
+    ///                        (vma.attributes & attribute_mask) != attribute.
+    /// @param ignore_mask     The memory attributes to ignore during the check.
+    ///
+    /// @returns If successful, returns a tuple containing the memory state, memory permissions,
+    ///          and memory attributes (with ignored bits specified by ignore_mask unset) across
+    ///          the memory range.
+    /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE.
+    ///
+    CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state,
+                                 VMAPermission permission_mask, VMAPermission permissions,
+                                 MemoryAttribute attribute_mask, MemoryAttribute attribute,
+                                 MemoryAttribute ignore_mask) const;
+
 private:
     using VMAIter = VMAMap::iterator;
 
@@ -707,35 +738,6 @@ private:
     /// Clears out the page table
     void ClearPageTable();
 
-    using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>;
-
-    /// Checks if an address range adheres to the specified states provided.
-    ///
-    /// @param address         The starting address of the address range.
-    /// @param size            The size of the address range.
-    /// @param state_mask      The memory state mask.
-    /// @param state           The state to compare the individual VMA states against,
-    ///                        which is done in the form of: (vma.state & state_mask) != state.
-    /// @param permission_mask The memory permissions mask.
-    /// @param permissions     The permission to compare the individual VMA permissions against,
-    ///                        which is done in the form of:
-    ///                        (vma.permission & permission_mask) != permission.
-    /// @param attribute_mask  The memory attribute mask.
-    /// @param attribute       The memory attributes to compare the individual VMA attributes
-    ///                        against, which is done in the form of:
-    ///                        (vma.attributes & attribute_mask) != attribute.
-    /// @param ignore_mask     The memory attributes to ignore during the check.
-    ///
-    /// @returns If successful, returns a tuple containing the memory attributes
-    ///          (with ignored bits specified by ignore_mask unset), memory permissions, and
-    ///          memory state across the memory range.
-    /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE.
-    ///
-    CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state,
-                                 VMAPermission permission_mask, VMAPermission permissions,
-                                 MemoryAttribute attribute_mask, MemoryAttribute attribute,
-                                 MemoryAttribute ignore_mask) const;
-
     /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
     ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
 
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp
index a0c806e8f..1838260fd 100644
--- a/src/core/hle/kernel/wait_object.cpp
+++ b/src/core/hle/kernel/wait_object.cpp
@@ -50,17 +50,8 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
         if (ShouldWait(thread.get()))
             continue;
 
-        // A thread is ready to run if it's either in ThreadStatus::WaitSynch
-        // and the rest of the objects it is waiting on are ready.
-        bool ready_to_run = true;
-        if (thread_status == ThreadStatus::WaitSynch) {
-            ready_to_run = thread->AllWaitObjectsReady();
-        }
-
-        if (ready_to_run) {
-            candidate = thread.get();
-            candidate_priority = thread->GetPriority();
-        }
+        candidate = thread.get();
+        candidate_priority = thread->GetPriority();
     }
 
     return SharedFrom(candidate);
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 95aa5d23d..c1550013a 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -709,8 +709,34 @@ void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
     apm_sys->SetCpuBoostMode(ctx);
 }
 
-IStorage::IStorage(std::vector<u8> buffer)
-    : ServiceFramework("IStorage"), buffer(std::move(buffer)) {
+IStorageImpl::~IStorageImpl() = default;
+
+class StorageDataImpl final : public IStorageImpl {
+public:
+    explicit StorageDataImpl(std::vector<u8>&& buffer) : buffer{std::move(buffer)} {}
+
+    std::vector<u8>& GetData() override {
+        return buffer;
+    }
+
+    const std::vector<u8>& GetData() const override {
+        return buffer;
+    }
+
+    std::size_t GetSize() const override {
+        return buffer.size();
+    }
+
+private:
+    std::vector<u8> buffer;
+};
+
+IStorage::IStorage(std::vector<u8>&& buffer)
+    : ServiceFramework("IStorage"), impl{std::make_shared<StorageDataImpl>(std::move(buffer))} {
+    Register();
+}
+
+void IStorage::Register() {
     // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IStorage::Open, "Open"},
@@ -723,8 +749,13 @@ IStorage::IStorage(std::vector<u8> buffer)
 
 IStorage::~IStorage() = default;
 
-const std::vector<u8>& IStorage::GetData() const {
-    return buffer;
+void IStorage::Open(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_AM, "called");
+
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+
+    rb.Push(RESULT_SUCCESS);
+    rb.PushIpcInterface<IStorageAccessor>(*this);
 }
 
 void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
@@ -825,17 +856,16 @@ private:
     void PopOutData(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_AM, "called");
 
-        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
         const auto storage = applet->GetBroker().PopNormalDataToGame();
         if (storage == nullptr) {
             LOG_ERROR(Service_AM,
                       "storage is a nullptr. There is no data in the current normal channel");
-
+            IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERR_NO_DATA_IN_CHANNEL);
             return;
         }
 
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
         rb.Push(RESULT_SUCCESS);
         rb.PushIpcInterface<IStorage>(std::move(*storage));
     }
@@ -857,17 +887,16 @@ private:
     void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_AM, "called");
 
-        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
         const auto storage = applet->GetBroker().PopInteractiveDataToGame();
         if (storage == nullptr) {
             LOG_ERROR(Service_AM,
                       "storage is a nullptr. There is no data in the current interactive channel");
-
+            IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERR_NO_DATA_IN_CHANNEL);
             return;
         }
 
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
         rb.Push(RESULT_SUCCESS);
         rb.PushIpcInterface<IStorage>(std::move(*storage));
     }
@@ -891,15 +920,6 @@ private:
     std::shared_ptr<Applets::Applet> applet;
 };
 
-void IStorage::Open(Kernel::HLERequestContext& ctx) {
-    LOG_DEBUG(Service_AM, "called");
-
-    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
-    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IStorageAccessor>(*this);
-}
-
 IStorageAccessor::IStorageAccessor(IStorage& storage)
     : ServiceFramework("IStorageAccessor"), backing(storage) {
     // clang-format off
@@ -921,7 +941,7 @@ void IStorageAccessor::GetSize(Kernel::HLERequestContext& ctx) {
     IPC::ResponseBuilder rb{ctx, 4};
 
     rb.Push(RESULT_SUCCESS);
-    rb.Push(static_cast<u64>(backing.buffer.size()));
+    rb.Push(static_cast<u64>(backing.GetSize()));
 }
 
 void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
@@ -932,17 +952,17 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());
 
-    if (data.size() > backing.buffer.size() - offset) {
+    if (offset > backing.GetSize() || data.size() > backing.GetSize() - offset) {
         LOG_ERROR(Service_AM,
                   "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
-                  backing.buffer.size(), data.size(), offset);
+                  backing.GetSize(), data.size(), offset);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
         return;
     }
 
-    std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
+    std::memcpy(backing.GetData().data() + offset, data.data(), data.size());
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
@@ -956,16 +976,16 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
 
-    if (size > backing.buffer.size() - offset) {
+    if (offset > backing.GetSize() || size > backing.GetSize() - offset) {
         LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
-                  backing.buffer.size(), size, offset);
+                  backing.GetSize(), size, offset);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
         return;
     }
 
-    ctx.WriteBuffer(backing.buffer.data() + offset, size);
+    ctx.WriteBuffer(backing.GetData().data() + offset, size);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
@@ -1031,7 +1051,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
     rp.SetCurrentOffset(3);
     const auto handle{rp.Pop<Kernel::Handle>()};
 
-    const auto transfer_mem =
+    auto transfer_mem =
         system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle);
 
     if (transfer_mem == nullptr) {
@@ -1047,7 +1067,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
     rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory)));
+    rb.PushIpcInterface<IStorage>(std::move(memory));
 }
 
 IApplicationFunctions::IApplicationFunctions(Core::System& system_)
@@ -1189,13 +1209,11 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
         u64 build_id{};
         std::memcpy(&build_id, build_id_full.data(), sizeof(u64));
 
-        const auto data =
-            backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id});
-
+        auto data = backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id});
         if (data.has_value()) {
             IPC::ResponseBuilder rb{ctx, 2, 0, 1};
             rb.Push(RESULT_SUCCESS);
-            rb.PushIpcInterface<AM::IStorage>(*data);
+            rb.PushIpcInterface<IStorage>(std::move(*data));
             launch_popped_application_specific = true;
             return;
         }
@@ -1218,7 +1236,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
         std::vector<u8> buffer(sizeof(LaunchParameterAccountPreselectedUser));
         std::memcpy(buffer.data(), &params, buffer.size());
 
-        rb.PushIpcInterface<AM::IStorage>(buffer);
+        rb.PushIpcInterface<IStorage>(std::move(buffer));
         launch_popped_account_preselect = true;
         return;
     }
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 448817be9..0b9a4332d 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -12,7 +12,8 @@
 
 namespace Kernel {
 class KernelCore;
-}
+class TransferMemory;
+} // namespace Kernel
 
 namespace Service::NVFlinger {
 class NVFlinger;
@@ -188,19 +189,36 @@ private:
     std::shared_ptr<AppletMessageQueue> msg_queue;
 };
 
+class IStorageImpl {
+public:
+    virtual ~IStorageImpl();
+    virtual std::vector<u8>& GetData() = 0;
+    virtual const std::vector<u8>& GetData() const = 0;
+    virtual std::size_t GetSize() const = 0;
+};
+
 class IStorage final : public ServiceFramework<IStorage> {
 public:
-    explicit IStorage(std::vector<u8> buffer);
+    explicit IStorage(std::vector<u8>&& buffer);
     ~IStorage() override;
 
-    const std::vector<u8>& GetData() const;
+    std::vector<u8>& GetData() {
+        return impl->GetData();
+    }
+
+    const std::vector<u8>& GetData() const {
+        return impl->GetData();
+    }
+
+    std::size_t GetSize() const {
+        return impl->GetSize();
+    }
 
 private:
+    void Register();
     void Open(Kernel::HLERequestContext& ctx);
 
-    std::vector<u8> buffer;
-
-    friend class IStorageAccessor;
+    std::shared_ptr<IStorageImpl> impl;
 };
 
 class IStorageAccessor final : public ServiceFramework<IStorageAccessor> {
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index 92f995f8f..3e97ba218 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -56,6 +56,7 @@ std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
 
     auto out = std::move(out_channel.front());
     out_channel.pop_front();
+    pop_out_data_event.writable->Clear();
     return out;
 }
 
@@ -74,6 +75,7 @@ std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
 
     auto out = std::move(out_interactive_channel.front());
     out_interactive_channel.pop_front();
+    pop_interactive_out_data_event.writable->Clear();
     return out;
 }
 
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp
index eab0d42c9..e6c4e8b87 100644
--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -186,7 +186,7 @@ void Error::Execute() {
 
 void Error::DisplayCompleted() {
     complete = true;
-    broker.PushNormalDataFromApplet(IStorage{{}});
+    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>{}});
     broker.SignalStateChanged();
 }
 
diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp
index 328438a1d..fe8400a15 100644
--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ b/src/core/hle/service/am/applets/general_backend.cpp
@@ -148,7 +148,7 @@ void Auth::AuthFinished(bool successful) {
     std::vector<u8> out(sizeof(Return));
     std::memcpy(out.data(), &return_, sizeof(Return));
 
-    broker.PushNormalDataFromApplet(IStorage{out});
+    broker.PushNormalDataFromApplet(IStorage{std::move(out)});
     broker.SignalStateChanged();
 }
 
@@ -198,7 +198,7 @@ void PhotoViewer::Execute() {
 }
 
 void PhotoViewer::ViewFinished() {
-    broker.PushNormalDataFromApplet(IStorage{{}});
+    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>{}});
     broker.SignalStateChanged();
 }
 
diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp
index 3eba696ca..91d00f72a 100644
--- a/src/core/hle/service/am/applets/profile_select.cpp
+++ b/src/core/hle/service/am/applets/profile_select.cpp
@@ -50,7 +50,7 @@ void ProfileSelect::ExecuteInteractive() {
 
 void ProfileSelect::Execute() {
     if (complete) {
-        broker.PushNormalDataFromApplet(IStorage{final_data});
+        broker.PushNormalDataFromApplet(IStorage{std::move(final_data)});
         return;
     }
 
@@ -71,7 +71,7 @@ void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) {
 
     final_data = std::vector<u8>(sizeof(UserSelectionOutput));
     std::memcpy(final_data.data(), &output, final_data.size());
-    broker.PushNormalDataFromApplet(IStorage{final_data});
+    broker.PushNormalDataFromApplet(IStorage{std::move(final_data)});
     broker.SignalStateChanged();
 }
 
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 748559cd0..964c67202 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -102,7 +102,8 @@ void SoftwareKeyboard::ExecuteInteractive() {
 
 void SoftwareKeyboard::Execute() {
     if (complete) {
-        broker.PushNormalDataFromApplet(IStorage{final_data});
+        broker.PushNormalDataFromApplet(IStorage{std::move(final_data)});
+        broker.SignalStateChanged();
         return;
     }
 
@@ -119,7 +120,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
         std::vector<u8> output_sub(SWKBD_OUTPUT_BUFFER_SIZE);
 
         if (config.utf_8) {
-            const u64 size = text->size() + 8;
+            const u64 size = text->size() + sizeof(u64);
             const auto new_text = Common::UTF16ToUTF8(*text);
 
             std::memcpy(output_sub.data(), &size, sizeof(u64));
@@ -130,7 +131,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
             std::memcpy(output_main.data() + 4, new_text.data(),
                         std::min(new_text.size(), SWKBD_OUTPUT_BUFFER_SIZE - 4));
         } else {
-            const u64 size = text->size() * 2 + 8;
+            const u64 size = text->size() * 2 + sizeof(u64);
             std::memcpy(output_sub.data(), &size, sizeof(u64));
             std::memcpy(output_sub.data() + 8, text->data(),
                         std::min(text->size() * 2, SWKBD_OUTPUT_BUFFER_SIZE - 8));
@@ -144,15 +145,15 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
         final_data = output_main;
 
         if (complete) {
-            broker.PushNormalDataFromApplet(IStorage{output_main});
+            broker.PushNormalDataFromApplet(IStorage{std::move(output_main)});
             broker.SignalStateChanged();
         } else {
-            broker.PushInteractiveDataFromApplet(IStorage{output_sub});
+            broker.PushInteractiveDataFromApplet(IStorage{std::move(output_sub)});
         }
     } else {
         output_main[0] = 1;
         complete = true;
-        broker.PushNormalDataFromApplet(IStorage{output_main});
+        broker.PushNormalDataFromApplet(IStorage{std::move(output_main)});
         broker.SignalStateChanged();
     }
 }
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index 5546ef6e8..05d6b3a19 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -284,7 +284,7 @@ void WebBrowser::Finalize() {
     std::vector<u8> data(sizeof(WebCommonReturnValue));
     std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue));
 
-    broker.PushNormalDataFromApplet(IStorage{data});
+    broker.PushNormalDataFromApplet(IStorage{std::move(data)});
     broker.SignalStateChanged();
 
     if (!temporary_dir.empty() && FileUtil::IsDirectory(temporary_dir)) {
diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp
index 5eb26caf8..8f1be0e48 100644
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -50,16 +50,16 @@ private:
         IPC::RequestParser rp{ctx};
         const auto process_id = rp.PopRaw<u64>();
 
-        const auto data1 = ctx.ReadBuffer(0);
-        const auto data2 = ctx.ReadBuffer(1);
+        std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
+        if (Type == Core::Reporter::PlayReportType::New) {
+            data.emplace_back(ctx.ReadBuffer(1));
+        }
 
-        LOG_DEBUG(Service_PREPO,
-                  "called, type={:02X}, process_id={:016X}, data1_size={:016X}, data2_size={:016X}",
-                  static_cast<u8>(Type), process_id, data1.size(), data2.size());
+        LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}",
+                  static_cast<u8>(Type), process_id, data[0].size());
 
         const auto& reporter{system.GetReporter()};
-        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2},
-                                process_id);
+        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
@@ -70,19 +70,19 @@ private:
         IPC::RequestParser rp{ctx};
         const auto user_id = rp.PopRaw<u128>();
         const auto process_id = rp.PopRaw<u64>();
-
-        const auto data1 = ctx.ReadBuffer(0);
-        const auto data2 = ctx.ReadBuffer(1);
+        std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
+        if (Type == Core::Reporter::PlayReportType::New) {
+            data.emplace_back(ctx.ReadBuffer(1));
+        }
 
         LOG_DEBUG(
             Service_PREPO,
-            "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}, "
-            "data2_size={:016X}",
-            static_cast<u8>(Type), user_id[1], user_id[0], process_id, data1.size(), data2.size());
+            "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}",
+            static_cast<u8>(Type), user_id[1], user_id[0], process_id, data[0].size());
 
         const auto& reporter{system.GetReporter()};
-        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2},
-                                process_id, user_id);
+        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id,
+                                user_id);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 81b6d9eff..402869fde 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -624,6 +624,19 @@ enum class ShuffleOperation : u64 {
     Bfly = 3, // shuffleXorNV
 };
 
+enum class ShfType : u64 { // Value of the `shf.type` instruction field
+    Bits32 = 0, // Shift amount is clamped/wrapped to 32; the other types use 64
+    U64 = 2,
+    S64 = 3, // ShiftRight shifts the high word with the signed opcode for this type
+};
+
+enum class ShfXmode : u64 { // Value of the `shf.xmode` instruction field
+    None = 0, // The only mode DecodeShift handles; the rest are flagged unimplemented
+    HI = 1,
+    X = 2,
+    XHI = 3,
+};
+
 union Instruction {
     constexpr Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -776,6 +789,13 @@ union Instruction {
     } shr;
 
     union {
+        BitField<37, 2, ShfType> type; // Selects the 32 or 64 limit of the shift amount
+        BitField<48, 2, ShfXmode> xmode;
+        BitField<50, 1, u64> wrap; // Wrap the shift amount instead of clamping it
+        BitField<20, 6, u64> immediate; // Shift amount used when the B operand is immediate
+    } shf;
+
+    union {
         BitField<39, 5, u64> shift_amount;
         BitField<48, 1, u64> negate_b;
         BitField<49, 1, u64> negate_a;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 362942e09..46a7433ea 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -248,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
 }
 
 GLintptr RasterizerOpenGL::SetupIndexBuffer() {
-    if (accelerate_draw != AccelDraw::Indexed) {
-        return 0;
-    }
     MICROPROFILE_SCOPE(OpenGL_Index);
     const auto& regs = system.GPU().Maxwell3D().regs;
     const std::size_t size = CalculateIndexBufferSize();
@@ -546,7 +543,8 @@ void RasterizerOpenGL::Clear() {
     }
 }
 
-void RasterizerOpenGL::DrawPrelude() {
+void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
+    MICROPROFILE_SCOPE(OpenGL_Drawing);
     auto& gpu = system.GPU().Maxwell3D();
 
     SyncRasterizeEnable(state);
@@ -567,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() {
 
     buffer_cache.Acquire();
 
-    // Draw the vertex batch
-    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-
     std::size_t buffer_size = CalculateVertexArraysSize();
 
     // Add space for index buffer
@@ -596,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() {
     // Upload vertex and index data.
     SetupVertexBuffer(vao);
     SetupVertexInstances(vao);
-    index_buffer_offset = SetupIndexBuffer();
+
+    GLintptr index_buffer_offset;
+    if (is_indexed) {
+        index_buffer_offset = SetupIndexBuffer();
+    }
 
     // Prepare packed bindings.
     bind_ubo_pushbuffer.Setup();
@@ -630,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() {
         // As all cached buffers are invalidated, we need to recheck their state.
         gpu.dirty.ResetVertexArrays();
     }
+    gpu.dirty.memory_general = false;
 
     shader_program_manager->ApplyTo(state);
     state.Apply();
@@ -637,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() {
     if (texture_cache.TextureBarrier()) {
         glTextureBarrier();
     }
-}
-
-struct DrawParams {
-    bool is_indexed{};
-    bool is_instanced{};
-    GLenum primitive_mode{};
-    GLint count{};
-    GLint base_vertex{};
-
-    // Indexed settings
-    GLenum index_format{};
-    GLintptr index_buffer_offset{};
-
-    // Instanced setting
-    GLint num_instances{};
-    GLint base_instance{};
-
-    void DispatchDraw() {
-        if (is_indexed) {
-            const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
-            if (is_instanced) {
-                glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
-                                                              index_buffer_ptr, num_instances,
-                                                              base_vertex, base_instance);
-            } else {
-                glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
-                                         base_vertex);
-            }
-        } else {
-            if (is_instanced) {
-                glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances,
-                                                  base_instance);
-            } else {
-                glDrawArrays(primitive_mode, base_vertex, count);
-            }
-        }
-    }
-};
-
-bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
-    accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
 
-    MICROPROFILE_SCOPE(OpenGL_Drawing);
-
-    DrawPrelude();
-
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    const auto& regs = maxwell3d.regs;
-    const auto current_instance = maxwell3d.state.current_instance;
-    DrawParams draw_call{};
-    draw_call.is_indexed = is_indexed;
-    draw_call.num_instances = static_cast<GLint>(1);
-    draw_call.base_instance = static_cast<GLint>(current_instance);
-    draw_call.is_instanced = current_instance > 0;
-    draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
-    if (draw_call.is_indexed) {
-        draw_call.count = static_cast<GLint>(regs.index_array.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
-        draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
-        draw_call.index_buffer_offset = index_buffer_offset;
+    const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+    const GLsizei num_instances =
+        static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+    if (is_indexed) {
+        const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+        const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
+        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+        glDrawElementsInstancedBaseVertexBaseInstance(
+            primitive_mode, num_vertices, index_format,
+            reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex,
+            base_instance);
     } else {
-        draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
+        const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
+        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+        glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances,
+                                          base_instance);
     }
-    draw_call.DispatchDraw();
+}
 
-    maxwell3d.dirty.memory_general = false;
-    accelerate_draw = AccelDraw::Disabled;
+bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
+    Draw(is_indexed, false); // Not instanced: Draw() submits a single instance
     return true;
 }
 
 bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
-    accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
-
-    MICROPROFILE_SCOPE(OpenGL_Drawing);
-
-    DrawPrelude();
-
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    const auto& regs = maxwell3d.regs;
-    const auto& draw_setup = maxwell3d.mme_draw;
-    DrawParams draw_call{};
-    draw_call.is_indexed = is_indexed;
-    draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count);
-    draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance);
-    draw_call.is_instanced = draw_setup.instance_count > 1;
-    draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
-    if (draw_call.is_indexed) {
-        draw_call.count = static_cast<GLint>(regs.index_array.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
-        draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
-        draw_call.index_buffer_offset = index_buffer_offset;
-    } else {
-        draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
-    }
-    draw_call.DispatchDraw();
-
-    maxwell3d.dirty.memory_general = false;
-    accelerate_draw = AccelDraw::Disabled;
+    Draw(is_indexed, true); // Instanced: the instance count comes from gpu.mme_draw
     return true;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6a27cf497..0501f3828 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -103,7 +103,7 @@ private:
                            std::size_t size);
 
     /// Syncs all the state, shaders, render targets and textures setting before a draw call.
-    void DrawPrelude();
+    void Draw(bool is_indexed, bool is_instanced);
 
     /// Configures the current textures to use for the draw command.
     void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
@@ -220,12 +220,7 @@ private:
 
     GLintptr SetupIndexBuffer();
 
-    GLintptr index_buffer_offset;
-
     void SetupShaders(GLenum primitive_mode);
-
-    enum class AccelDraw { Disabled, Arrays, Indexed };
-    AccelDraw accelerate_draw = AccelDraw::Disabled;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index d419e9c45..3b391d3e6 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -10,8 +10,80 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::ShfType;
+using Tegra::Shader::ShfXmode;
+
+namespace {
+
+Node IsFull(Node shift) { // Builds the condition "shift == 32"
+    return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
+}
+
+// Applies `opcode` to `value` by `shift`, selecting zero when the shift amount is exactly 32.
+Node Shift(OperationCode opcode, Node value, Node shift) {
+    Node shifted = Operation(opcode, move(value), shift);
+    return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
+}
+
+Node ClampShift(Node shift, s32 size = 32) { // Restricts `shift` to the range [0, size]
+    Node lower_bounded = Operation(OperationCode::IMax, move(shift), Immediate(0));
+    return Operation(OperationCode::IMin, move(lower_bounded), Immediate(size));
+}
+
+Node WrapShift(Node shift, s32 size = 32) { // Masks `shift` with `size - 1`
+    return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
+}
+
+// Builds the low word of the 64-bit high:low pair shifted right by `shift`.
+Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+    // These values are used when the shift value is less than 32
+    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
+    Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
+    Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
+
+    if (type == ShfType::Bits32) {
+        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
+        return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
+    }
+
+    // And these are used when the shift value is 32 or larger
+    const bool is_signed = type == ShfType::S64;
+    const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
+    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
+    Node greater = Shift(opcode, high, move(reduced));
+    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
+    // A right shift by zero must hand back `low` untouched, not `high`
+    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+    return Operation(OperationCode::Select, move(is_zero), move(low), move(value));
+}
+
+// Builds the high word of the 64-bit high:low pair shifted left by `shift`.
+Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+    // These values are used when the shift value is less than 32. The right shift goes through
+    // Shift() so that a `low_shift` of exactly 32 contributes no bits from `low`.
+    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, low_shift);
+    Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
+    Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
+
+    if (type == ShfType::Bits32) {
+        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
+        return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
+    }
+
+    // And these are used when the shift value is 32 or larger
+    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
+    Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
+    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
+    return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
+}
+
+} // Anonymous namespace
 
 u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
@@ -28,29 +100,48 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
         }
     }();
 
-    switch (opcode->get().GetId()) {
+    switch (const auto opid = opcode->get().GetId(); opid) {
     case OpCode::Id::SHR_C:
     case OpCode::Id::SHR_R:
     case OpCode::Id::SHR_IMM: {
-        if (instr.shr.wrap) {
-            op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
-        } else {
-            op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
-            op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
-        }
+        op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
 
         Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
-                                     std::move(op_a), std::move(op_b));
+                                     move(op_a), move(op_b));
         SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, std::move(value));
+        SetRegister(bb, instr.gpr0, move(value));
         break;
     }
     case OpCode::Id::SHL_C:
     case OpCode::Id::SHL_R:
     case OpCode::Id::SHL_IMM: {
-        const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
+        Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
         SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetRegister(bb, instr.gpr0, move(value));
+        break;
+    }
+    case OpCode::Id::SHF_RIGHT_R:
+    case OpCode::Id::SHF_RIGHT_IMM:
+    case OpCode::Id::SHF_LEFT_R:
+    case OpCode::Id::SHF_LEFT_IMM: {
+        UNIMPLEMENTED_IF(instr.generates_cc);
+        UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
+                             static_cast<int>(instr.shf.xmode.Value()));
+
+        if (instr.is_b_imm) {
+            op_b = Immediate(static_cast<u32>(instr.shf.immediate));
+        }
+        const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
+        Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
+
+        Node negated_shift = Operation(OperationCode::INegate, shift);
+        Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
+
+        const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
+        Node value = (is_right ? ShiftRight : ShiftLeft)(
+            move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
+
+        SetRegister(bb, instr.gpr0, move(value));
         break;
     }
     default: