13 files changed, 375 insertions, 290 deletions
diff --git a/.travis-build.sh b/.travis-build.sh
index bb4e6fc47..fc5a5f8b2 100755
--- a/.travis-build.sh
+++ b/.travis-build.sh
@@ -52,8 +52,8 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
     export Qt5_DIR=$(brew --prefix)/opt/qt5
 
     mkdir build && cd build
-    cmake .. -DUSE_SYSTEM_CURL=ON -GXcode
-    xcodebuild -configuration Release
+    cmake .. -DUSE_SYSTEM_CURL=ON -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h" -DCMAKE_BUILD_TYPE=Release
+    make -j4
 
     ctest -VV -C Release
 fi
diff --git a/.travis-upload.sh b/.travis-upload.sh
index 8c1fa21c5..edf195f7d 100755
--- a/.travis-upload.sh
+++ b/.travis-upload.sh
@@ -16,8 +16,8 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
     COMPRESSION_FLAGS="-czvf"
     mkdir "$REV_NAME"
 
-    cp build/src/citra/Release/citra "$REV_NAME"
-    cp -r build/src/citra_qt/Release/citra-qt.app "$REV_NAME"
+    cp build/src/citra/citra "$REV_NAME"
+    cp -r build/src/citra_qt/citra-qt.app "$REV_NAME"
 
     # move qt libs into app bundle for deployment
     $(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/citra-qt.app"
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 02d5a7a36..d45daca35 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -55,22 +55,19 @@ SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u
             Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
         }
     } else {
-        // TODO(Subv): What happens if an application tries to create multiple memory blocks
-        // pointing to the same address?
         auto& vm_manager = shared_memory->owner_process->vm_manager;
         // The memory is already available and mapped in the owner process.
-        auto vma = vm_manager.FindVMA(address)->second;
-        // Copy it over to our own storage
-        shared_memory->backing_block = std::make_shared<std::vector<u8>>(
-            vma.backing_block->data() + vma.offset, vma.backing_block->data() + vma.offset + size);
-        shared_memory->backing_block_offset = 0;
-        // Unmap the existing pages
-        vm_manager.UnmapRange(address, size);
-        // Map our own block into the address space
-        vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size,
-                                  MemoryState::Shared);
-        // Reprotect the block with the new permissions
-        vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions));
+        auto vma = vm_manager.FindVMA(address);
+        ASSERT_MSG(vma != vm_manager.vma_map.end(), "Invalid memory address");
+        ASSERT_MSG(vma->second.backing_block, "Backing block doesn't exist for address");
+
+        // The returned VMA might be a bigger one encompassing the desired address.
+        auto vma_offset = address - vma->first;
+        ASSERT_MSG(vma_offset + size <= vma->second.size,
+                   "Shared memory exceeds bounds of mapped block");
+
+        shared_memory->backing_block = vma->second.backing_block;
+        shared_memory->backing_block_offset = vma->second.offset + vma_offset;
     }
 
     shared_memory->base_address = address;
@@ -184,4 +181,4 @@ u8* SharedMemory::GetPointer(u32 offset) {
     return backing_block->data() + backing_block_offset + offset;
 }
 
-} // namespace
+} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 2614a260c..0f7970ebe 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -541,6 +541,12 @@ s32 Thread::GetWaitObjectIndex(WaitObject* object) const {
     return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
 }
 
+VAddr Thread::GetCommandBufferAddress() const {
+    // Offset from the start of TLS at which the IPC command buffer begins.
+    static constexpr int CommandHeaderOffset = 0x80;
+    return GetTLSAddress() + CommandHeaderOffset;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 4679c2022..314fba81f 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -164,6 +164,12 @@ public:
         return tls_address;
     }
 
+    /**
+     * Returns the address of this thread's command buffer, located in its TLS.
+     * @returns VAddr of the thread's command buffer.
+     */
+    VAddr GetCommandBufferAddress() const;
+
     /**
      * Returns whether this thread is waiting for all the objects in
      * its wait list to become ready, as a result of a WaitSynchronizationN call
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 2f7362748..59ea9823d 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -171,7 +171,11 @@ void SendParameter(const MessageParameter& parameter) {
     next_parameter = parameter;
     // Signal the event to let the receiver know that a new parameter is ready to be read
     auto* const slot_data = GetAppletSlotData(static_cast<AppletId>(parameter.destination_id));
-    ASSERT(slot_data);
+    if (slot_data == nullptr) {
+        LOG_DEBUG(Service_APT, "No applet was registered with the id %03X",
+                  parameter.destination_id);
+        return;
+    }
 
     slot_data->parameter_event->Signal();
 }
@@ -505,9 +509,6 @@ void SendParameter(Service::Interface* self) {
     size_t size;
     VAddr buffer = rp.PopStaticBuffer(&size);
 
-    std::shared_ptr<HLE::Applets::Applet> dest_applet =
-        HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id));
-
     LOG_DEBUG(Service_APT,
               "called src_app_id=0x%08X, dst_app_id=0x%08X, signal_type=0x%08X,"
               "buffer_size=0x%08X, handle=0x%08X, size=0x%08zX, in_param_buffer_ptr=0x%08X",
@@ -522,12 +523,6 @@ void SendParameter(Service::Interface* self) {
         return;
     }
 
-    if (dest_applet == nullptr) {
-        LOG_ERROR(Service_APT, "Unknown applet id=0x%08X", dst_app_id);
-        rb.Push<u32>(-1); // TODO(Subv): Find the right error code
-        return;
-    }
-
     MessageParameter param;
     param.destination_id = dst_app_id;
     param.sender_id = src_app_id;
@@ -536,7 +531,14 @@ void SendParameter(Service::Interface* self) {
     param.buffer.resize(buffer_size);
     Memory::ReadBlock(buffer, param.buffer.data(), param.buffer.size());
 
-    rb.Push(dest_applet->ReceiveParameter(param));
+    SendParameter(param);
+
+    // If the applet is running in HLE mode, use the HLE interface to communicate with it.
+    if (auto dest_applet = HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id))) {
+        rb.Push(dest_applet->ReceiveParameter(param));
+    } else {
+        rb.Push(RESULT_SUCCESS);
+    }
 }
 
 void ReceiveParameter(Service::Interface* self) {
@@ -765,7 +767,12 @@ void PrepareToStartLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x18, 1, 0); // 0x180040
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
 
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
+
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+
+    // TODO(Subv): Launch the requested applet application.
+
     auto applet = HLE::Applets::Applet::Get(applet_id);
     if (applet) {
         LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id);
@@ -773,7 +780,6 @@ void PrepareToStartLibraryApplet(Service::Interface* self) {
     } else {
         rb.Push(HLE::Applets::Applet::Create(applet_id));
     }
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 }
 
 void PrepareToStartNewestHomeMenu(Service::Interface* self) {
@@ -794,7 +800,12 @@ void PreloadLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x16, 1, 0); // 0x160040
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
 
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
+
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+
+    // TODO(Subv): Launch the requested applet application.
+
     auto applet = HLE::Applets::Applet::Get(applet_id);
     if (applet) {
         LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id);
@@ -802,34 +813,40 @@ void PreloadLibraryApplet(Service::Interface* self) {
     } else {
         rb.Push(HLE::Applets::Applet::Create(applet_id));
     }
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 }
 
 void StartLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x1E, 2, 4); // 0x1E0084
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
-    std::shared_ptr<HLE::Applets::Applet> applet = HLE::Applets::Applet::Get(applet_id);
-
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
-
-    if (applet == nullptr) {
-        LOG_ERROR(Service_APT, "unknown applet id=%08X", applet_id);
-        IPC::RequestBuilder rb = rp.MakeBuilder(1, 0, false);
-        rb.Push<u32>(-1); // TODO(Subv): Find the right error code
-        return;
-    }
 
     size_t buffer_size = rp.Pop<u32>();
     Kernel::Handle handle = rp.PopHandle();
     VAddr buffer_addr = rp.PopStaticBuffer();
 
-    AppletStartupParameter parameter;
-    parameter.object = Kernel::g_handle_table.GetGeneric(handle);
-    parameter.buffer.resize(buffer_size);
-    Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size());
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
-    rb.Push(applet->Start(parameter));
+
+    // Send the Wakeup signal to the applet
+    MessageParameter param;
+    param.destination_id = static_cast<u32>(applet_id);
+    param.sender_id = static_cast<u32>(AppletId::Application);
+    param.object = Kernel::g_handle_table.GetGeneric(handle);
+    param.signal = static_cast<u32>(SignalType::Wakeup);
+    param.buffer.resize(buffer_size);
+    Memory::ReadBlock(buffer_addr, param.buffer.data(), param.buffer.size());
+    SendParameter(param);
+
+    // In case the applet is being HLEd, attempt to communicate with it.
+    if (auto applet = HLE::Applets::Applet::Get(applet_id)) {
+        AppletStartupParameter parameter;
+        parameter.object = Kernel::g_handle_table.GetGeneric(handle);
+        parameter.buffer.resize(buffer_size);
+        Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size());
+        rb.Push(applet->Start(parameter));
+    } else {
+        rb.Push(RESULT_SUCCESS);
+    }
 }
 
 void CancelLibraryApplet(Service::Interface* self) {
diff --git a/src/core/hle/service/apt/apt_s.cpp b/src/core/hle/service/apt/apt_s.cpp
index fe1d21fff..bb78ee7d7 100644
--- a/src/core/hle/service/apt/apt_s.cpp
+++ b/src/core/hle/service/apt/apt_s.cpp
@@ -20,7 +20,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00090040, IsRegistered, "IsRegistered"},
     {0x000A0040, nullptr, "GetAttribute"},
     {0x000B0040, InquireNotification, "InquireNotification"},
-    {0x000C0104, nullptr, "SendParameter"},
+    {0x000C0104, SendParameter, "SendParameter"},
     {0x000D0080, ReceiveParameter, "ReceiveParameter"},
     {0x000E0080, GlanceParameter, "GlanceParameter"},
     {0x000F0100, nullptr, "CancelParameter"},
@@ -38,7 +38,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x001B00C4, nullptr, "StartApplication"},
     {0x001C0000, nullptr, "WakeupApplication"},
     {0x001D0000, nullptr, "CancelApplication"},
-    {0x001E0084, nullptr, "StartLibraryApplet"},
+    {0x001E0084, StartLibraryApplet, "StartLibraryApplet"},
     {0x001F0084, nullptr, "StartSystemApplet"},
     {0x00200044, nullptr, "StartNewestHomeMenu"},
     {0x00210000, nullptr, "OrderToCloseApplication"},
diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp
index d5624fe54..b10d5852b 100644
--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -5,6 +5,8 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/event.h"
 #include "core/hle/service/nim/nim.h"
 #include "core/hle/service/nim/nim_aoc.h"
 #include "core/hle/service/nim/nim_s.h"
@@ -14,6 +16,16 @@
 namespace Service {
 namespace NIM {
 
+static Kernel::SharedPtr<Kernel::Event> nim_system_update_event;
+
+void CheckForSysUpdateEvent(Service::Interface* self) {
+    IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x5, 0, 0); // 0x50000
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 2);
+    rb.Push(RESULT_SUCCESS);
+    rb.PushCopyHandles(Kernel::g_handle_table.Create(nim_system_update_event).Unwrap());
+    LOG_TRACE(Service_NIM, "called");
+}
+
 void CheckSysUpdateAvailable(Service::Interface* self) {
     u32* cmd_buff = Kernel::GetCommandBuffer();
 
@@ -29,9 +41,13 @@ void Init() {
     AddService(new NIM_AOC_Interface);
     AddService(new NIM_S_Interface);
     AddService(new NIM_U_Interface);
+
+    nim_system_update_event = Kernel::Event::Create(ResetType::OneShot, "NIM System Update Event");
 }
 
-void Shutdown() {}
+void Shutdown() {
+    nim_system_update_event = nullptr;
+}
 
 } // namespace NIM
 
diff --git a/src/core/hle/service/nim/nim.h b/src/core/hle/service/nim/nim.h
index c3106f18b..dbf605e5a 100644
--- a/src/core/hle/service/nim/nim.h
+++ b/src/core/hle/service/nim/nim.h
@@ -11,6 +11,17 @@ class Interface;
 namespace NIM {
 
 /**
+ * NIM::CheckForSysUpdateEvent service function
+ *  Inputs:
+ *      1 : None
+ *  Outputs:
+ *      1 : Result of function, 0 on success, otherwise error code
+ *      2 : Copy handle descriptor
+ *      3 : System Update event handle
+ */
+void CheckForSysUpdateEvent(Service::Interface* self);
+
+/**
  * NIM::CheckSysUpdateAvailable service function
  *  Inputs:
  *      1 : None
diff --git a/src/core/hle/service/nim/nim_u.cpp b/src/core/hle/service/nim/nim_u.cpp
index 7664bad60..569660278 100644
--- a/src/core/hle/service/nim/nim_u.cpp
+++ b/src/core/hle/service/nim/nim_u.cpp
@@ -12,7 +12,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00010000, nullptr, "StartSysUpdate"},
     {0x00020000, nullptr, "GetUpdateDownloadProgress"},
     {0x00040000, nullptr, "FinishTitlesInstall"},
-    {0x00050000, nullptr, "CheckForSysUpdateEvent"},
+    {0x00050000, CheckForSysUpdateEvent, "CheckForSysUpdateEvent"},
     {0x00090000, CheckSysUpdateAvailable, "CheckSysUpdateAvailable"},
     {0x000A0000, nullptr, "GetState"},
     {0x000B0000, nullptr, "GetSystemTitleHash"},
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 847e69710..7f58be6de 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -82,10 +82,10 @@ void UnmapRegion(PageTable& page_table, VAddr base, u32 size) {
  * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
  * using a VMA from the current process
  */
-static u8* GetPointerFromVMA(VAddr vaddr) {
+static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) {
     u8* direct_pointer = nullptr;
 
-    auto& vm_manager = Kernel::g_current_process->vm_manager;
+    auto& vm_manager = process.vm_manager;
 
     auto it = vm_manager.FindVMA(vaddr);
     ASSERT(it != vm_manager.vma_map.end());
@@ -108,6 +108,14 @@ static u8* GetPointerFromVMA(VAddr vaddr) {
 }
 
 /**
+ * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
+ * using a VMA from the current process.
+ */
+static u8* GetPointerFromVMA(VAddr vaddr) {
+    return GetPointerFromVMA(*Kernel::g_current_process, vaddr);
+}
+
+/**
  * This function should only be called for virtual addreses with attribute `PageType::Special`.
  */
 static MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) {
@@ -470,7 +478,10 @@ u64 Read64(const VAddr addr) {
     return Read<u64_le>(addr);
 }
 
-void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
+void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+               const size_t size) {
+    auto& page_table = process.vm_manager.page_table;
+
     size_t remaining_size = size;
     size_t page_index = src_addr >> PAGE_BITS;
     size_t page_offset = src_addr & PAGE_MASK;
@@ -479,7 +490,7 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
         const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
-        switch (current_page_table->attributes[page_index]) {
+        switch (page_table.attributes[page_index]) {
         case PageType::Unmapped: {
             LOG_ERROR(HW_Memory, "unmapped ReadBlock @ 0x%08X (start address = 0x%08X, size = %zu)",
                       current_vaddr, src_addr, size);
@@ -487,29 +498,30 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
             break;
         }
         case PageType::Memory: {
-            DEBUG_ASSERT(current_page_table->pointers[page_index]);
+            DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            const u8* src_ptr = current_page_table->pointers[page_index] + page_offset;
+            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
         case PageType::Special: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
-
-            GetMMIOHandler(current_vaddr)->ReadBlock(current_vaddr, dest_buffer, copy_amount);
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
+            handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
             break;
         }
         case PageType::RasterizerCachedMemory: {
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(current_vaddr), copy_amount);
+            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
             break;
         }
         case PageType::RasterizerCachedSpecial: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::Flush);
-            GetMMIOHandler(current_vaddr)->ReadBlock(current_vaddr, dest_buffer, copy_amount);
+            handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
             break;
         }
         default:
@@ -523,6 +535,10 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
     }
 }
 
+void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
+    ReadBlock(*Kernel::g_current_process, src_addr, dest_buffer, size);
+}
+
 void Write8(const VAddr addr, const u8 data) {
     Write<u8>(addr, data);
 }
@@ -539,7 +555,9 @@ void Write64(const VAddr addr, const u64 data) {
     Write<u64_le>(addr, data);
 }
 
-void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) {
+void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
+                const size_t size) {
+    auto& page_table = process.vm_manager.page_table;
     size_t remaining_size = size;
     size_t page_index = dest_addr >> PAGE_BITS;
     size_t page_offset = dest_addr & PAGE_MASK;
@@ -548,7 +566,7 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size
         const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
-        switch (current_page_table->attributes[page_index]) {
+        switch (page_table.attributes[page_index]) {
         case PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "unmapped WriteBlock @ 0x%08X (start address = 0x%08X, size = %zu)",
@@ -556,29 +574,30 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size
             break;
         }
         case PageType::Memory: {
-            DEBUG_ASSERT(current_page_table->pointers[page_index]);
+            DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            u8* dest_ptr = current_page_table->pointers[page_index] + page_offset;
+            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
         case PageType::Special: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
-
-            GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount);
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
+            handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
             break;
         }
         case PageType::RasterizerCachedMemory: {
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::FlushAndInvalidate);
-            std::memcpy(GetPointerFromVMA(current_vaddr), src_buffer, copy_amount);
+            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
             break;
         }
         case PageType::RasterizerCachedSpecial: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::FlushAndInvalidate);
-            GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount);
+            handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -592,6 +611,10 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size
     }
 }
 
+void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) {
+    WriteBlock(*Kernel::g_current_process, dest_addr, src_buffer, size);
+}
+
 void ZeroBlock(const VAddr dest_addr, const size_t size) {
     size_t remaining_size = size;
     size_t page_index = dest_addr >> PAGE_BITS;
diff --git a/src/core/memory.h b/src/core/memory.h
index 347c08c78..dd599f73e 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -205,7 +205,11 @@ void Write16(VAddr addr, u16 data);
 void Write32(VAddr addr, u32 data);
 void Write64(VAddr addr, u64 data);
 
+void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+               size_t size);
 void ReadBlock(const VAddr src_addr, void* dest_buffer, size_t size);
+void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
+                size_t size);
 void WriteBlock(const VAddr dest_addr, const void* src_buffer, size_t size);
 void ZeroBlock(const VAddr dest_addr, const size_t size);
 void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size);
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3ab4af374..caf9f7a06 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
     }
 }
 
+static void LoadDefaultVertexAttributes(u32 register_value) {
+    auto& regs = g_state.regs;
+
+    // TODO: Does actual hardware indeed keep an intermediate buffer or does
+    //       it directly write the values?
+    default_attr_write_buffer[default_attr_counter++] = register_value;
+
+    // Default attributes are written in a packed format such that four float24 values are encoded
+    // in three 32-bit numbers.
+    // We write to internal memory once such a full vector has been written.
+    if (default_attr_counter >= 3) {
+        default_attr_counter = 0;
+
+        auto& setup = regs.pipeline.vs_default_attributes_setup;
+
+        if (setup.index >= 16) {
+            LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
+            return;
+        }
+
+        Math::Vec4<float24> attribute;
+
+        // NOTE: The destination component order indeed is "backwards"
+        attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
+        attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
+                                       ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
+        attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
+                                       ((default_attr_write_buffer[2] >> 24) & 0xFF));
+        attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
+
+        LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
+                  attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
+                  attribute.w.ToFloat32());
+
+        // TODO: Verify that this actually modifies the register!
+        if (setup.index < 15) {
+            g_state.input_default_attributes.attr[setup.index] = attribute;
+            setup.index++;
+        } else {
+            // Put each attribute into an immediate input buffer. When all specified immediate
+            // attributes are present, the Vertex Shader is invoked and its output is submitted
+            // to the geometry pipeline.
+
+            auto& immediate_input = g_state.immediate.input_vertex;
+            auto& immediate_attribute_id = g_state.immediate.current_attribute;
+
+            immediate_input.attr[immediate_attribute_id] = attribute;
+
+            if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
+                immediate_attribute_id += 1;
+            } else {
+                MICROPROFILE_SCOPE(GPU_Drawing);
+                immediate_attribute_id = 0;
+
+                auto* shader_engine = Shader::GetEngine();
+                shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+                // Send to vertex shader
+                if (g_debug_context)
+                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+                                             static_cast<void*>(&immediate_input));
+                Shader::UnitState shader_unit;
+                Shader::AttributeBuffer output{};
+
+                shader_unit.LoadInput(regs.vs, immediate_input);
+                shader_engine->Run(g_state.vs, shader_unit);
+                shader_unit.WriteOutput(regs.vs, output);
+
+                // Send to geometry pipeline
+                if (g_state.immediate.reset_geometry_pipeline) {
+                    g_state.geometry_pipeline.Reconfigure();
+                    g_state.immediate.reset_geometry_pipeline = false;
+                }
+                ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
+                g_state.geometry_pipeline.Setup(shader_engine);
+                g_state.geometry_pipeline.SubmitVertex(output);
+
+                // TODO: If drawing after every immediate mode triangle kills performance,
+                // change it to flush triangles whenever a drawing config register changes
+                // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
+                VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+                if (g_debug_context) {
+                    g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+                }
+            }
+        }
+    }
+}
+
+static void Draw(u32 command_id) {
+    MICROPROFILE_SCOPE(GPU_Drawing);
+    auto& regs = g_state.regs;
+
+#if PICA_LOG_TEV
+    DebugUtils::DumpTevStageConfig(regs.GetTevStages());
+#endif
+    if (g_debug_context)
+        g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+
+    // Processes information about internal vertex attributes to figure out how a vertex is
+    // loaded.
+    // Later, these can be compiled and cached.
+    const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
+    VertexLoader loader(regs.pipeline);
+
+    // Load vertices
+    bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
+
+    const auto& index_info = regs.pipeline.index_array;
+    const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
+    const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
+    bool index_u16 = index_info.format != 0;
+
+    PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
+
+    if (g_debug_context && g_debug_context->recorder) {
+        for (int i = 0; i < 3; ++i) {
+            const auto texture = regs.texturing.GetTextures()[i];
+            if (!texture.enabled)
+                continue;
+
+            u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+            g_debug_context->recorder->MemoryAccessed(
+                texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
+                                  texture.config.width / 2 * texture.config.height,
+                texture.config.GetPhysicalAddress());
+        }
+    }
+
+    DebugUtils::MemoryAccessTracker memory_accesses;
+
+    // Simple circular-replacement vertex cache
+    // The size has been tuned for optimal balance between hit-rate and the cost of lookup
+    const size_t VERTEX_CACHE_SIZE = 32;
+    std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
+    std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
+    Shader::AttributeBuffer vs_output;
+
+    unsigned int vertex_cache_pos = 0;
+    vertex_cache_ids.fill(-1);
+
+    auto* shader_engine = Shader::GetEngine();
+    Shader::UnitState shader_unit;
+
+    shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+    g_state.geometry_pipeline.Reconfigure();
+    g_state.geometry_pipeline.Setup(shader_engine);
+    if (g_state.geometry_pipeline.NeedIndexInput())
+        ASSERT(is_indexed);
+
+    for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
+        // Indexed rendering doesn't use the start offset
+        unsigned int vertex = is_indexed
+                                  ? (index_u16 ? index_address_16[index] : index_address_8[index])
+                                  : (index + regs.pipeline.vertex_offset);
+
+        // -1 is a common special value used for primitive restart. Since it's unknown if
+        // the PICA supports it, and it would mess up the caching, guard against it here.
+        ASSERT(vertex != -1);
+
+        bool vertex_cache_hit = false;
+
+        if (is_indexed) {
+            if (g_state.geometry_pipeline.NeedIndexInput()) {
+                g_state.geometry_pipeline.SubmitIndex(vertex);
+                continue;
+            }
+
+            if (g_debug_context && Pica::g_debug_context->recorder) {
+                int size = index_u16 ? 2 : 1;
+                memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
+            }
+
+            for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
+                if (vertex == vertex_cache_ids[i]) {
+                    vs_output = vertex_cache[i];
+                    vertex_cache_hit = true;
+                    break;
+                }
+            }
+        }
+
+        if (!vertex_cache_hit) {
+            // Initialize data for the current vertex
+            Shader::AttributeBuffer input;
+            loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
+
+            // Send to vertex shader
+            if (g_debug_context)
+                g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+                                         (void*)&input);
+            shader_unit.LoadInput(regs.vs, input);
+            shader_engine->Run(g_state.vs, shader_unit);
+            shader_unit.WriteOutput(regs.vs, vs_output);
+
+            if (is_indexed) {
+                vertex_cache[vertex_cache_pos] = vs_output;
+                vertex_cache_ids[vertex_cache_pos] = vertex;
+                vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
+            }
+        }
+
+        // Send to geometry pipeline
+        g_state.geometry_pipeline.SubmitVertex(vs_output);
+    }
+
+    for (auto& range : memory_accesses.ranges) {
+        g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
+                                                  range.second, range.first);
+    }
+
+    VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+    if (g_debug_context) {
+        g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+    }
+}
+
 static void WritePicaReg(u32 id, u32 value, u32 mask) {
     auto& regs = g_state.regs;
 
@@ -168,95 +386,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
     // Load default vertex input attributes
     case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
     case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
-    case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): {
-        // TODO: Does actual hardware indeed keep an intermediate buffer or does
-        //       it directly write the values?
-        default_attr_write_buffer[default_attr_counter++] = value;
-
-        // Default attributes are written in a packed format such that four float24 values are
-        // encoded in
-        // three 32-bit numbers. We write to internal memory once a full such vector is
-        // written.
-        if (default_attr_counter >= 3) {
-            default_attr_counter = 0;
-
-            auto& setup = regs.pipeline.vs_default_attributes_setup;
-
-            if (setup.index >= 16) {
-                LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
-                break;
-            }
-
-            Math::Vec4<float24> attribute;
-
-            // NOTE: The destination component order indeed is "backwards"
-            attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
-            attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
-                                           ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
-            attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
-                                           ((default_attr_write_buffer[2] >> 24) & 0xFF));
-            attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
-
-            LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
-                      attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
-                      attribute.w.ToFloat32());
-
-            // TODO: Verify that this actually modifies the register!
-            if (setup.index < 15) {
-                g_state.input_default_attributes.attr[setup.index] = attribute;
-                setup.index++;
-            } else {
-                // Put each attribute into an immediate input buffer.  When all specified immediate
-                // attributes are present, the Vertex Shader is invoked and everything is sent to
-                // the primitive assembler.
-
-                auto& immediate_input = g_state.immediate.input_vertex;
-                auto& immediate_attribute_id = g_state.immediate.current_attribute;
-
-                immediate_input.attr[immediate_attribute_id] = attribute;
-
-                if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
-                    immediate_attribute_id += 1;
-                } else {
-                    MICROPROFILE_SCOPE(GPU_Drawing);
-                    immediate_attribute_id = 0;
-
-                    auto* shader_engine = Shader::GetEngine();
-                    shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
-                    // Send to vertex shader
-                    if (g_debug_context)
-                        g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
-                                                 static_cast<void*>(&immediate_input));
-                    Shader::UnitState shader_unit;
-                    Shader::AttributeBuffer output{};
-
-                    shader_unit.LoadInput(regs.vs, immediate_input);
-                    shader_engine->Run(g_state.vs, shader_unit);
-                    shader_unit.WriteOutput(regs.vs, output);
-
-                    // Send to geometry pipeline
-                    if (g_state.immediate.reset_geometry_pipeline) {
-                        g_state.geometry_pipeline.Reconfigure();
-                        g_state.immediate.reset_geometry_pipeline = false;
-                    }
-                    ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
-                    g_state.geometry_pipeline.Setup(shader_engine);
-                    g_state.geometry_pipeline.SubmitVertex(output);
-
-                    // TODO: If drawing after every immediate mode triangle kills performance,
-                    // change it to flush triangles whenever a drawing config register changes
-                    // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
-                    VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-                    if (g_debug_context) {
-                        g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch,
-                                                 nullptr);
-                    }
-                }
-            }
-        }
+    case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235):
+        LoadDefaultVertexAttributes(value);
         break;
-    }
 
     case PICA_REG_INDEX(pipeline.gpu_mode):
         // This register likely just enables vertex processing and doesn't need any special handling
@@ -275,136 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 
     // It seems like these trigger vertex rendering
     case PICA_REG_INDEX(pipeline.trigger_draw):
-    case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
-        MICROPROFILE_SCOPE(GPU_Drawing);
-
-#if PICA_LOG_TEV
-        DebugUtils::DumpTevStageConfig(regs.GetTevStages());
-#endif
-        if (g_debug_context)
-            g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-
-        // Processes information about internal vertex attributes to figure out how a vertex is
-        // loaded.
-        // Later, these can be compiled and cached.
-        const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
-        VertexLoader loader(regs.pipeline);
-
-        // Load vertices
-        bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
-
-        const auto& index_info = regs.pipeline.index_array;
-        const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
-        const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
-        bool index_u16 = index_info.format != 0;
-
-        PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
-
-        if (g_debug_context && g_debug_context->recorder) {
-            for (int i = 0; i < 3; ++i) {
-                const auto texture = regs.texturing.GetTextures()[i];
-                if (!texture.enabled)
-                    continue;
-
-                u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
-                g_debug_context->recorder->MemoryAccessed(
-                    texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
-                                      texture.config.width / 2 * texture.config.height,
-                    texture.config.GetPhysicalAddress());
-            }
-        }
-
-        DebugUtils::MemoryAccessTracker memory_accesses;
-
-        // Simple circular-replacement vertex cache
-        // The size has been tuned for optimal balance between hit-rate and the cost of lookup
-        const size_t VERTEX_CACHE_SIZE = 32;
-        std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
-        std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
-        Shader::AttributeBuffer vs_output;
-
-        unsigned int vertex_cache_pos = 0;
-        vertex_cache_ids.fill(-1);
-
-        auto* shader_engine = Shader::GetEngine();
-        Shader::UnitState shader_unit;
-
-        shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
-        g_state.geometry_pipeline.Reconfigure();
-        g_state.geometry_pipeline.Setup(shader_engine);
-        if (g_state.geometry_pipeline.NeedIndexInput())
-            ASSERT(is_indexed);
-
-        for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
-            // Indexed rendering doesn't use the start offset
-            unsigned int vertex =
-                is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
-                           : (index + regs.pipeline.vertex_offset);
-
-            // -1 is a common special value used for primitive restart. Since it's unknown if
-            // the PICA supports it, and it would mess up the caching, guard against it here.
-            ASSERT(vertex != -1);
-
-            bool vertex_cache_hit = false;
-
-            if (is_indexed) {
-                if (g_state.geometry_pipeline.NeedIndexInput()) {
-                    g_state.geometry_pipeline.SubmitIndex(vertex);
-                    continue;
-                }
-
-                if (g_debug_context && Pica::g_debug_context->recorder) {
-                    int size = index_u16 ? 2 : 1;
-                    memory_accesses.AddAccess(base_address + index_info.offset + size * index,
-                                              size);
-                }
-
-                for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
-                    if (vertex == vertex_cache_ids[i]) {
-                        vs_output = vertex_cache[i];
-                        vertex_cache_hit = true;
-                        break;
-                    }
-                }
-            }
-
-            if (!vertex_cache_hit) {
-                // Initialize data for the current vertex
-                Shader::AttributeBuffer input;
-                loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
-
-                // Send to vertex shader
-                if (g_debug_context)
-                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
-                                             (void*)&input);
-                shader_unit.LoadInput(regs.vs, input);
-                shader_engine->Run(g_state.vs, shader_unit);
-                shader_unit.WriteOutput(regs.vs, vs_output);
-
-                if (is_indexed) {
-                    vertex_cache[vertex_cache_pos] = vs_output;
-                    vertex_cache_ids[vertex_cache_pos] = vertex;
-                    vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
-                }
-            }
-
-            // Send to geometry pipeline
-            g_state.geometry_pipeline.SubmitVertex(vs_output);
-        }
-
-        for (auto& range : memory_accesses.ranges) {
-            g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
-                                                      range.second, range.first);
-        }
-
-        VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-        if (g_debug_context) {
-            g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-        }
-
+    case PICA_REG_INDEX(pipeline.trigger_draw_indexed):
+        Draw(id);
         break;
-    }
 
     case PICA_REG_INDEX(gs.bool_uniforms):
         WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value());