46 files changed, 579 insertions, 457 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 3d30f0e3e..43ae8a9e7 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -92,10 +92,14 @@ add_library(common STATIC
     logging/text_formatter.cpp
     logging/text_formatter.h
     math_util.h
+    memory_hook.cpp
+    memory_hook.h
     microprofile.cpp
     microprofile.h
     microprofileui.h
     misc.cpp
+    page_table.cpp
+    page_table.h
     param_package.cpp
     param_package.h
     quaternion.h
@@ -114,6 +118,8 @@ add_library(common STATIC
     threadsafe_queue.h
     timer.cpp
     timer.h
+    uint128.cpp
+    uint128.h
     vector_math.h
     web_result.h
 )
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
 
-namespace Memory {
+namespace Common {
 
 MemoryHook::~MemoryHook() = default;
 
-} // namespace Memory
+} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
 
 #include "common/common_types.h"
 
-namespace Memory {
+namespace Common {
 
 /**
  * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
 };
 
 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Memory
+} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..8eba1c3f1
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/page_table.h"
+
+namespace Common {
+
+PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
+
+PageTable::~PageTable() = default;
+
+void PageTable::Resize(std::size_t address_space_width_in_bits) {
+    const std::size_t num_page_table_entries = 1ULL
+                                               << (address_space_width_in_bits - page_size_in_bits);
+
+    pointers.resize(num_page_table_entries);
+    attributes.resize(num_page_table_entries);
+
+    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
+    // vector size is subsequently decreased (via resize), the vector might not automatically
+    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
+    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
+
+    pointers.shrink_to_fit();
+    attributes.shrink_to_fit();
+}
+
+} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8339f2890
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include "common/common_types.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+
+enum class PageType : u8 {
+    /// Page is unmapped and should cause an access error.
+    Unmapped,
+    /// Page is mapped to regular memory. This is the only type you can get pointers to.
+    Memory,
+    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
+    /// invalidation
+    RasterizerCachedMemory,
+    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
+    Special,
+};
+
+struct SpecialRegion {
+    enum class Type {
+        DebugHook,
+        IODevice,
+    } type;
+
+    MemoryHookPointer handler;
+
+    bool operator<(const SpecialRegion& other) const {
+        return std::tie(type, handler) < std::tie(other.type, other.handler);
+    }
+
+    bool operator==(const SpecialRegion& other) const {
+        return std::tie(type, handler) == std::tie(other.type, other.handler);
+    }
+};
+
+/**
+ * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
+ * mimics the way a real CPU page table works.
+ */
+struct PageTable {
+    explicit PageTable(std::size_t page_size_in_bits);
+    ~PageTable();
+
+    /**
+     * Resizes the page table to be able to accomodate enough pages within
+     * a given address space.
+     *
+     * @param address_space_width_in_bits The address size width in bits.
+     */
+    void Resize(std::size_t address_space_width_in_bits);
+
+    /**
+     * Vector of memory pointers backing each page. An entry can only be non-null if the
+     * corresponding entry in the `attributes` vector is of type `Memory`.
+     */
+    std::vector<u8*> pointers;
+
+    /**
+     * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
+     * of type `Special`.
+     */
+    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
+
+    /**
+     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
+     * the corresponding entry in `pointers` MUST be set to null.
+     */
+    std::vector<PageType> attributes;
+
+    const std::size_t page_size_in_bits{};
+};
+
+} // namespace Common
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..2238a52c5
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,41 @@
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) {
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+    u64 remainder = dividend[0] % divisor;
+    u64 accum = dividend[0] / divisor;
+    if (dividend[1] == 0)
+        return {accum, remainder};
+    // We ignore dividend[1] / divisor as that overflows
+    const u64 first_segment = (dividend[1] % divisor) << 32;
+    accum += (first_segment / divisor) << 32;
+    const u64 second_segment = (first_segment % divisor) << 32;
+    accum += (second_segment / divisor);
+    remainder += second_segment % divisor;
+    if (remainder >= divisor) {
+        accum++;
+        remainder -= divisor;
+    }
+    return {accum, remainder};
+}
+
+} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..52e6b46eb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,14 @@
+
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// This function multiplies 2 u64 values and produces a u128 value;
+u128 Multiply64Into128(u64 a, u64 b);
+
+// This function divides a u128 by a u32 value and produces two u64 values:
+// the result of division and the remainder
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 8ccb2d5f0..aee8bc27d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -437,8 +437,6 @@ add_library(core STATIC
     loader/xci.h
     memory.cpp
     memory.h
-    memory_hook.cpp
-    memory_hook.h
     memory_setup.h
     perf_stats.cpp
     perf_stats.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 9b7ca4030..4fdc12f11 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
         return std::max(parent.core_timing.GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return parent.core_timing.GetTicks();
+        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
     }
 
     ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     config.tpidr_el0 = &cb->tpidr_el0;
     config.dczid_el0 = 4;
     config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;
 
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 6cc458296..aada1e862 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,7 +12,7 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 
-namespace Memory {
+namespace Common {
 struct PageTable;
 }
 
@@ -70,7 +70,7 @@ private:
     Timing::CoreTiming& core_timing;
     DynarmicExclusiveMonitor& exclusive_monitor;
 
-    Memory::PageTable* current_page_table = nullptr;
+    Common::PageTable* current_page_table = nullptr;
 };
 
 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 88ff70233..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"
 
 namespace Core::Timing {
 
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
     return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }
 
+u64 CpuCyclesToClockCycles(u64 ticks) {
+    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+}
+
 } // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 513cfac1b..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
 
 inline s64 msToCycles(int ms) {
     // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
     return cycles * 1000 / BASE_CLOCK_RATE;
 }
 
+u64 CpuCyclesToClockCycles(u64 ticks);
+
 } // namespace Core::Timing
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index a1e4be070..68406eb63 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -275,6 +275,20 @@ inline void ResponseBuilder::Push(u64 value) {
 }
 
 template <>
+inline void ResponseBuilder::Push(float value) {
+    u32 integral;
+    std::memcpy(&integral, &value, sizeof(u32));
+    Push(integral);
+}
+
+template <>
+inline void ResponseBuilder::Push(double value) {
+    u64 integral;
+    std::memcpy(&integral, &value, sizeof(u64));
+    Push(integral);
+}
+
+template <>
 inline void ResponseBuilder::Push(bool value) {
     Push(static_cast<u8>(value));
 }
@@ -416,6 +430,22 @@ inline s64 RequestParser::Pop() {
 }
 
 template <>
+inline float RequestParser::Pop() {
+    const u32 value = Pop<u32>();
+    float real;
+    std::memcpy(&real, &value, sizeof(real));
+    return real;
+}
+
+template <>
+inline double RequestParser::Pop() {
+    const u64 value = Pop<u64>();
+    float real;
+    std::memcpy(&real, &value, sizeof(real));
+    return real;
+}
+
+template <>
 inline bool RequestParser::Pop() {
     return Pop<u8>() != 0;
 }
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 49fced7b1..65c51003d 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -31,7 +31,7 @@ namespace {
  */
 void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
     // Setup page table so we can write to memory
-    SetCurrentPageTable(&owner_process.VMManager().page_table);
+    Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
 
     // Initialize new "main" thread
     const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index e524509df..cc189cc64 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -96,7 +96,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
         if (previous_process != thread_owner_process) {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
-            SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
+            Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
         }
 
         cpu_core.LoadContext(new_thread->GetContext());
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 2e712c9cb..d9ffebc3f 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
 #include <optional>
 #include <vector>
 
-#include <boost/range/algorithm_ext/erase.hpp>
-
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -68,12 +66,6 @@ void Thread::Stop() {
     owner_process->FreeTLSSlot(tls_address);
 }
 
-void ExitCurrentThread() {
-    Thread* thread = GetCurrentThread();
-    thread->Stop();
-    Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
-}
-
 void Thread::WakeAfterDelay(s64 nanoseconds) {
     // Don't schedule a wakeup if the thread wants to wait forever
     if (nanoseconds == -1)
@@ -264,8 +256,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
     if (thread->lock_owner == this) {
         // If the thread is already waiting for this thread to release the mutex, ensure that the
         // waiters list is consistent and return without doing anything.
-        auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-        ASSERT(itr != wait_mutex_threads.end());
+        const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+        ASSERT(iter != wait_mutex_threads.end());
         return;
     }
 
@@ -273,11 +265,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
     ASSERT(thread->lock_owner == nullptr);
 
     // Ensure that the thread is not already in the list of mutex waiters
-    auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(itr == wait_mutex_threads.end());
-
+    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+    ASSERT(iter == wait_mutex_threads.end());
+
+    // Keep the list in an ordered fashion
+    const auto insertion_point = std::find_if(
+        wait_mutex_threads.begin(), wait_mutex_threads.end(),
+        [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
+    wait_mutex_threads.insert(insertion_point, thread);
     thread->lock_owner = this;
-    wait_mutex_threads.emplace_back(std::move(thread));
+
     UpdatePriority();
 }
 
@@ -285,32 +282,43 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
     ASSERT(thread->lock_owner == this);
 
     // Ensure that the thread is in the list of mutex waiters
-    auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(itr != wait_mutex_threads.end());
+    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+    ASSERT(iter != wait_mutex_threads.end());
+
+    wait_mutex_threads.erase(iter);
 
-    boost::remove_erase(wait_mutex_threads, thread);
     thread->lock_owner = nullptr;
     UpdatePriority();
 }
 
 void Thread::UpdatePriority() {
-    // Find the highest priority among all the threads that are waiting for this thread's lock
+    // If any of the threads waiting on the mutex have a higher priority
+    // (taking into account priority inheritance), then this thread inherits
+    // that thread's priority.
     u32 new_priority = nominal_priority;
-    for (const auto& thread : wait_mutex_threads) {
-        if (thread->nominal_priority < new_priority)
-            new_priority = thread->nominal_priority;
+    if (!wait_mutex_threads.empty()) {
+        if (wait_mutex_threads.front()->current_priority < new_priority) {
+            new_priority = wait_mutex_threads.front()->current_priority;
+        }
     }
 
-    if (new_priority == current_priority)
+    if (new_priority == current_priority) {
         return;
+    }
 
     scheduler->SetThreadPriority(this, new_priority);
-
     current_priority = new_priority;
 
+    if (!lock_owner) {
+        return;
+    }
+
+    // Ensure that the thread is within the correct location in the waiting list.
+    lock_owner->RemoveMutexWaiter(this);
+    lock_owner->AddMutexWaiter(this);
+
     // Recursively update the priority of the thread that depends on the priority of this one.
-    if (lock_owner)
-        lock_owner->UpdatePriority();
+    lock_owner->UpdatePriority();
 }
 
 void Thread::ChangeCore(u32 core, u64 mask) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index ccdefeecc..faad5f391 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -401,8 +401,14 @@ private:
     VAddr entry_point = 0;
     VAddr stack_top = 0;
 
-    u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
-    u32 current_priority = 0; ///< Current thread priority, can be temporarily changed
+    /// Nominal thread priority, as set by the emulated application.
+    /// The nominal priority is the thread priority without priority
+    /// inheritance taken into account.
+    u32 nominal_priority = 0;
+
+    /// Current thread priority. This may change over the course of the
+    /// thread's lifetime in order to facilitate priority inheritance.
+    u32 current_priority = 0;
 
     u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
     u64 last_running_ticks = 0;   ///< CPU tick when thread was last running
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 05c59af34..3def3e52c 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,13 +7,13 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/memory_hook.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 #include "core/memory_setup.h"
 
 namespace Kernel {
@@ -177,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
                                                    MemoryState state,
-                                                   Memory::MemoryHookPointer mmio_handler) {
+                                                   Common::MemoryHookPointer mmio_handler) {
     // This is the appropriately sized VMA that will turn into our allocation.
     CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
     VirtualMemoryArea& final_vma = vma_handle->second;
@@ -624,7 +624,7 @@ void VMManager::ClearPageTable() {
     std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
     page_table.special_regions.clear();
     std::fill(page_table.attributes.begin(), page_table.attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 }
 
 VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 88e0b3c02..b96980f8f 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
+#include "common/memory_hook.h"
+#include "common/page_table.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 
 namespace FileSys {
 enum class ProgramAddressSpaceType : u8;
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
     // Settings for type = MMIO
     /// Physical address of the register area this VMA maps to.
     PAddr paddr = 0;
-    Memory::MemoryHookPointer mmio_handler = nullptr;
+    Common::MemoryHookPointer mmio_handler = nullptr;
 
     /// Tests if this area can be merged to the right with `next`.
     bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
      * @param mmio_handler The handler that will implement read and write for this MMIO region.
      */
     ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
-                                 Memory::MemoryHookPointer mmio_handler);
+                                 Common::MemoryHookPointer mmio_handler);
 
     /// Unmaps a range of addresses, splitting VMAs as necessary.
     ResultCode UnmapRange(VAddr target, u64 size);
@@ -509,7 +510,7 @@ public:
 
     /// Each VMManager has its own page table, which is set as the main one when the owning process
     /// is scheduled.
-    Memory::PageTable page_table;
+    Common::PageTable page_table{Memory::PAGE_BITS};
 
 private:
     using VMAIter = VMAMap::iterator;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4fde53033..365ac82b4 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/page_table.h"
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
@@ -18,13 +19,14 @@
 #include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
+#include "video_core/gpu.h"
 #include "video_core/renderer_base.h"
 
 namespace Memory {
 
-static PageTable* current_page_table = nullptr;
+static Common::PageTable* current_page_table = nullptr;
 
-void SetCurrentPageTable(PageTable* page_table) {
+void SetCurrentPageTable(Common::PageTable* page_table) {
     current_page_table = page_table;
 
     auto& system = Core::System::GetInstance();
@@ -36,41 +38,19 @@ void SetCurrentPageTable(PageTable* page_table) {
     }
 }
 
-PageTable* GetCurrentPageTable() {
+Common::PageTable* GetCurrentPageTable() {
     return current_page_table;
 }
 
-PageTable::PageTable() = default;
-
-PageTable::PageTable(std::size_t address_space_width_in_bits) {
-    Resize(address_space_width_in_bits);
-}
-
-PageTable::~PageTable() = default;
-
-void PageTable::Resize(std::size_t address_space_width_in_bits) {
-    const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
-
-    pointers.resize(num_page_table_entries);
-    attributes.resize(num_page_table_entries);
-
-    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
-    // vector size is subsequently decreased (via resize), the vector might not automatically
-    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
-    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
-
-    pointers.shrink_to_fit();
-    attributes.shrink_to_fit();
-}
-
-static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
+static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
+                     Common::PageType type) {
     LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
               (base + size) * PAGE_SIZE);
 
     // During boot, current_page_table might not be set yet, in which case we need not flush
     if (current_page_table) {
-        RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
-                                     FlushMode::FlushAndInvalidate);
+        Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
+                                                                   size * PAGE_SIZE);
     }
 
     VAddr end = base + size;
@@ -91,41 +71,47 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
     }
 }
 
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) {
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
 }
 
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) {
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
 
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
-void UnmapRegion(PageTable& page_table, VAddr base, u64 size) {
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
 
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
     page_table.special_regions.erase(interval);
 }
 
-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook) {
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook) {
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.subtract(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
 /**
@@ -174,19 +160,19 @@ T Read(const VAddr vaddr) {
         return value;
     }
 
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
     switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
         LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
         return 0;
-    case PageType::Memory:
+    case Common::PageType::Memory:
         ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
         break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
-
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
         T value;
-        std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
+        std::memcpy(&value, host_ptr, sizeof(T));
         return value;
     }
     default:
@@ -204,18 +190,19 @@ void Write(const VAddr vaddr, const T data) {
         return;
     }
 
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
     switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
         LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
                   static_cast<u32>(data), vaddr);
         return;
-    case PageType::Memory:
+    case Common::PageType::Memory:
         ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
         break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
-        std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+        std::memcpy(host_ptr, &data, sizeof(T));
         break;
     }
     default:
@@ -230,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
     if (page_pointer)
         return true;
 
-    if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
+    if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
         return true;
 
-    if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special)
+    if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
         return false;
 
     return false;
@@ -253,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
         return page_pointer + (vaddr & PAGE_MASK);
     }
 
-    if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
+    if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
+        Common::PageType::RasterizerCachedMemory) {
         return GetPointerFromVMA(vaddr);
     }
 
@@ -287,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
 
     u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
     for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
-        PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
 
         if (cached) {
             // Switch page type to cached if now cached
             switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
-            case PageType::Memory:
-                page_type = PageType::RasterizerCachedMemory;
+            case Common::PageType::Memory:
+                page_type = Common::PageType::RasterizerCachedMemory;
                 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
                 break;
-            case PageType::RasterizerCachedMemory:
+            case Common::PageType::RasterizerCachedMemory:
                 // There can be more than one GPU region mapped per CPU region, so it's common that
                 // this area is already marked as cached.
                 break;
@@ -310,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
         } else {
             // Switch page type to uncached if now uncached
             switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
-            case PageType::Memory:
+            case Common::PageType::Memory:
                 // There can be more than one GPU region mapped per CPU region, so it's common that
                 // this area is already unmarked as cached.
                 break;
-            case PageType::RasterizerCachedMemory: {
+            case Common::PageType::RasterizerCachedMemory: {
                 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
                 if (pointer == nullptr) {
                     // It's possible that this function has been called while updating the pagetable
                     // after unmapping a VMA. In that case the underlying VMA will no longer exist,
                     // and we should just leave the pagetable entry blank.
-                    page_type = PageType::Unmapped;
+                    page_type = Common::PageType::Unmapped;
                 } else {
-                    page_type = PageType::Memory;
+                    page_type = Common::PageType::Memory;
                     current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
                 }
                 break;
@@ -338,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
     }
 }
 
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
-    auto& system_instance = Core::System::GetInstance();
-
-    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
-    // null here
-    if (!system_instance.IsPoweredOn()) {
-        return;
-    }
-
-    const VAddr end = start + size;
-
-    const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
-        if (start >= region_end || end <= region_start) {
-            // No overlap with region
-            return;
-        }
-
-        const VAddr overlap_start = std::max(start, region_start);
-        const VAddr overlap_end = std::min(end, region_end);
-        const VAddr overlap_size = overlap_end - overlap_start;
-
-        auto& gpu = system_instance.GPU();
-        switch (mode) {
-        case FlushMode::Flush:
-            gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
-            break;
-        }
-    };
-
-    const auto& vm_manager = Core::CurrentProcess()->VMManager();
-
-    CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
-    CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
-}
-
 u8 Read8(const VAddr addr) {
     return Read<u8>(addr);
 }
@@ -409,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, src_addr, size);
             std::memset(dest_buffer, 0, copy_amount);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(dest_buffer, host_ptr, copy_amount);
             break;
         }
         default:
@@ -473,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, dest_addr, size);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(host_ptr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -519,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, dest_addr, size);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memset(dest_ptr, 0, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memset(host_ptr, 0, copy_amount);
             break;
         }
         default:
@@ -561,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, src_addr, size);
             ZeroBlock(process, dest_addr, copy_amount);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
             const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             WriteBlock(process, dest_addr, src_ptr, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            WriteBlock(process, dest_addr, host_ptr, copy_amount);
             break;
         }
         default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..3f60d868c 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -10,7 +10,10 @@
 #include <vector>
 #include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
 
 namespace Kernel {
 class Process;
@@ -26,71 +29,6 @@ constexpr std::size_t PAGE_BITS = 12;
 constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
 
-enum class PageType : u8 {
-    /// Page is unmapped and should cause an access error.
-    Unmapped,
-    /// Page is mapped to regular memory. This is the only type you can get pointers to.
-    Memory,
-    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
-    /// invalidation
-    RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
-};
-
-/**
- * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
- * mimics the way a real CPU page table works.
- */
-struct PageTable {
-    explicit PageTable();
-    explicit PageTable(std::size_t address_space_width_in_bits);
-    ~PageTable();
-
-    /**
-     * Resizes the page table to be able to accomodate enough pages within
-     * a given address space.
-     *
-     * @param address_space_width_in_bits The address size width in bits.
-     */
-    void Resize(std::size_t address_space_width_in_bits);
-
-    /**
-     * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
-     */
-    std::vector<u8*> pointers;
-
-    /**
-     * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
-     * of type `Special`.
-     */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
-
-    /**
-     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
-     * the corresponding entry in `pointers` MUST be set to null.
-     */
-    std::vector<PageType> attributes;
-};
-
 /// Virtual user-space memory regions
 enum : VAddr {
     /// Read-only page containing kernel and system configuration values.
@@ -116,8 +54,8 @@ enum : VAddr {
 };
 
 /// Currently active page table
-void SetCurrentPageTable(PageTable* page_table);
-PageTable* GetCurrentPageTable();
+void SetCurrentPageTable(Common::PageTable* page_table);
+Common::PageTable* GetCurrentPageTable();
 
 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +99,4 @@ enum class FlushMode {
  */
 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
 
-/**
- * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
- * address region.
- */
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
-
 } // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
 
 namespace Memory {
 
@@ -17,7 +21,7 @@ namespace Memory {
  * @param size The amount of bytes to map. Must be page-aligned.
  * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
  */
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
 
 /**
  * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
  * @param size The amount of bytes to map. Must be page-aligned.
  * @param mmio_handler The handler that backs the mapping.
  */
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler);
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler);
 
-void UnmapRegion(PageTable& page_table, VAddr base, u64 size);
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
 
-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook);
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook);
 
 } // namespace Memory
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 6fe56833d..3e1a735c3 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 
+#include "common/page_table.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
@@ -22,7 +23,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
     std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
     page_table->special_regions.clear();
     std::fill(page_table->attributes.begin(), page_table->attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 
     Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
     Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/memory_hook.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/memory_hook.h"
 
-namespace Memory {
+namespace Common {
 struct PageTable;
 }
 
@@ -58,7 +58,7 @@ public:
 
 private:
     friend struct TestMemory;
-    struct TestMemory final : Memory::MemoryHook {
+    struct TestMemory final : Common::MemoryHook {
         explicit TestMemory(TestEnvironment* env_) : env(env_) {}
         TestEnvironment* env;
 
@@ -86,7 +86,7 @@ private:
     bool mutable_memory;
     std::shared_ptr<TestMemory> test_memory;
     std::vector<WriteRecord> write_records;
-    Memory::PageTable* page_table = nullptr;
+    Common::PageTable* page_table = nullptr;
     Kernel::KernelCore kernel;
 };
 
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bff1a37ff..8b1bea1ae 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
     }
 
     // Push buffer non-empty, read a word
-    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-    ASSERT_MSG(address, "Invalid GPU address");
-
     command_headers.resize(command_list_header.size);
-
-    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+                                  command_list_header.size * sizeof(u32));
 
     for (const CommandHeader& command_header : command_headers) {
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index daefa43a6..0931b9626 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
 
-    const GPUVAddr address = regs.dest.Address();
-    const auto dest_address =
-        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
-    ASSERT_MSG(dest_address, "Invalid GPU address");
-
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might contain
-    // a dirty surface that will have to be written back to memory.
-    system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)),
-                                                    sizeof(u32));
+    // We do this before actually writing the new data because the destination address might
+    // contain a dirty surface that will have to be written back to memory.
+    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
+    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+    memory_manager.Write32(address, data);
 
-    Memory::Write32(*dest_address, data);
     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
     state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 49979694e..c5d5be4ef 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }
 
 void Maxwell3D::ProcessQueryGet() {
-    GPUVAddr sequence_address = regs.query.QueryAddress();
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
     // VAddr before writing.
-    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
-    ASSERT_MSG(address, "Invalid GPU address");
 
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
             // Write the current query sequence to the sequence address.
             // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
             // query.
-            Memory::Write32(*address, sequence);
+            memory_manager.Write32(sequence_address, sequence);
         } else {
             // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
             // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
             query_result.value = result;
             // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
             query_result.timestamp = system.CoreTiming().GetTicks();
-            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
         }
         dirty_flags.OnMemoryWrite();
         break;
@@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
 
-    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
-    ASSERT_MSG(address, "Invalid GPU address");
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
 
-    u8* ptr{Memory::GetPointer(*address)};
+    u8* ptr{memory_manager.GetPointer(address)};
     rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    std::memcpy(ptr, &value, sizeof(u32));
+    memory_manager.Write32(address, value);
 
     dirty_flags.OnMemoryWrite();
 
@@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_base_address = regs.tic.TICAddress();
-
-    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
-    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
+    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
 
     Texture::TICEntry tic_entry;
-    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }
 
 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
-
-    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
-    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
+    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
 
     Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
@@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
-        const auto address = memory_manager.GpuToCpuAddress(current_texture);
-        ASSERT_MSG(address, "Invalid GPU address");
-
-        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
+        const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
 
     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
 
-    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
-
-    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
 
     Texture::FullTextureInfo tex_info{};
     tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 415a6319a..a0ded4c25 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
     const GPUVAddr source = regs.src_address.Address();
     const GPUVAddr dest = regs.dst_address.Address();
 
-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
     // TODO(Subv): Perform more research and implement all features of this engine.
     ASSERT(regs.exec.enable_swizzle == 0);
     ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
         // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
         // y_count).
         if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
+            memory_manager.CopyBlock(dest, source, regs.x_count);
             return;
         }
 
@@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
         // rectangle. There is no need to manually flush/invalidate the regions because
         // CopyBlock does that for us.
         for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = *source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
-            Memory::CopyBlock(dest_line, source_line, regs.x_count);
+            const GPUVAddr source_line = source + line * regs.src_pitch;
+            const GPUVAddr dest_line = dest + line * regs.dst_pitch;
+            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
         }
         return;
     }
@@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() {
 
     const std::size_t copy_size = regs.x_count * regs.y_count;
 
+    auto source_ptr{memory_manager.GetPointer(source)};
+    auto dst_ptr{memory_manager.GetPointer(dest)};
+
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        Core::System::GetInstance().Renderer().Rasterizer().FlushRegion(
-            ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
+        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
 
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion(
-            ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
+        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
     };
 
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
                            copy_size * src_bytes_per_pixel);
 
         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
-                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
+                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                   regs.src_params.pos_y);
     } else {
         ASSERT(regs.dst_params.size_z == 1);
@@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() {
 
         // If the input is linear and the output is tiled, swizzle the input and copy it over.
         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
     }
 }
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08abf8ac9..66c690494 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -274,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
     const auto op =
         static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
     if (op == GpuSemaphoreOperation::WriteLong) {
-        auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
         struct Block {
             u32 sequence;
             u32 zeros = 0;
@@ -286,11 +285,9 @@ void GPU::ProcessSemaphoreTriggerMethod() {
         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
         // CoreTiming
         block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
-        Memory::WriteBlock(*address, &block, sizeof(block));
+        memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
     } else {
-        const auto address =
-            memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-        const u32 word = Memory::Read32(*address);
+        const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
         if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
             (op == GpuSemaphoreOperation::AcquireGequal &&
              static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -317,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
 }
 
 void GPU::ProcessSemaphoreRelease() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    Memory::Write32(*address, regs.semaphore_release);
+    memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
 }
 
 void GPU::ProcessSemaphoreAcquire() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    const u32 word = Memory::Read32(*address);
+    const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
     const auto value = regs.semaphore_acquire;
     if (word != value) {
         regs.acquire_active = true;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 54abe5298..8e8f36f28 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,6 +5,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra {
@@ -162,15 +163,51 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
     return base_addr + (gpu_addr & PAGE_MASK);
 }
 
-std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
-    std::vector<GPUVAddr> results;
-    for (const auto& region : mapped_regions) {
-        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
-            const u64 offset{cpu_addr - region.cpu_addr};
-            results.push_back(region.gpu_addr + offset);
-        }
-    }
-    return results;
+u8 MemoryManager::Read8(GPUVAddr addr) {
+    return Memory::Read8(*GpuToCpuAddress(addr));
+}
+
+u16 MemoryManager::Read16(GPUVAddr addr) {
+    return Memory::Read16(*GpuToCpuAddress(addr));
+}
+
+u32 MemoryManager::Read32(GPUVAddr addr) {
+    return Memory::Read32(*GpuToCpuAddress(addr));
+}
+
+u64 MemoryManager::Read64(GPUVAddr addr) {
+    return Memory::Read64(*GpuToCpuAddress(addr));
+}
+
+void MemoryManager::Write8(GPUVAddr addr, u8 data) {
+    Memory::Write8(*GpuToCpuAddress(addr), data);
+}
+
+void MemoryManager::Write16(GPUVAddr addr, u16 data) {
+    Memory::Write16(*GpuToCpuAddress(addr), data);
+}
+
+void MemoryManager::Write32(GPUVAddr addr, u32 data) {
+    Memory::Write32(*GpuToCpuAddress(addr), data);
+}
+
+void MemoryManager::Write64(GPUVAddr addr, u64 data) {
+    Memory::Write64(*GpuToCpuAddress(addr), data);
+}
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+    return Memory::GetPointer(*GpuToCpuAddress(addr));
+}
+
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
+    std::memcpy(dest_buffer, GetPointer(src_addr), size);
+}
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+    std::memcpy(GetPointer(dest_addr), src_buffer, size);
+}
+
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+    std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
 }
 
 VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..425e2f31c 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -27,12 +27,27 @@ public:
     GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
     GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
     std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
-    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
 
     static constexpr u64 PAGE_BITS = 16;
     static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
     static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
 
+    u8 Read8(GPUVAddr addr);
+    u16 Read16(GPUVAddr addr);
+    u32 Read32(GPUVAddr addr);
+    u64 Read64(GPUVAddr addr);
+
+    void Write8(GPUVAddr addr, u8 data);
+    void Write16(GPUVAddr addr, u16 data);
+    void Write32(GPUVAddr addr, u32 data);
+    void Write64(GPUVAddr addr, u64 data);
+
+    u8* GetPointer(GPUVAddr vaddr);
+
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
+
 private:
     enum class PageStatus : u64 {
         Unmapped = 0xFFFFFFFFFFFFFFFFULL,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 9692ce143..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "core/memory.h"
 #include "video_core/morton.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
 using Surface::GetBytesPerPixel;
 using Surface::PixelFormat;
 
-using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr);
+using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
 using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
 
 template <bool morton_to_linear, PixelFormat format>
 static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
-                       u32 tile_width_spacing, u8* buffer, VAddr addr) {
+                       u32 tile_width_spacing, u8* buffer, u8* addr) {
     constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
 
     // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,10 +33,10 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
                                          stride, height, depth, block_height, block_depth,
                                          tile_width_spacing);
     } else {
-        Tegra::Texture::CopySwizzledData(
-            (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
-            depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
-            block_height, block_depth, tile_width_spacing);
+        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+                                         (height + tile_size_y - 1) / tile_size_y, depth,
+                                         bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
+                                         block_height, block_depth, tile_width_spacing);
     }
 }
 
@@ -282,7 +281,7 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
 
 void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, VAddr addr) {
+                   u8* buffer, u8* addr) {
     GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
                                      tile_width_spacing, buffer, addr);
 }
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index b565204b5..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,7 +13,7 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
 
 void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, VAddr addr);
+                   u8* buffer, u8* addr);
 
 void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
                          u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index a4eea61a6..5048ed6ce 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -24,14 +24,12 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
 GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                       std::size_t alignment, bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
 
     // Cache management is a big overhead, so only cache entries with a given size.
     // TODO: Figure out which size is the best for given games.
     cache &= size >= 2048;
 
-    const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
+    const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
     if (cache) {
         auto entry = TryGet(host_ptr);
         if (entry) {
@@ -54,8 +52,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
     buffer_offset += size;
 
     if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
-                                                         alignment, host_ptr);
+        auto entry = std::make_shared<CachedBufferEntry>(
+            *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
         Register(entry);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index a2c509c24..c8dbcacbd 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -7,7 +7,6 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -39,7 +38,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
     glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
     const auto search{reserve.find(addr)};
     if (search == reserve.end()) {
         return {};
@@ -47,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
     return search->second;
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) {
-    GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
+                                                              u8* host_ptr) {
+    GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
     if (!region) {
         // No reserved surface available, create a new one and reserve it
-        region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr);
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+        const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
+        region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
         ReserveGlobalRegion(region);
     }
     region->Reload(size);
@@ -59,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
 }
 
 void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
-    reserve.insert_or_assign(region->GetCpuAddr(), std::move(region));
+    reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
 }
 
 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -70,23 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
     Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
 
     auto& gpu{Core::System::GetInstance().GPU()};
-    const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
-    const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
-        cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
-    ASSERT(cbuf_addr);
-
-    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
-    const auto size = Memory::Read32(*cbuf_addr + 8);
-    const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
-    ASSERT(actual_addr);
+    auto& memory_manager{gpu.MemoryManager()};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
+                    global_region.GetCbufOffset()};
+    const auto actual_addr{memory_manager.Read64(addr)};
+    const auto size{memory_manager.Read32(addr + 8)};
 
     // Look up global region in the cache based on address
-    const auto& host_ptr{Memory::GetPointer(*actual_addr)};
+    const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
     GlobalRegion region{TryGet(host_ptr)};
 
     if (!region) {
         // No global region found - create a new one
-        region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr);
+        region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
         Register(region);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index e497a0619..a840491f7 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -65,11 +65,11 @@ public:
                                  Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
 
 private:
-    GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr);
+    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
+    GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
     void ReserveGlobalRegion(GlobalRegion region);
 
-    std::unordered_map<VAddr, GlobalRegion> reserve;
+    std::unordered_map<CacheAddr, GlobalRegion> reserve;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 77d5cedd2..75d816795 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,10 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
 
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
-    const u8* source{Memory::GetPointer(*cpu_addr)};
+    const u8* source{memory_manager.GetPointer(gpu_addr)};
 
     for (u32 primitive = 0; primitive < count / 4; ++primitive) {
         for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +61,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
     return index_offset;
 }
 
-} // namespace OpenGL
-\ No newline at end of file
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bb6de5477..198c54872 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -749,11 +749,17 @@ void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (!addr || !size) {
+        return;
+    }
     res_cache.FlushRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (!addr || !size) {
+        return;
+    }
     res_cache.InvalidateRegion(addr, size);
     shader_cache.InvalidateRegion(addr, size);
     global_cache.InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 451de00e8..57329cd61 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -57,11 +57,9 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
 
 void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
 
-    addr = cpu_addr ? *cpu_addr : 0;
     gpu_addr = gpu_addr_;
-    host_ptr = Memory::GetPointer(addr);
+    host_ptr = memory_manager.GetPointer(gpu_addr_);
     size_in_bytes = SizeInBytesRaw();
 
     if (IsPixelFormatASTC(pixel_format)) {
@@ -447,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
             MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                           params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                           params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
-                          gl_buffer.data() + offset_gl, params.addr + offset);
+                          gl_buffer.data() + offset_gl, params.host_ptr + offset);
             offset += layer_size;
             offset_gl += gl_size;
         }
@@ -456,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
         MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                       params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                       params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
-                      gl_buffer.data(), params.addr + offset);
+                      gl_buffer.data(), params.host_ptr + offset);
     }
 }
 
@@ -514,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
                               "reinterpretation but the texture is tiled.");
         }
         const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
-
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
         glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
-                        Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
+                        memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
     }
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -604,7 +602,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 
     ApplyTextureDefaults(texture.handle, params.max_mip_level);
 
-    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
+    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
 
     // Clamp size to mapped GPU memory region
     // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -617,6 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
         LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
         cached_size_in_bytes = max_size;
     }
+
+    cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -925,7 +925,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
-    if (params.addr == 0 || params.height * params.width == 0) {
+    if (params.gpu_addr == 0 || params.height * params.width == 0) {
         return {};
     }
 
@@ -979,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
                                                    const Surface& dst_surface) {
     const auto& init_params{src_surface->GetSurfaceParams()};
     const auto& dst_params{dst_surface->GetSurfaceParams()};
-    VAddr address = init_params.addr;
-    const std::size_t layer_size = dst_params.LayerMemorySize();
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    Tegra::GPUVAddr address{init_params.gpu_addr};
+    const std::size_t layer_size{dst_params.LayerMemorySize()};
     for (u32 layer = 0; layer < dst_params.depth; layer++) {
         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
-            const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
-            const Surface& copy = TryGet(Memory::GetPointer(sub_address));
-            if (!copy)
+            const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
+            if (!copy) {
                 continue;
+            }
             const auto& src_params{copy->GetSurfaceParams()};
             const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
             const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1242,9 +1244,10 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
     return {};
 }
 
-static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
-    const std::size_t size = params.LayerMemorySize();
-    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
+                                           u32 mipmap) {
+    const std::size_t size{params.LayerMemorySize()};
+    Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
     for (u32 i = 0; i < params.depth; i++) {
         if (start == addr) {
             return {i};
@@ -1266,7 +1269,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
             src_params.height == dst_params.MipHeight(*level) &&
             src_params.block_height >= dst_params.MipBlockHeight(*level)) {
             const std::optional<u32> slot =
-                TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level);
+                TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
             if (slot.has_value()) {
                 glCopyImageSubData(render_surface->Texture().handle,
                                    SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b3afad139..9366f47f2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -296,7 +296,6 @@ struct SurfaceParams {
     bool is_array;
     bool srgb_conversion;
     // Parameters used for caching
-    VAddr addr;
     u8* host_ptr;
     Tegra::GPUVAddr gpu_addr;
     std::size_t size_in_bytes;
@@ -349,7 +348,7 @@ public:
     explicit CachedSurface(const SurfaceParams& params);
 
     VAddr GetCpuAddr() const override {
-        return params.addr;
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
@@ -433,6 +432,7 @@ private:
     std::size_t memory_size;
     bool reinterpreted = false;
     bool must_reload = false;
+    VAddr cpu_addr{};
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 60a04e146..1ed740877 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -32,13 +32,10 @@ struct UnspecializedShader {
 namespace {
 
 /// Gets the address for the specified shader stage program
-VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
-    const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
-                                                            shader_config.offset);
-    ASSERT_MSG(address, "Invalid GPU address");
-    return *address;
+Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+    const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
 }
 
 /// Gets the shader program code from memory for the specified address
@@ -214,11 +211,11 @@ std::set<GLenum> GetSupportedFormats() {
 
 } // namespace
 
-CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                            const PrecompiledPrograms& precompiled_programs,
                            ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
-    : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier},
+    : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
       program_type{program_type}, disk_cache{disk_cache},
       precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
 
@@ -244,11 +241,11 @@ CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
     disk_cache.SaveRaw(raw);
 }
 
-CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                            const PrecompiledPrograms& precompiled_programs,
                            GLShader::ProgramResult result, u8* host_ptr)
-    : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type},
+    : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
       disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
                                                                               host_ptr} {
 
@@ -273,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
                 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
             }
 
-            LabelGLObject(GL_PROGRAM, program->handle, guest_addr);
+            LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
         }
 
         handle = program->handle;
@@ -325,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
         disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
     }
 
-    LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name);
+    LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
 
     return target_program->handle;
 };
@@ -488,31 +485,31 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
         return last_shaders[static_cast<u32>(program)];
     }
 
-    const VAddr program_addr{GetShaderAddress(program)};
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
 
     // Look up shader in the cache based on address
-    const auto& host_ptr{Memory::GetPointer(program_addr)};
+    const auto& host_ptr{memory_manager.GetPointer(program_addr)};
     Shader shader{TryGet(host_ptr)};
 
     if (!shader) {
         // No shader found - create a new one
-        const auto& host_ptr{Memory::GetPointer(program_addr)};
         ProgramCode program_code{GetShaderCode(host_ptr)};
         ProgramCode program_code_b;
         if (program == Maxwell::ShaderProgram::VertexA) {
             program_code_b = GetShaderCode(
-                Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+                memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
         }
         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
-
+        const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
         const auto found = precompiled_shaders.find(unique_identifier);
         if (found != precompiled_shaders.end()) {
             shader =
-                std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
+                std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
                                                precompiled_programs, found->second, host_ptr);
         } else {
             shader = std::make_shared<CachedShader>(
-                program_addr, unique_identifier, program, disk_cache, precompiled_programs,
+                cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
                 std::move(program_code), std::move(program_code_b), host_ptr);
         }
         Register(shader);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 81fe716b4..fd1c85115 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
 
 class CachedShader final : public RasterizerCacheObject {
 public:
-    explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                           const PrecompiledPrograms& precompiled_programs,
                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
 
-    explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                           const PrecompiledPrograms& precompiled_programs,
                           GLShader::ProgramResult result, u8* host_ptr);
 
     VAddr GetCpuAddr() const override {
-        return guest_addr;
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
@@ -92,7 +92,7 @@ private:
     ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
 
     u8* host_ptr{};
-    VAddr guest_addr{};
+    VAddr cpu_addr{};
     u64 unique_identifier{};
     Maxwell::ShaderProgram program_type{};
     ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b97576309..5e3d862c6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -164,8 +164,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
         // Reset the screen info's display texture to its own permanent texture
         screen_info.display_texture = screen_info.texture.resource.handle;
 
-        Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
-                                             Memory::FlushMode::Flush);
+        rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
 
         constexpr u32 linear_bpp = 4;
         VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index cad7340f5..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
     }
 }
 
-void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                       u32 block_depth, u32 width_spacing) {
     CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
                      (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
-                     bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
-                     block_height, block_depth, width_spacing);
+                     bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
+                     width_spacing);
 }
 
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
-                                 u32 block_height, u32 block_depth, u32 width_spacing) {
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth, u32 block_height,
+                                 u32 block_depth, u32 width_spacing) {
     std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
     UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
                      width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
 }
 
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height) {
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
     const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
                                   gob_size_x};
     for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
             const u32 gob_address =
                 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
             const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
-            const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
-            const VAddr dest_addr = swizzled_data + swizzled_offset;
+            u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
+            u8* dest_addr = swizzled_data + swizzled_offset;
 
-            Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel);
+            std::memcpy(dest_addr, source_line, bytes_per_pixel);
         }
     }
 }
 
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y) {
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y) {
     for (u32 line = 0; line < subrect_height; ++line) {
         const u32 y2 = line + offset_y;
         const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
             const u32 x2 = (x + offset_x) * bytes_per_pixel;
             const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
             const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
-            const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
-            const VAddr source_addr = swizzled_data + swizzled_offset;
+            u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
+            u8* source_addr = swizzled_data + swizzled_offset;
 
-            Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel);
+            std::memcpy(dest_line, source_addr, bytes_per_pixel);
         }
     }
 }
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 65df86890..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() {
 }
 
 /// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                       u32 block_height = TICEntry::DefaultBlockHeight,
                       u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
 
 /// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth,
                                  u32 block_height = TICEntry::DefaultBlockHeight,
                                  u32 block_depth = TICEntry::DefaultBlockHeight,
                                  u32 width_spacing = 0);
@@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
 
 /// Copies an untiled subrectangle into a tiled surface.
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height);
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
 
 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y);
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y);
 
 } // namespace Tegra::Texture
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 71683da8e..29f01dfb2 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
     // TODO: Implement a good way to visualize alpha components!
 
     QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
-    std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
 
     // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
     // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
     auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
-        *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width,
-        surface_height, 1U);
+        gpu.MemoryManager().GetPointer(surface_address), 1, 1,
+        Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);
 
     auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
                                                       surface_width, surface_height);