174 files changed, 6335 insertions, 5212 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 61adbef28..478246b6f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -67,8 +67,11 @@ else()
         -Werror=implicit-fallthrough
         -Werror=missing-declarations
         -Werror=reorder
+        -Werror=switch
         -Werror=uninitialized
+        -Werror=unused-function
         -Werror=unused-result
+        -Werror=unused-variable
         -Wextra
         -Wmissing-declarations
         -Wno-attributes
@@ -127,7 +130,6 @@ add_subdirectory(tests)
 
 if (ENABLE_SDL2)
     add_subdirectory(yuzu_cmd)
-    add_subdirectory(yuzu_tester)
 endif()
 
 if (ENABLE_QT)
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 05541becb..66ee4e8a0 100644
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -40,17 +40,17 @@ public:
         SinkSampleFormat sample_format;
         std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> input;
         bool in_use;
-        INSERT_UNION_PADDING_BYTES(5);
+        INSERT_PADDING_BYTES_NOINIT(5);
     };
     static_assert(sizeof(CircularBufferIn) == 0x28,
                   "SinkInfo::CircularBufferIn is in invalid size");
 
     struct DeviceIn {
         std::array<u8, 255> device_name;
-        INSERT_UNION_PADDING_BYTES(1);
+        INSERT_PADDING_BYTES_NOINIT(1);
         s32_le input_count;
         std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> input;
-        INSERT_UNION_PADDING_BYTES(1);
+        INSERT_PADDING_BYTES_NOINIT(1);
         bool down_matrix_enabled;
         DownmixCoefficients down_matrix_coef;
     };
diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h
index 863248761..70359cadb 100644
--- a/src/audio_core/voice_context.h
+++ b/src/audio_core/voice_context.h
@@ -86,28 +86,28 @@ struct BehaviorFlags {
 static_assert(sizeof(BehaviorFlags) == 0x4, "BehaviorFlags is an invalid size");
 
 struct ADPCMContext {
-    u16 header{};
-    s16 yn1{};
-    s16 yn2{};
+    u16 header;
+    s16 yn1;
+    s16 yn2;
 };
 static_assert(sizeof(ADPCMContext) == 0x6, "ADPCMContext is an invalid size");
 
 struct VoiceState {
-    s64 played_sample_count{};
-    s32 offset{};
-    s32 wave_buffer_index{};
-    std::array<bool, AudioCommon::MAX_WAVE_BUFFERS> is_wave_buffer_valid{};
-    s32 wave_buffer_consumed{};
-    std::array<s32, AudioCommon::MAX_SAMPLE_HISTORY> sample_history{};
-    s32 fraction{};
-    VAddr context_address{};
-    Codec::ADPCM_Coeff coeff{};
-    ADPCMContext context{};
-    std::array<s64, 2> biquad_filter_state{};
-    std::array<s32, AudioCommon::MAX_MIX_BUFFERS> previous_samples{};
-    u32 external_context_size{};
-    bool is_external_context_used{};
-    bool voice_dropped{};
+    s64 played_sample_count;
+    s32 offset;
+    s32 wave_buffer_index;
+    std::array<bool, AudioCommon::MAX_WAVE_BUFFERS> is_wave_buffer_valid;
+    s32 wave_buffer_consumed;
+    std::array<s32, AudioCommon::MAX_SAMPLE_HISTORY> sample_history;
+    s32 fraction;
+    VAddr context_address;
+    Codec::ADPCM_Coeff coeff;
+    ADPCMContext context;
+    std::array<s64, 2> biquad_filter_state;
+    std::array<s32, AudioCommon::MAX_MIX_BUFFERS> previous_samples;
+    u32 external_context_size;
+    bool is_external_context_used;
+    bool voice_dropped;
 };
 
 class VoiceChannelResource {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2c2bd2ee8..f77575a00 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -98,7 +98,6 @@ add_library(common STATIC
     algorithm.h
     alignment.h
     assert.h
-    atomic_ops.cpp
     atomic_ops.h
     detached_tasks.cpp
     detached_tasks.h
@@ -108,7 +107,6 @@ add_library(common STATIC
     bit_util.h
     cityhash.cpp
     cityhash.h
-    color.h
     common_funcs.h
     common_paths.h
     common_types.h
@@ -123,6 +121,7 @@ add_library(common STATIC
     hash.h
     hex_util.cpp
     hex_util.h
+    intrusive_red_black_tree.h
     logging/backend.cpp
     logging/backend.h
     logging/filter.cpp
@@ -143,6 +142,7 @@ add_library(common STATIC
     page_table.h
     param_package.cpp
     param_package.h
+    parent_of_member.h
     quaternion.h
     ring_buffer.h
     scm_rev.cpp
@@ -165,8 +165,7 @@ add_library(common STATIC
     threadsafe_queue.h
     time_zone.cpp
     time_zone.h
-    timer.cpp
-    timer.h
+    tree.h
     uint128.cpp
     uint128.h
     uuid.cpp
diff --git a/src/common/alignment.h b/src/common/alignment.h
index 5040043de..fb81f10d8 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -9,50 +9,45 @@
 namespace Common {
 
 template <typename T>
-[[nodiscard]] constexpr T AlignUp(T value, std::size_t size) {
-    static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
+requires std::is_unsigned_v<T>[[nodiscard]] constexpr T AlignUp(T value, size_t size) {
     auto mod{static_cast<T>(value % size)};
     value -= mod;
     return static_cast<T>(mod == T{0} ? value : value + size);
 }
 
 template <typename T>
-[[nodiscard]] constexpr T AlignDown(T value, std::size_t size) {
-    static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
-    return static_cast<T>(value - value % size);
+requires std::is_unsigned_v<T>[[nodiscard]] constexpr T AlignUpLog2(T value, size_t align_log2) {
+    return static_cast<T>((value + ((1ULL << align_log2) - 1)) >> align_log2 << align_log2);
 }
 
 template <typename T>
-[[nodiscard]] constexpr T AlignBits(T value, std::size_t align) {
-    static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
-    return static_cast<T>((value + ((1ULL << align) - 1)) >> align << align);
+requires std::is_unsigned_v<T>[[nodiscard]] constexpr T AlignDown(T value, size_t size) {
+    return static_cast<T>(value - value % size);
 }
 
 template <typename T>
-[[nodiscard]] constexpr bool Is4KBAligned(T value) {
-    static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
+requires std::is_unsigned_v<T>[[nodiscard]] constexpr bool Is4KBAligned(T value) {
     return (value & 0xFFF) == 0;
 }
 
 template <typename T>
-[[nodiscard]] constexpr bool IsWordAligned(T value) {
-    static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
+requires std::is_unsigned_v<T>[[nodiscard]] constexpr bool IsWordAligned(T value) {
     return (value & 0b11) == 0;
 }
 
 template <typename T>
-[[nodiscard]] constexpr bool IsAligned(T value, std::size_t alignment) {
-    using U = typename std::make_unsigned<T>::type;
+requires std::is_integral_v<T>[[nodiscard]] constexpr bool IsAligned(T value, size_t alignment) {
+    using U = typename std::make_unsigned_t<T>;
     const U mask = static_cast<U>(alignment - 1);
     return (value & mask) == 0;
 }
 
-template <typename T, std::size_t Align = 16>
+template <typename T, size_t Align = 16>
 class AlignmentAllocator {
 public:
     using value_type = T;
-    using size_type = std::size_t;
-    using difference_type = std::ptrdiff_t;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
 
     using propagate_on_container_copy_assignment = std::true_type;
     using propagate_on_container_move_assignment = std::true_type;
diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp
deleted file mode 100644
index 1612d0e67..000000000
--- a/src/common/atomic_ops.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-
-#include "common/atomic_ops.h"
-
-#if _MSC_VER
-#include <intrin.h>
-#endif
-
-namespace Common {
-
-#if _MSC_VER
-
-bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
-    const u8 result =
-        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
-    return result == expected;
-}
-
-bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
-    const u16 result =
-        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
-    return result == expected;
-}
-
-bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
-    const u32 result =
-        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
-    return result == expected;
-}
-
-bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
-    const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
-                                                     value, expected);
-    return result == expected;
-}
-
-bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
-    return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
-                                          value[0],
-                                          reinterpret_cast<__int64*>(expected.data())) != 0;
-}
-
-#else
-
-bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
-    return __sync_bool_compare_and_swap(pointer, expected, value);
-}
-
-bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
-    return __sync_bool_compare_and_swap(pointer, expected, value);
-}
-
-bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
-    return __sync_bool_compare_and_swap(pointer, expected, value);
-}
-
-bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
-    return __sync_bool_compare_and_swap(pointer, expected, value);
-}
-
-bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
-    unsigned __int128 value_a;
-    unsigned __int128 expected_a;
-    std::memcpy(&value_a, value.data(), sizeof(u128));
-    std::memcpy(&expected_a, expected.data(), sizeof(u128));
-    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
-}
-
-#endif
-
-} // namespace Common
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
index b46888589..2b1f515e8 100644
--- a/src/common/atomic_ops.h
+++ b/src/common/atomic_ops.h
@@ -4,14 +4,75 @@
 
 #pragma once
 
+#include <cstring>
+#include <memory>
+
 #include "common/common_types.h"
 
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
 namespace Common {
 
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected);
+#if _MSC_VER
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
+    const u8 result =
+        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
+    const u16 result =
+        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
+    const u32 result =
+        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
+    const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
+                                                     value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
+    return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
+                                          value[0],
+                                          reinterpret_cast<__int64*>(expected.data())) != 0;
+}
+
+#else
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+}
+
+#endif
 
 } // namespace Common
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 29f59a9a3..64520ca4e 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -4,13 +4,10 @@
 
 #pragma once
 
+#include <bit>
 #include <climits>
 #include <cstddef>
 
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
 #include "common/common_types.h"
 
 namespace Common {
@@ -21,124 +18,30 @@ template <typename T>
     return sizeof(T) * CHAR_BIT;
 }
 
-#ifdef _MSC_VER
-[[nodiscard]] inline u32 CountLeadingZeroes32(u32 value) {
-    unsigned long leading_zero = 0;
-
-    if (_BitScanReverse(&leading_zero, value) != 0) {
-        return 31 - leading_zero;
-    }
-
-    return 32;
-}
-
-[[nodiscard]] inline u32 CountLeadingZeroes64(u64 value) {
-    unsigned long leading_zero = 0;
-
-    if (_BitScanReverse64(&leading_zero, value) != 0) {
-        return 63 - leading_zero;
-    }
-
-    return 64;
-}
-#else
-[[nodiscard]] inline u32 CountLeadingZeroes32(u32 value) {
-    if (value == 0) {
-        return 32;
-    }
-
-    return static_cast<u32>(__builtin_clz(value));
-}
-
-[[nodiscard]] inline u32 CountLeadingZeroes64(u64 value) {
-    if (value == 0) {
-        return 64;
-    }
-
-    return static_cast<u32>(__builtin_clzll(value));
-}
-#endif
-
-#ifdef _MSC_VER
-[[nodiscard]] inline u32 CountTrailingZeroes32(u32 value) {
-    unsigned long trailing_zero = 0;
-
-    if (_BitScanForward(&trailing_zero, value) != 0) {
-        return trailing_zero;
-    }
-
-    return 32;
-}
-
-[[nodiscard]] inline u32 CountTrailingZeroes64(u64 value) {
-    unsigned long trailing_zero = 0;
-
-    if (_BitScanForward64(&trailing_zero, value) != 0) {
-        return trailing_zero;
-    }
-
-    return 64;
-}
-#else
-[[nodiscard]] inline u32 CountTrailingZeroes32(u32 value) {
-    if (value == 0) {
-        return 32;
-    }
-
-    return static_cast<u32>(__builtin_ctz(value));
-}
-
-[[nodiscard]] inline u32 CountTrailingZeroes64(u64 value) {
-    if (value == 0) {
-        return 64;
-    }
-
-    return static_cast<u32>(__builtin_ctzll(value));
+[[nodiscard]] constexpr u32 MostSignificantBit32(const u32 value) {
+    return 31U - static_cast<u32>(std::countl_zero(value));
 }
-#endif
-
-#ifdef _MSC_VER
 
-[[nodiscard]] inline u32 MostSignificantBit32(const u32 value) {
-    unsigned long result;
-    _BitScanReverse(&result, value);
-    return static_cast<u32>(result);
+[[nodiscard]] constexpr u32 MostSignificantBit64(const u64 value) {
+    return 63U - static_cast<u32>(std::countl_zero(value));
 }
 
-[[nodiscard]] inline u32 MostSignificantBit64(const u64 value) {
-    unsigned long result;
-    _BitScanReverse64(&result, value);
-    return static_cast<u32>(result);
-}
-
-#else
-
-[[nodiscard]] inline u32 MostSignificantBit32(const u32 value) {
-    return 31U - static_cast<u32>(__builtin_clz(value));
-}
-
-[[nodiscard]] inline u32 MostSignificantBit64(const u64 value) {
-    return 63U - static_cast<u32>(__builtin_clzll(value));
-}
-
-#endif
-
-[[nodiscard]] inline u32 Log2Floor32(const u32 value) {
+[[nodiscard]] constexpr u32 Log2Floor32(const u32 value) {
     return MostSignificantBit32(value);
 }
 
-[[nodiscard]] inline u32 Log2Ceil32(const u32 value) {
-    const u32 log2_f = Log2Floor32(value);
-    return log2_f + ((value ^ (1U << log2_f)) != 0U);
+[[nodiscard]] constexpr u32 Log2Floor64(const u64 value) {
+    return MostSignificantBit64(value);
 }
 
-[[nodiscard]] inline u32 Log2Floor64(const u64 value) {
-    return MostSignificantBit64(value);
+[[nodiscard]] constexpr u32 Log2Ceil32(const u32 value) {
+    const u32 log2_f = Log2Floor32(value);
+    return log2_f + static_cast<u32>((value ^ (1U << log2_f)) != 0U);
 }
 
-[[nodiscard]] inline u32 Log2Ceil64(const u64 value) {
-    const u64 log2_f = static_cast<u64>(Log2Floor64(value));
-    return static_cast<u32>(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL));
+[[nodiscard]] constexpr u32 Log2Ceil64(const u64 value) {
+    const u64 log2_f = Log2Floor64(value);
+    return static_cast<u32>(log2_f + static_cast<u64>((value ^ (1ULL << log2_f)) != 0ULL));
 }
 
 } // namespace Common
diff --git a/src/common/color.h b/src/common/color.h
deleted file mode 100644
index bbcac858e..000000000
--- a/src/common/color.h
+++ /dev/null
@@ -1,271 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstring>
-
-#include "common/common_types.h"
-#include "common/swap.h"
-#include "common/vector_math.h"
-
-namespace Common::Color {
-
-/// Convert a 1-bit color component to 8 bit
-[[nodiscard]] constexpr u8 Convert1To8(u8 value) {
-    return value * 255;
-}
-
-/// Convert a 4-bit color component to 8 bit
-[[nodiscard]] constexpr u8 Convert4To8(u8 value) {
-    return (value << 4) | value;
-}
-
-/// Convert a 5-bit color component to 8 bit
-[[nodiscard]] constexpr u8 Convert5To8(u8 value) {
-    return (value << 3) | (value >> 2);
-}
-
-/// Convert a 6-bit color component to 8 bit
-[[nodiscard]] constexpr u8 Convert6To8(u8 value) {
-    return (value << 2) | (value >> 4);
-}
-
-/// Convert a 8-bit color component to 1 bit
-[[nodiscard]] constexpr u8 Convert8To1(u8 value) {
-    return value >> 7;
-}
-
-/// Convert a 8-bit color component to 4 bit
-[[nodiscard]] constexpr u8 Convert8To4(u8 value) {
-    return value >> 4;
-}
-
-/// Convert a 8-bit color component to 5 bit
-[[nodiscard]] constexpr u8 Convert8To5(u8 value) {
-    return value >> 3;
-}
-
-/// Convert a 8-bit color component to 6 bit
-[[nodiscard]] constexpr u8 Convert8To6(u8 value) {
-    return value >> 2;
-}
-
-/**
- * Decode a color stored in RGBA8 format
- * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
- */
-[[nodiscard]] inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
-    return {bytes[3], bytes[2], bytes[1], bytes[0]};
-}
-
-/**
- * Decode a color stored in RGB8 format
- * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
- */
-[[nodiscard]] inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
-    return {bytes[2], bytes[1], bytes[0], 255};
-}
-
-/**
- * Decode a color stored in RG8 (aka HILO8) format
- * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
- */
-[[nodiscard]] inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
-    return {bytes[1], bytes[0], 0, 255};
-}
-
-/**
- * Decode a color stored in RGB565 format
- * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
- */
-[[nodiscard]] inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
-    u16_le pixel;
-    std::memcpy(&pixel, bytes, sizeof(pixel));
-    return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
-            Convert5To8(pixel & 0x1F), 255};
-}
-
-/**
- * Decode a color stored in RGB5A1 format
- * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
- */
-[[nodiscard]] inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
-    u16_le pixel;
-    std::memcpy(&pixel, bytes, sizeof(pixel));
-    return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
-            Convert5To8((pixel >> 1) & 0x1F), Convert1To8(pixel & 0x1)};
-}
-
-/**
- * Decode a color stored in RGBA4 format
- * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
- */
-[[nodiscard]] inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
-    u16_le pixel;
-    std::memcpy(&pixel, bytes, sizeof(pixel));
-    return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
-            Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF)};
-}
-
-/**
- * Decode a depth value stored in D16 format
- * @param bytes Pointer to encoded source value
- * @return Depth value as an u32
- */
-[[nodiscard]] inline u32 DecodeD16(const u8* bytes) {
-    u16_le data;
-    std::memcpy(&data, bytes, sizeof(data));
-    return data;
-}
-
-/**
- * Decode a depth value stored in D24 format
- * @param bytes Pointer to encoded source value
- * @return Depth value as an u32
- */
-[[nodiscard]] inline u32 DecodeD24(const u8* bytes) {
-    return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
-}
-
-/**
- * Decode a depth value and a stencil value stored in D24S8 format
- * @param bytes Pointer to encoded source values
- * @return Resulting values stored as a Common::Vec2
- */
-[[nodiscard]] inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
-    return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
-}
-
-/**
- * Encode a color as RGBA8 format
- * @param color Source color to encode
- * @param bytes Destination pointer to store encoded color
- */
-inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
-    bytes[3] = color.r();
-    bytes[2] = color.g();
-    bytes[1] = color.b();
-    bytes[0] = color.a();
-}
-
-/**
- * Encode a color as RGB8 format
- * @param color Source color to encode
- * @param bytes Destination pointer to store encoded color
- */
-inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
-    bytes[2] = color.r();
-    bytes[1] = color.g();
-    bytes[0] = color.b();
-}
-
-/**
- * Encode a color as RG8 (aka HILO8) format
- * @param color Source color to encode
- * @param bytes Destination pointer to store encoded color
- */
-inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
-    bytes[1] = color.r();
-    bytes[0] = color.g();
-}
-/**
- * Encode a color as RGB565 format
- * @param color Source color to encode
- * @param bytes Destination pointer to store encoded color
- */
-inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
-    const u16_le data =
-        (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
-
-    std::memcpy(bytes, &data, sizeof(data));
-}
-
-/**
- * Encode a color as RGB5A1 format
- * @param color Source color to encode
- * @param bytes Destination pointer to store encoded color
- */
-inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
-    const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
-                        (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
-
-    std::memcpy(bytes, &data, sizeof(data));
-}
-
-/**
- * Encode a color as RGBA4 format
- * @param color Source color to encode
- * @param bytes Destination pointer to store encoded color
- */
-inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
-    const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
-                     (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
-
-    std::memcpy(bytes, &data, sizeof(data));
-}
-
-/**
- * Encode a 16 bit depth value as D16 format
- * @param value 16 bit source depth value to encode
- * @param bytes Pointer where to store the encoded value
- */
-inline void EncodeD16(u32 value, u8* bytes) {
-    const u16_le data = static_cast<u16>(value);
-    std::memcpy(bytes, &data, sizeof(data));
-}
-
-/**
- * Encode a 24 bit depth value as D24 format
- * @param value 24 bit source depth value to encode
- * @param bytes Pointer where to store the encoded value
- */
-inline void EncodeD24(u32 value, u8* bytes) {
-    bytes[0] = value & 0xFF;
-    bytes[1] = (value >> 8) & 0xFF;
-    bytes[2] = (value >> 16) & 0xFF;
-}
-
-/**
- * Encode a 24 bit depth and 8 bit stencil values as D24S8 format
- * @param depth 24 bit source depth value to encode
- * @param stencil 8 bit source stencil value to encode
- * @param bytes Pointer where to store the encoded value
- */
-inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
-    bytes[0] = depth & 0xFF;
-    bytes[1] = (depth >> 8) & 0xFF;
-    bytes[2] = (depth >> 16) & 0xFF;
-    bytes[3] = stencil;
-}
-
-/**
- * Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused)
- * @param depth 24 bit source depth value to encode
- * @param bytes Pointer where to store the encoded value
- * @note unused bits will not be modified
- */
-inline void EncodeD24X8(u32 depth, u8* bytes) {
-    bytes[0] = depth & 0xFF;
-    bytes[1] = (depth >> 8) & 0xFF;
-    bytes[2] = (depth >> 16) & 0xFF;
-}
-
-/**
- * Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused)
- * @param stencil 8 bit source stencil value to encode
- * @param bytes Pointer where to store the encoded value
- * @note unused bits will not be modified
- */
-inline void EncodeX24S8(u8 stencil, u8* bytes) {
-    bytes[3] = stencil;
-}
-
-} // namespace Common::Color
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 367b6bf6e..75f3027fb 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -24,10 +24,10 @@
 #define INSERT_PADDING_WORDS(num_words)                                                            \
     std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
 
-/// These are similar to the INSERT_PADDING_* macros, but are needed for padding unions. This is
-/// because unions can only be initialized by one member.
-#define INSERT_UNION_PADDING_BYTES(num_bytes) std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
-#define INSERT_UNION_PADDING_WORDS(num_words) std::array<u32, num_words> CONCAT2(pad, __LINE__)
+/// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents.
+/// This keeps the structure trivial to construct.
+#define INSERT_PADDING_BYTES_NOINIT(num_bytes) std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
+#define INSERT_PADDING_WORDS_NOINIT(num_words) std::array<u32, num_words> CONCAT2(pad, __LINE__)
 
 #ifndef _MSC_VER
 
@@ -93,6 +93,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
         return static_cast<T>(key) == 0;                                                           \
     }
 
+/// Evaluates a boolean expression and returns res from the enclosing function if it is false.
+#define R_UNLESS(expr, res)                                                                        \
+    {                                                                                              \
+        if (!(expr)) {                                                                             \
+            return res;                                                                            \
+        }                                                                                          \
+    }
+
 namespace Common {
 
 [[nodiscard]] constexpr u32 MakeMagic(char a, char b, char c, char d) {
diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h
new file mode 100644
index 000000000..c0bbcd457
--- /dev/null
+++ b/src/common/intrusive_red_black_tree.h
@@ -0,0 +1,602 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/parent_of_member.h"
+#include "common/tree.h"
+
+namespace Common {
+
+namespace impl {
+
+class IntrusiveRedBlackTreeImpl;
+
+}
+
+/// Hook embedded in (or inherited by) objects stored in an IntrusiveRedBlackTree.
+struct IntrusiveRedBlackTreeNode {
+    using EntryType = RBEntry<IntrusiveRedBlackTreeNode>;
+
+    constexpr IntrusiveRedBlackTreeNode() = default;
+
+    /// Replaces the stored tree entry with a copy of new_entry.
+    void SetEntry(const EntryType& new_entry) {
+        this->entry = new_entry;
+    }
+
+    /// Mutable / read-only access to the stored tree entry.
+    [[nodiscard]] EntryType& GetEntry() {
+        return this->entry;
+    }
+    [[nodiscard]] const EntryType& GetEntry() const {
+        return this->entry;
+    }
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    EntryType entry{};
+};
+
+template <class T, class Traits, class Comparator>
+class IntrusiveRedBlackTree;
+
+namespace impl {
+
+/// Non-template core of the intrusive tree: owns the RB head and deals in bare nodes only.
+class IntrusiveRedBlackTreeImpl {
+    template <class, class, class>
+    friend class ::Common::IntrusiveRedBlackTree;
+
+    using RootType = RBHead<IntrusiveRedBlackTreeNode>;
+    RootType root;
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = IntrusiveRedBlackTreeNode;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+
+    /// Bidirectional iterator over the tree's nodes; a null node pointer marks end().
+    template <bool Const>
+    class Iterator {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveRedBlackTreeImpl::value_type;
+        using difference_type = typename IntrusiveRedBlackTreeImpl::difference_type;
+        using pointer = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_pointer,
+                                           IntrusiveRedBlackTreeImpl::pointer>;
+        using reference = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_reference,
+                                             IntrusiveRedBlackTreeImpl::reference>;
+
+        explicit Iterator(pointer ptr) : node(ptr) {}
+
+        bool operator==(const Iterator& rhs) const {
+            return node == rhs.node;
+        }
+
+        bool operator!=(const Iterator& rhs) const {
+            return node != rhs.node;
+        }
+
+        pointer operator->() const {
+            return node;
+        }
+
+        reference operator*() const {
+            return *node;
+        }
+
+        Iterator& operator++() {
+            node = GetNext(node);
+            return *this;
+        }
+
+        Iterator& operator--() {
+            node = GetPrev(node);
+            return *this;
+        }
+
+        Iterator operator++(int) {
+            const Iterator previous{*this};
+            node = GetNext(node);
+            return previous;
+        }
+
+        Iterator operator--(int) {
+            const Iterator previous{*this};
+            node = GetPrev(node);
+            return previous;
+        }
+
+        operator Iterator<true>() const {
+            return Iterator<true>(node);
+        }
+
+    private:
+        pointer node;
+    };
+
+private:
+    // Accessors implemented on top of the RB_* functions.
+    bool EmptyImpl() const {
+        return root.IsEmpty();
+    }
+
+    IntrusiveRedBlackTreeNode* GetMinImpl() const {
+        return RB_MIN(const_cast<RootType*>(&root));
+    }
+
+    IntrusiveRedBlackTreeNode* GetMaxImpl() const {
+        return RB_MAX(const_cast<RootType*>(&root));
+    }
+
+    IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
+        return RB_REMOVE(&root, node);
+    }
+
+public:
+    static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
+        return RB_NEXT(node);
+    }
+
+    static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
+        return RB_PREV(node);
+    }
+
+    // Const overloads: cast const away, reuse the overloads above, re-add const on return.
+    static const IntrusiveRedBlackTreeNode* GetNext(const IntrusiveRedBlackTreeNode* node) {
+        auto* const mutable_node = const_cast<pointer>(node);
+        return GetNext(mutable_node);
+    }
+
+    static const IntrusiveRedBlackTreeNode* GetPrev(const IntrusiveRedBlackTreeNode* node) {
+        auto* const mutable_node = const_cast<pointer>(node);
+        return GetPrev(mutable_node);
+    }
+
+    constexpr IntrusiveRedBlackTreeImpl() {}
+
+    // Iteration: begin() wraps the minimum node, end() wraps a null pointer.
+    iterator begin() {
+        return iterator(GetMinImpl());
+    }
+
+    const_iterator begin() const {
+        return const_iterator(GetMinImpl());
+    }
+
+    iterator end() {
+        return iterator(pointer{nullptr});
+    }
+
+    const_iterator end() const {
+        return const_iterator(const_pointer{nullptr});
+    }
+
+    const_iterator cbegin() const {
+        return begin();
+    }
+
+    const_iterator cend() const {
+        return end();
+    }
+
+    iterator iterator_to(reference ref) {
+        return iterator(std::addressof(ref));
+    }
+
+    const_iterator iterator_to(const_reference ref) const {
+        return const_iterator(std::addressof(ref));
+    }
+
+    // Content management. back()/front() dereference the max/min node without a null check.
+    bool empty() const {
+        return EmptyImpl();
+    }
+
+    reference back() {
+        return *GetMaxImpl();
+    }
+
+    const_reference back() const {
+        return *GetMaxImpl();
+    }
+
+    reference front() {
+        return *GetMinImpl();
+    }
+
+    const_reference front() const {
+        return *GetMinImpl();
+    }
+
+    iterator erase(iterator it) {
+        const pointer target = std::addressof(*it);
+        const pointer successor = GetNext(target); // Looked up before the node is removed.
+        RemoveImpl(target);
+        return iterator(successor);
+    }
+};
+
+} // namespace impl
+
+/// Satisfied when T declares a nested type named LightCompareType.
+template <typename T>
+concept HasLightCompareType = requires {
+    typename T::LightCompareType;
+};
+
+namespace impl {
+/// Returns a null T::LightCompareType* if T declares that type, otherwise a null Default*.
+template <typename T, typename Default>
+consteval auto* GetLightCompareType() {
+    if constexpr (!HasLightCompareType<T>) {
+        return static_cast<Default*>(nullptr);
+    } else {
+        return static_cast<typename T::LightCompareType*>(nullptr);
+    }
+}
+} // namespace impl
+
+/// T::LightCompareType when T declares it, otherwise Default (recovered from the pointer type).
+template <typename T, typename Default>
+using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>;
+
+/// Typed view over impl::IntrusiveRedBlackTreeImpl: Traits maps T <-> node, Comparator orders.
+template <class T, class Traits, class Comparator>
+class IntrusiveRedBlackTree {
+public:
+    using ImplType = impl::IntrusiveRedBlackTreeImpl;
+
+private:
+    ImplType impl{};
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = T;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = T*;
+    using const_pointer = const T*;
+    using reference = T&;
+    using const_reference = const T&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+
+    using light_value_type = LightCompareType<Comparator, value_type>; // key of *_light lookups
+    using const_light_pointer = const light_value_type*;
+    using const_light_reference = const light_value_type&;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        friend class IntrusiveRedBlackTree<T, Traits, Comparator>;
+
+        using ImplIterator =
+            std::conditional_t<Const, ImplType::const_iterator, ImplType::iterator>;
+
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveRedBlackTree::value_type;
+        using difference_type = typename IntrusiveRedBlackTree::difference_type;
+        using pointer = std::conditional_t<Const, IntrusiveRedBlackTree::const_pointer,
+                                           IntrusiveRedBlackTree::pointer>;
+        using reference = std::conditional_t<Const, IntrusiveRedBlackTree::const_reference,
+                                             IntrusiveRedBlackTree::reference>;
+
+    private:
+        ImplIterator iterator;
+
+        // Constructors are private; the enclosing tree is a friend and creates iterators.
+        explicit Iterator(ImplIterator it) : iterator(it) {}
+
+        // Wraps a raw node pointer (the pointer type of the underlying impl iterator).
+        explicit Iterator(typename ImplIterator::pointer ptr) : iterator(ptr) {}
+
+        ImplIterator GetImplIterator() const {
+            return this->iterator;
+        }
+
+    public:
+        bool operator==(const Iterator& rhs) const {
+            return this->iterator == rhs.iterator;
+        }
+
+        bool operator!=(const Iterator& rhs) const {
+            return this->iterator != rhs.iterator;
+        }
+
+        pointer operator->() const {
+            return Traits::GetParent(std::addressof(*this->iterator));
+        }
+
+        reference operator*() const {
+            return *this->operator->();
+        }
+
+        Iterator& operator++() {
+            ++this->iterator;
+            return *this;
+        }
+
+        Iterator& operator--() {
+            --this->iterator;
+            return *this;
+        }
+
+        Iterator operator++(int) {
+            const Iterator previous{*this};
+            ++(*this);
+            return previous;
+        }
+
+        Iterator operator--(int) {
+            const Iterator previous{*this};
+            --(*this);
+            return previous;
+        }
+
+        operator Iterator<true>() const {
+            return Iterator<true>(this->iterator);
+        }
+    };
+
+private:
+    static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
+                           const IntrusiveRedBlackTreeNode* rhs) {
+        return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs));
+    }
+
+    static int LightCompareImpl(const void* key, const IntrusiveRedBlackTreeNode* rhs) {
+        return Comparator::Compare(*static_cast<const_light_pointer>(key), *Traits::GetParent(rhs));
+    }
+
+    // Node-level operations built on the RB_* functions and the comparison adapters above.
+    IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
+        return RB_INSERT(&impl.root, node, CompareImpl);
+    }
+
+    IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const {
+        auto* const head = const_cast<ImplType::RootType*>(&impl.root);
+        return RB_FIND(head, const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+    }
+
+    IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const {
+        auto* const head = const_cast<ImplType::RootType*>(&impl.root);
+        return RB_NFIND(head, const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+    }
+
+    IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const {
+        auto* const head = const_cast<ImplType::RootType*>(&impl.root);
+        return RB_FIND_LIGHT(head, static_cast<const void*>(lelm), LightCompareImpl);
+    }
+
+    IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const {
+        auto* const head = const_cast<ImplType::RootType*>(&impl.root);
+        return RB_NFIND_LIGHT(head, static_cast<const void*>(lelm), LightCompareImpl);
+    }
+
+public:
+    constexpr IntrusiveRedBlackTree() = default;
+
+    // Iteration: each accessor wraps the matching impl iterator in a typed iterator.
+    iterator begin() {
+        return iterator(impl.begin());
+    }
+
+    const_iterator begin() const {
+        return const_iterator(impl.begin());
+    }
+
+    iterator end() {
+        return iterator(impl.end());
+    }
+
+    const_iterator end() const {
+        return const_iterator(impl.end());
+    }
+
+    const_iterator cbegin() const {
+        return begin();
+    }
+
+    const_iterator cend() const {
+        return end();
+    }
+
+    iterator iterator_to(reference ref) {
+        return iterator(impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+    }
+
+    const_iterator iterator_to(const_reference ref) const {
+        return const_iterator(impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+    }
+
+    // Content management: nodes from the impl are mapped back to T through Traits::GetParent.
+    bool empty() const {
+        return impl.empty();
+    }
+
+    reference back() {
+        return *Traits::GetParent(std::addressof(impl.back()));
+    }
+
+    const_reference back() const {
+        return *Traits::GetParent(std::addressof(impl.back()));
+    }
+
+    reference front() {
+        return *Traits::GetParent(std::addressof(impl.front()));
+    }
+
+    const_reference front() const {
+        return *Traits::GetParent(std::addressof(impl.front()));
+    }
+
+    iterator erase(iterator it) {
+        return iterator(impl.erase(it.GetImplIterator()));
+    }
+
+    iterator insert(reference ref) {
+        ImplType::pointer const node = Traits::GetNode(std::addressof(ref));
+        // NOTE(review): InsertImpl's result is discarded; confirm RB_INSERT cannot reject a node.
+        InsertImpl(node);
+        return iterator(node);
+    }
+
+    iterator find(const_reference ref) const {
+        return iterator(FindImpl(Traits::GetNode(std::addressof(ref))));
+    }
+
+    iterator nfind(const_reference ref) const {
+        return iterator(NFindImpl(Traits::GetNode(std::addressof(ref))));
+    }
+
+    iterator find_light(const_light_reference ref) const {
+        return iterator(FindLightImpl(std::addressof(ref)));
+    }
+
+    iterator nfind_light(const_light_reference ref) const {
+        return iterator(NFindLightImpl(std::addressof(ref)));
+    }
+};
+
+/// Traits for objects that store their tree node as the data member `Member`.
+template <auto T, class Derived = impl::GetParentType<T>>
+class IntrusiveRedBlackTreeMemberTraits;
+
+template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
+class IntrusiveRedBlackTreeMemberTraits<Member, Derived> {
+public:
+    template <class Comparator>
+    using TreeType = IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraits, Comparator>;
+    using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr const IntrusiveRedBlackTreeNode* GetNode(const Derived* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+
+    static constexpr const Derived* GetParent(const IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+
+    // Compile-time round-trip check: object -> node -> object must yield the same address.
+    static constexpr TypedStorage<Derived> DerivedStorage = {};
+    static_assert(GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage));
+};
+
+/// Member-node traits that expose the round-trip check as IsValid() instead of a static_assert.
+template <auto T, class Derived = impl::GetParentType<T>>
+class IntrusiveRedBlackTreeMemberTraitsDeferredAssert;
+
+template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
+class IntrusiveRedBlackTreeMemberTraitsDeferredAssert<Member, Derived> {
+public:
+    template <class Comparator>
+    using TreeType =
+        IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>;
+    using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr bool IsValid() {
+        TypedStorage<Derived> storage = {};
+        return GetParent(GetNode(GetPointer(storage))) == GetPointer(storage);
+    }
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr const IntrusiveRedBlackTreeNode* GetNode(const Derived* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+
+    static constexpr const Derived* GetParent(const IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+};
+
+/// Base class for objects that are themselves tree nodes; adds typed neighbour accessors.
+template <class Derived>
+class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
+public:
+    constexpr Derived* GetNext() {
+        return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+    }
+    constexpr const Derived* GetNext() const {
+        return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+    }
+    constexpr Derived* GetPrev() {
+        return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+    }
+    constexpr const Derived* GetPrev() const {
+        return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+    }
+};
+
+/// Traits for objects that inherit from IntrusiveRedBlackTreeNode: node <-> object is a cast.
+template <class Derived>
+class IntrusiveRedBlackTreeBaseTraits {
+public:
+    template <class Comparator>
+    using TreeType = IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeBaseTraits, Comparator>;
+    using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
+        return parent; // Implicit derived-to-base conversion.
+    }
+
+    static constexpr const IntrusiveRedBlackTreeNode* GetNode(const Derived* parent) {
+        return parent; // Implicit derived-to-base conversion.
+    }
+
+    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return static_cast<Derived*>(node);
+    }
+
+    static constexpr const Derived* GetParent(const IntrusiveRedBlackTreeNode* node) {
+        return static_cast<const Derived*>(node);
+    }
+};
+
+} // namespace Common
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 631f64d05..2d4d2e9e7 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -145,10 +145,18 @@ void ColorConsoleBackend::Write(const Entry& entry) {
     PrintColoredMessage(entry);
 }
 
-// _SH_DENYWR allows read only access to the file for other programs.
-// It is #defined to 0 on other platforms
-FileBackend::FileBackend(const std::string& filename)
-    : file(filename, "w", _SH_DENYWR), bytes_written(0) {}
+FileBackend::FileBackend(const std::string& filename) : bytes_written(0) {
+    // Keep one generation of history: the previous log is moved to "<filename>.old.txt".
+    const std::string backup_path = filename + ".old.txt";
+    if (Common::FS::Exists(backup_path)) {
+        Common::FS::Delete(backup_path);
+    }
+    if (Common::FS::Exists(filename)) {
+        Common::FS::Rename(filename, backup_path);
+    }
+    // _SH_DENYWR gives other programs read-only access; it is #defined to 0 on other platforms.
+    file = Common::FS::IOFile(filename, "w", _SH_DENYWR);
+}
 
 void FileBackend::Write(const Entry& entry) {
     // prevent logs from going over the maximum size (in case its spamming and the user doesn't
diff --git a/src/common/parent_of_member.h b/src/common/parent_of_member.h
new file mode 100644
index 000000000..d9a14529d
--- /dev/null
+++ b/src/common/parent_of_member.h
@@ -0,0 +1,191 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+
+namespace Common {
+namespace detail {
+template <typename T, size_t Size, size_t Align>
+struct TypedStorageImpl {
+    std::aligned_storage_t<Size, Align> storage_; // Raw bytes only; no T is constructed here.
+};
+} // namespace detail
+/// Raw storage with the size and alignment of T.
+template <typename T>
+using TypedStorage = detail::TypedStorageImpl<T, sizeof(T), alignof(T)>;
+/// Views the storage as a T* by casting through void*; no object is created by this call.
+template <typename T>
+static constexpr T* GetPointer(TypedStorage<T>& storage) {
+    return static_cast<T*>(static_cast<void*>(std::addressof(storage.storage_)));
+}
+/// Const counterpart of GetPointer.
+template <typename T>
+static constexpr const T* GetPointer(const TypedStorage<T>& storage) {
+    return static_cast<const T*>(static_cast<const void*>(std::addressof(storage.storage_)));
+}
+
+namespace impl {
+
+template <size_t MaxDepth>
+struct OffsetOfUnionHolder { // Nested unions, one per byte offset in [0, MaxDepth).
+    template <typename ParentType, typename MemberType, size_t Offset>
+    union UnionImpl {
+        using PaddingMember = char;
+        static constexpr size_t GetOffset() {
+            return Offset;
+        }
+        // 1-byte packing: `members` begins exactly Offset bytes after the start of `data`.
+#pragma pack(push, 1)
+        struct {
+            PaddingMember padding[Offset];
+            MemberType members[(sizeof(ParentType) / sizeof(MemberType)) + 1];
+        } data;
+#pragma pack(pop)
+        UnionImpl<ParentType, MemberType, Offset + 1> next_union; // Shifted one byte further.
+    };
+    // Offset 0 specialization: no padding array; `members` begins at the start of `data`.
+    template <typename ParentType, typename MemberType>
+    union UnionImpl<ParentType, MemberType, 0> {
+        static constexpr size_t GetOffset() {
+            return 0;
+        }
+
+        struct {
+            MemberType members[(sizeof(ParentType) / sizeof(MemberType)) + 1];
+        } data;
+        UnionImpl<ParentType, MemberType, 1> next_union;
+    };
+    // Offset == MaxDepth specialization: an empty union that ends the next_union chain.
+    template <typename ParentType, typename MemberType>
+    union UnionImpl<ParentType, MemberType, MaxDepth> {};
+};
+
+template <typename ParentType, typename MemberType>
+struct OffsetOfCalculator { // Finds the byte offset of a MemberType member in ParentType.
+    using UnionHolder =
+        typename OffsetOfUnionHolder<sizeof(MemberType)>::template UnionImpl<ParentType, MemberType,
+                                                                             0>;
+    union Union {
+        char c{};
+        UnionHolder first_union;
+        TypedStorage<ParentType> parent;
+        // Only `c` is initialized; the other members just share the same storage.
+        constexpr Union() : c() {}
+    };
+    static constexpr Union U = {};
+    // Steps start forward one MemberType at a time until it is no longer below target.
+    static constexpr const MemberType* GetNextAddress(const MemberType* start,
+                                                      const MemberType* target) {
+        while (start < target) {
+            start++;
+        }
+        return start;
+    }
+    // Distance from start to target in bytes. NOTE(review): no caller is visible in this header.
+    static constexpr std::ptrdiff_t GetDifference(const MemberType* start,
+                                                  const MemberType* target) {
+        return (target - start) * sizeof(MemberType);
+    }
+    // Probes cur_union: succeeds when target lies a whole number of elements past members[0].
+    template <typename CurUnion>
+    static constexpr std::ptrdiff_t OffsetOfImpl(MemberType ParentType::*member,
+                                                 CurUnion& cur_union) {
+        constexpr size_t Offset = CurUnion::GetOffset();
+        const auto target = std::addressof(GetPointer(U.parent)->*member);
+        const auto start = std::addressof(cur_union.data.members[0]);
+        const auto next = GetNextAddress(start, target);
+        // Miss at this byte offset: retry with the union shifted one byte further.
+        if (next != target) {
+            if constexpr (Offset < sizeof(MemberType) - 1) {
+                return OffsetOfImpl(member, cur_union.next_union);
+            } else {
+                UNREACHABLE();
+            }
+        }
+        // Result = whole elements stepped over (in bytes) + this union's leading padding.
+        return (next - start) * sizeof(MemberType) + Offset;
+    }
+    // Entry point: starts the search at the union with byte offset 0.
+    static constexpr std::ptrdiff_t OffsetOf(MemberType ParentType::*member) {
+        return OffsetOfImpl(member, U.first_union);
+    }
+};
+
+/// Splits a pointer-to-member type into its class type (Parent) and member type (Member).
+template <typename T>
+struct GetMemberPointerTraits;
+template <typename ClassType, typename FieldType>
+struct GetMemberPointerTraits<FieldType ClassType::*> {
+    using Parent = ClassType;
+    using Member = FieldType;
+};
+/// Class type named by the member pointer value MemberPtr.
+template <auto MemberPtr>
+using GetParentType = typename GetMemberPointerTraits<decltype(MemberPtr)>::Parent;
+/// Member type named by the member pointer value MemberPtr.
+template <auto MemberPtr>
+using GetMemberType = typename GetMemberPointerTraits<decltype(MemberPtr)>::Member;
+
+/// Byte offset of MemberPtr within RealParentType, computed by an immediately invoked lambda.
+template <auto MemberPtr, typename RealParentType = GetParentType<MemberPtr>>
+static inline std::ptrdiff_t OffsetOf = [] {
+    using DeclaringType = GetParentType<MemberPtr>;
+    using FieldType = GetMemberType<MemberPtr>;
+    static_assert(std::is_same_v<RealParentType, DeclaringType> ||
+                  std::is_base_of_v<DeclaringType, RealParentType>);
+    return OffsetOfCalculator<RealParentType, FieldType>::OffsetOf(MemberPtr);
+}();
+
+} // namespace impl
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType& GetParentReference(impl::GetMemberType<MemberPtr>* member) {
+    const std::ptrdiff_t offset = impl::OffsetOf<MemberPtr, RealParentType>; // bytes into parent
+    void* base = static_cast<uint8_t*>(static_cast<void*>(member)) - offset;
+    return *static_cast<RealParentType*>(base);
+}
+/// Const overload: the same subtraction, performed on const-qualified pointers.
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const& GetParentReference(impl::GetMemberType<MemberPtr> const* member) {
+    const std::ptrdiff_t offset = impl::OffsetOf<MemberPtr, RealParentType>;
+    const void* base = static_cast<const uint8_t*>(static_cast<const void*>(member)) - offset;
+    return *static_cast<const RealParentType*>(base);
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType* GetParentPointer(impl::GetMemberType<MemberPtr>* member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+/// GetParentPointer (pointer-taking, above and const below): address of the parent object.
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const* GetParentPointer(impl::GetMemberType<MemberPtr> const* member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+/// Reference-taking GetParentReference: forwards the member's address to the pointer overload.
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType& GetParentReference(impl::GetMemberType<MemberPtr>& member) {
+    return GetParentReference<MemberPtr, RealParentType>(std::addressof(member));
+}
+/// Const reference-taking GetParentReference; forwards the member's address likewise.
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const& GetParentReference(impl::GetMemberType<MemberPtr> const& member) {
+    return GetParentReference<MemberPtr, RealParentType>(std::addressof(member));
+}
+/// Reference-taking GetParentPointer: address of the parent found from a member reference.
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType* GetParentPointer(impl::GetMemberType<MemberPtr>& member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+/// Const reference-taking GetParentPointer.
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const* GetParentPointer(impl::GetMemberType<MemberPtr> const& member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+
+} // namespace Common
diff --git a/src/common/timer.cpp b/src/common/timer.cpp
deleted file mode 100644
index d17dc2a50..000000000
--- a/src/common/timer.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <ctime>
-#include <fmt/format.h>
-#include "common/common_types.h"
-#include "common/string_util.h"
-#include "common/timer.h"
-
-namespace Common {
-
-std::chrono::milliseconds Timer::GetTimeMs() {
-    return std::chrono::duration_cast<std::chrono::milliseconds>(
-        std::chrono::system_clock::now().time_since_epoch());
-}
-
-// --------------------------------------------
-// Initiate, Start, Stop, and Update the time
-// --------------------------------------------
-
-// Set initial values for the class
-Timer::Timer() : m_LastTime(0), m_StartTime(0), m_Running(false) {
-    Update();
-}
-
-// Write the starting time
-void Timer::Start() {
-    m_StartTime = GetTimeMs();
-    m_Running = true;
-}
-
-// Stop the timer
-void Timer::Stop() {
-    // Write the final time
-    m_LastTime = GetTimeMs();
-    m_Running = false;
-}
-
-// Update the last time variable
-void Timer::Update() {
-    m_LastTime = GetTimeMs();
-    // TODO(ector) - QPF
-}
-
-// -------------------------------------
-// Get time difference and elapsed time
-// -------------------------------------
-
-// Get the number of milliseconds since the last Update()
-std::chrono::milliseconds Timer::GetTimeDifference() {
-    return GetTimeMs() - m_LastTime;
-}
-
-// Add the time difference since the last Update() to the starting time.
-// This is used to compensate for a paused game.
-void Timer::AddTimeDifference() {
-    m_StartTime += GetTimeDifference();
-}
-
-// Get the time elapsed since the Start()
-std::chrono::milliseconds Timer::GetTimeElapsed() {
-    // If we have not started yet, return 1 (because then I don't
-    // have to change the FPS calculation in CoreRerecording.cpp .
-    if (m_StartTime.count() == 0)
-        return std::chrono::milliseconds(1);
-
-    // Return the final timer time if the timer is stopped
-    if (!m_Running)
-        return (m_LastTime - m_StartTime);
-
-    return (GetTimeMs() - m_StartTime);
-}
-
-// Get the formatted time elapsed since the Start()
-std::string Timer::GetTimeElapsedFormatted() const {
-    // If we have not started yet, return zero
-    if (m_StartTime.count() == 0)
-        return "00:00:00:000";
-
-    // The number of milliseconds since the start.
-    // Use a different value if the timer is stopped.
-    std::chrono::milliseconds Milliseconds;
-    if (m_Running)
-        Milliseconds = GetTimeMs() - m_StartTime;
-    else
-        Milliseconds = m_LastTime - m_StartTime;
-    // Seconds
-    std::chrono::seconds Seconds = std::chrono::duration_cast<std::chrono::seconds>(Milliseconds);
-    // Minutes
-    std::chrono::minutes Minutes = std::chrono::duration_cast<std::chrono::minutes>(Milliseconds);
-    // Hours
-    std::chrono::hours Hours = std::chrono::duration_cast<std::chrono::hours>(Milliseconds);
-
-    std::string TmpStr = fmt::format("{:02}:{:02}:{:02}:{:03}", Hours.count(), Minutes.count() % 60,
-                                     Seconds.count() % 60, Milliseconds.count() % 1000);
-    return TmpStr;
-}
-
-// Get the number of seconds since January 1 1970
-std::chrono::seconds Timer::GetTimeSinceJan1970() {
-    return std::chrono::duration_cast<std::chrono::seconds>(GetTimeMs());
-}
-
-std::chrono::seconds Timer::GetLocalTimeSinceJan1970() {
-    time_t sysTime, tzDiff, tzDST;
-    struct tm* gmTime;
-
-    time(&sysTime);
-
-    // Account for DST where needed
-    gmTime = localtime(&sysTime);
-    if (gmTime->tm_isdst == 1)
-        tzDST = 3600;
-    else
-        tzDST = 0;
-
-    // Lazy way to get local time in sec
-    gmTime = gmtime(&sysTime);
-    tzDiff = sysTime - mktime(gmTime);
-
-    return std::chrono::seconds(sysTime + tzDiff + tzDST);
-}
-
-// Return the current time formatted as Minutes:Seconds:Milliseconds
-// in the form 00:00:000.
-std::string Timer::GetTimeFormatted() {
-    time_t sysTime;
-    struct tm* gmTime;
-    char tmp[13];
-
-    time(&sysTime);
-    gmTime = localtime(&sysTime);
-
-    strftime(tmp, 6, "%M:%S", gmTime);
-
-    u64 milliseconds = static_cast<u64>(GetTimeMs().count()) % 1000;
-    return fmt::format("{}:{:03}", tmp, milliseconds);
-}
-
-// Returns a timestamp with decimals for precise time comparisons
-// ----------------
-double Timer::GetDoubleTime() {
-    // Get continuous timestamp
-    auto tmp_seconds = static_cast<u64>(GetTimeSinceJan1970().count());
-    const auto ms = static_cast<double>(static_cast<u64>(GetTimeMs().count()) % 1000);
-
-    // Remove a few years. We only really want enough seconds to make
-    // sure that we are detecting actual actions, perhaps 60 seconds is
-    // enough really, but I leave a year of seconds anyway, in case the
-    // user's clock is incorrect or something like that.
-    tmp_seconds = tmp_seconds - (38 * 365 * 24 * 60 * 60);
-
-    // Make a smaller integer that fits in the double
-    const auto seconds = static_cast<u32>(tmp_seconds);
-    return seconds + ms;
-}
-
-} // Namespace Common
diff --git a/src/common/timer.h b/src/common/timer.h
deleted file mode 100644
index 8894a143d..000000000
--- a/src/common/timer.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <chrono>
-#include <string>
-#include "common/common_types.h"
-
-namespace Common {
-class Timer {
-public:
-    Timer();
-
-    void Start();
-    void Stop();
-    void Update();
-
-    // The time difference is always returned in milliseconds, regardless of alternative internal
-    // representation
-    [[nodiscard]] std::chrono::milliseconds GetTimeDifference();
-    void AddTimeDifference();
-
-    [[nodiscard]] static std::chrono::seconds GetTimeSinceJan1970();
-    [[nodiscard]] static std::chrono::seconds GetLocalTimeSinceJan1970();
-    [[nodiscard]] static double GetDoubleTime();
-
-    [[nodiscard]] static std::string GetTimeFormatted();
-    [[nodiscard]] std::string GetTimeElapsedFormatted() const;
-    [[nodiscard]] std::chrono::milliseconds GetTimeElapsed();
-
-    [[nodiscard]] static std::chrono::milliseconds GetTimeMs();
-
-private:
-    std::chrono::milliseconds m_LastTime;
-    std::chrono::milliseconds m_StartTime;
-    bool m_Running;
-};
-
-} // Namespace Common
diff --git a/src/common/tree.h b/src/common/tree.h
new file mode 100644
index 000000000..3da49e422
--- /dev/null
+++ b/src/common/tree.h
@@ -0,0 +1,674 @@
+/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+/*
+ * This file defines data structures for red-black trees.
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ *   same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+namespace Common {
+template <typename T>
+class RBHead {
+public:
+    [[nodiscard]] T* Root() {
+        return rbh_root;
+    }
+
+    [[nodiscard]] const T* Root() const {
+        return rbh_root;
+    }
+
+    void SetRoot(T* root) {
+        rbh_root = root;
+    }
+
+    [[nodiscard]] bool IsEmpty() const {
+        return Root() == nullptr;
+    }
+
+private:
+    T* rbh_root = nullptr;
+};
+
+enum class EntryColor {
+    Black,
+    Red,
+};
+
+template <typename T>
+class RBEntry {
+public:
+    [[nodiscard]] T* Left() {
+        return rbe_left;
+    }
+
+    [[nodiscard]] const T* Left() const {
+        return rbe_left;
+    }
+
+    void SetLeft(T* left) {
+        rbe_left = left;
+    }
+
+    [[nodiscard]] T* Right() {
+        return rbe_right;
+    }
+
+    [[nodiscard]] const T* Right() const {
+        return rbe_right;
+    }
+
+    void SetRight(T* right) {
+        rbe_right = right;
+    }
+
+    [[nodiscard]] T* Parent() {
+        return rbe_parent;
+    }
+
+    [[nodiscard]] const T* Parent() const {
+        return rbe_parent;
+    }
+
+    void SetParent(T* parent) {
+        rbe_parent = parent;
+    }
+
+    [[nodiscard]] bool IsBlack() const {
+        return rbe_color == EntryColor::Black;
+    }
+
+    [[nodiscard]] bool IsRed() const {
+        return rbe_color == EntryColor::Red;
+    }
+
+    [[nodiscard]] EntryColor Color() const {
+        return rbe_color;
+    }
+
+    void SetColor(EntryColor color) {
+        rbe_color = color;
+    }
+
+private:
+    T* rbe_left = nullptr;
+    T* rbe_right = nullptr;
+    T* rbe_parent = nullptr;
+    EntryColor rbe_color{};
+};
+
+template <typename Node>
+[[nodiscard]] RBEntry<Node>& RB_ENTRY(Node* node) {
+    return node->GetEntry();
+}
+
+template <typename Node>
+[[nodiscard]] const RBEntry<Node>& RB_ENTRY(const Node* node) {
+    return node->GetEntry();
+}
+
+template <typename Node>
+[[nodiscard]] Node* RB_PARENT(Node* node) {
+    return RB_ENTRY(node).Parent();
+}
+
+template <typename Node>
+[[nodiscard]] const Node* RB_PARENT(const Node* node) {
+    return RB_ENTRY(node).Parent();
+}
+
+template <typename Node>
+void RB_SET_PARENT(Node* node, Node* parent) {
+    return RB_ENTRY(node).SetParent(parent);
+}
+
+template <typename Node>
+[[nodiscard]] Node* RB_LEFT(Node* node) {
+    return RB_ENTRY(node).Left();
+}
+
+template <typename Node>
+[[nodiscard]] const Node* RB_LEFT(const Node* node) {
+    return RB_ENTRY(node).Left();
+}
+
+template <typename Node>
+void RB_SET_LEFT(Node* node, Node* left) {
+    return RB_ENTRY(node).SetLeft(left);
+}
+
+template <typename Node>
+[[nodiscard]] Node* RB_RIGHT(Node* node) {
+    return RB_ENTRY(node).Right();
+}
+
+template <typename Node>
+[[nodiscard]] const Node* RB_RIGHT(const Node* node) {
+    return RB_ENTRY(node).Right();
+}
+
+template <typename Node>
+void RB_SET_RIGHT(Node* node, Node* right) {
+    return RB_ENTRY(node).SetRight(right);
+}
+
+template <typename Node>
+[[nodiscard]] bool RB_IS_BLACK(const Node* node) {
+    return RB_ENTRY(node).IsBlack();
+}
+
+template <typename Node>
+[[nodiscard]] bool RB_IS_RED(const Node* node) {
+    return RB_ENTRY(node).IsRed();
+}
+
+template <typename Node>
+[[nodiscard]] EntryColor RB_COLOR(const Node* node) {
+    return RB_ENTRY(node).Color();
+}
+
+template <typename Node>
+void RB_SET_COLOR(Node* node, EntryColor color) {
+    return RB_ENTRY(node).SetColor(color);
+}
+
+template <typename Node>
+void RB_SET(Node* node, Node* parent) {
+    auto& entry = RB_ENTRY(node);
+    entry.SetParent(parent);
+    entry.SetLeft(nullptr);
+    entry.SetRight(nullptr);
+    entry.SetColor(EntryColor::Red);
+}
+
+template <typename Node>
+void RB_SET_BLACKRED(Node* black, Node* red) {
+    RB_SET_COLOR(black, EntryColor::Black);
+    RB_SET_COLOR(red, EntryColor::Red);
+}
+
+template <typename Node>
+void RB_ROTATE_LEFT(RBHead<Node>* head, Node* elm, Node*& tmp) {
+    tmp = RB_RIGHT(elm);
+    RB_SET_RIGHT(elm, RB_LEFT(tmp));
+    if (RB_RIGHT(elm) != nullptr) {
+        RB_SET_PARENT(RB_LEFT(tmp), elm);
+    }
+
+    RB_SET_PARENT(tmp, RB_PARENT(elm));
+    if (RB_PARENT(tmp) != nullptr) {
+        if (elm == RB_LEFT(RB_PARENT(elm))) {
+            RB_SET_LEFT(RB_PARENT(elm), tmp);
+        } else {
+            RB_SET_RIGHT(RB_PARENT(elm), tmp);
+        }
+    } else {
+        head->SetRoot(tmp);
+    }
+
+    RB_SET_LEFT(tmp, elm);
+    RB_SET_PARENT(elm, tmp);
+}
+
+template <typename Node>
+void RB_ROTATE_RIGHT(RBHead<Node>* head, Node* elm, Node*& tmp) {
+    tmp = RB_LEFT(elm);
+    RB_SET_LEFT(elm, RB_RIGHT(tmp));
+    if (RB_LEFT(elm) != nullptr) {
+        RB_SET_PARENT(RB_RIGHT(tmp), elm);
+    }
+
+    RB_SET_PARENT(tmp, RB_PARENT(elm));
+    if (RB_PARENT(tmp) != nullptr) {
+        if (elm == RB_LEFT(RB_PARENT(elm))) {
+            RB_SET_LEFT(RB_PARENT(elm), tmp);
+        } else {
+            RB_SET_RIGHT(RB_PARENT(elm), tmp);
+        }
+    } else {
+        head->SetRoot(tmp);
+    }
+
+    RB_SET_RIGHT(tmp, elm);
+    RB_SET_PARENT(elm, tmp);
+}
+
+template <typename Node>
+void RB_INSERT_COLOR(RBHead<Node>* head, Node* elm) {
+    Node* parent = nullptr;
+    Node* tmp = nullptr;
+
+    while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
+        Node* gparent = RB_PARENT(parent);
+        if (parent == RB_LEFT(gparent)) {
+            tmp = RB_RIGHT(gparent);
+            if (tmp && RB_IS_RED(tmp)) {
+                RB_SET_COLOR(tmp, EntryColor::Black);
+                RB_SET_BLACKRED(parent, gparent);
+                elm = gparent;
+                continue;
+            }
+
+            if (RB_RIGHT(parent) == elm) {
+                RB_ROTATE_LEFT(head, parent, tmp);
+                tmp = parent;
+                parent = elm;
+                elm = tmp;
+            }
+
+            RB_SET_BLACKRED(parent, gparent);
+            RB_ROTATE_RIGHT(head, gparent, tmp);
+        } else {
+            tmp = RB_LEFT(gparent);
+            if (tmp && RB_IS_RED(tmp)) {
+                RB_SET_COLOR(tmp, EntryColor::Black);
+                RB_SET_BLACKRED(parent, gparent);
+                elm = gparent;
+                continue;
+            }
+
+            if (RB_LEFT(parent) == elm) {
+                RB_ROTATE_RIGHT(head, parent, tmp);
+                tmp = parent;
+                parent = elm;
+                elm = tmp;
+            }
+
+            RB_SET_BLACKRED(parent, gparent);
+            RB_ROTATE_LEFT(head, gparent, tmp);
+        }
+    }
+
+    RB_SET_COLOR(head->Root(), EntryColor::Black);
+}
+
+template <typename Node>
+void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
+    Node* tmp;
+    while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head->Root()) {
+        if (RB_LEFT(parent) == elm) {
+            tmp = RB_RIGHT(parent);
+            if (RB_IS_RED(tmp)) {
+                RB_SET_BLACKRED(tmp, parent);
+                RB_ROTATE_LEFT(head, parent, tmp);
+                tmp = RB_RIGHT(parent);
+            }
+
+            if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
+                (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
+                RB_SET_COLOR(tmp, EntryColor::Red);
+                elm = parent;
+                parent = RB_PARENT(elm);
+            } else {
+                if (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp))) {
+                    Node* oleft;
+                    if ((oleft = RB_LEFT(tmp)) != nullptr) {
+                        RB_SET_COLOR(oleft, EntryColor::Black);
+                    }
+
+                    RB_SET_COLOR(tmp, EntryColor::Red);
+                    RB_ROTATE_RIGHT(head, tmp, oleft);
+                    tmp = RB_RIGHT(parent);
+                }
+
+                RB_SET_COLOR(tmp, RB_COLOR(parent));
+                RB_SET_COLOR(parent, EntryColor::Black);
+                if (RB_RIGHT(tmp)) {
+                    RB_SET_COLOR(RB_RIGHT(tmp), EntryColor::Black);
+                }
+
+                RB_ROTATE_LEFT(head, parent, tmp);
+                elm = head->Root();
+                break;
+            }
+        } else {
+            tmp = RB_LEFT(parent);
+            if (RB_IS_RED(tmp)) {
+                RB_SET_BLACKRED(tmp, parent);
+                RB_ROTATE_RIGHT(head, parent, tmp);
+                tmp = RB_LEFT(parent);
+            }
+
+            if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
+                (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
+                RB_SET_COLOR(tmp, EntryColor::Red);
+                elm = parent;
+                parent = RB_PARENT(elm);
+            } else {
+                if (RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) {
+                    Node* oright;
+                    if ((oright = RB_RIGHT(tmp)) != nullptr) {
+                        RB_SET_COLOR(oright, EntryColor::Black);
+                    }
+
+                    RB_SET_COLOR(tmp, EntryColor::Red);
+                    RB_ROTATE_LEFT(head, tmp, oright);
+                    tmp = RB_LEFT(parent);
+                }
+
+                RB_SET_COLOR(tmp, RB_COLOR(parent));
+                RB_SET_COLOR(parent, EntryColor::Black);
+
+                if (RB_LEFT(tmp)) {
+                    RB_SET_COLOR(RB_LEFT(tmp), EntryColor::Black);
+                }
+
+                RB_ROTATE_RIGHT(head, parent, tmp);
+                elm = head->Root();
+                break;
+            }
+        }
+    }
+
+    if (elm) {
+        RB_SET_COLOR(elm, EntryColor::Black);
+    }
+}
+
+// Unlinks elm from the tree, calls RB_REMOVE_COLOR to rebalance when the node
+// that was spliced out was black, and returns the node originally passed in.
+template <typename Node>
+Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
+    Node* child = nullptr;
+    Node* parent = nullptr;
+    Node* old = elm;
+    EntryColor color{};
+
+    const auto finalize = [&] {
+        if (color == EntryColor::Black) {
+            RB_REMOVE_COLOR(head, parent, child);
+        }
+
+        return old;
+    };
+
+    if (RB_LEFT(elm) == nullptr) {
+        child = RB_RIGHT(elm);
+    } else if (RB_RIGHT(elm) == nullptr) {
+        child = RB_LEFT(elm);
+    } else {
+        // Two children: splice out the leftmost node of the right subtree and
+        // then move that node into the position held by old.
+        elm = RB_RIGHT(elm);
+        while (Node* left = RB_LEFT(elm)) {
+            elm = left;
+        }
+
+        child = RB_RIGHT(elm);
+        parent = RB_PARENT(elm);
+        color = RB_COLOR(elm);
+
+        if (child) {
+            RB_SET_PARENT(child, parent);
+        }
+        if (parent) {
+            if (RB_LEFT(parent) == elm) {
+                RB_SET_LEFT(parent, child);
+            } else {
+                RB_SET_RIGHT(parent, child);
+            }
+        } else {
+            head->SetRoot(child);
+        }
+
+        if (RB_PARENT(elm) == old) {
+            parent = elm;
+        }
+
+        elm->SetEntry(old->GetEntry());
+
+        if (RB_PARENT(old)) {
+            if (RB_LEFT(RB_PARENT(old)) == old) {
+                RB_SET_LEFT(RB_PARENT(old), elm);
+            } else {
+                RB_SET_RIGHT(RB_PARENT(old), elm);
+            }
+        } else {
+            head->SetRoot(elm);
+        }
+        RB_SET_PARENT(RB_LEFT(old), elm);
+        if (RB_RIGHT(old)) {
+            RB_SET_PARENT(RB_RIGHT(old), elm);
+        }
+
+        return finalize();
+    }
+
+    parent = RB_PARENT(elm);
+    color = RB_COLOR(elm);
+
+    if (child) {
+        RB_SET_PARENT(child, parent);
+    }
+    if (parent) {
+        if (RB_LEFT(parent) == elm) {
+            RB_SET_LEFT(parent, child);
+        } else {
+            RB_SET_RIGHT(parent, child);
+        }
+    } else {
+        head->SetRoot(child);
+    }
+
+    return finalize();
+}
+
+// Inserts a node into the RB tree
+template <typename Node, typename CompareFunction>
+Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
+    Node* parent = nullptr;
+    Node* tmp = head->Root();
+    int comp = 0;
+
+    while (tmp) {
+        parent = tmp;
+        comp = cmp(elm, parent);
+        if (comp < 0) {
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+
+    RB_SET(elm, parent);
+
+    if (parent != nullptr) {
+        if (comp < 0) {
+            RB_SET_LEFT(parent, elm);
+        } else {
+            RB_SET_RIGHT(parent, elm);
+        }
+    } else {
+        head->SetRoot(elm);
+    }
+
+    RB_INSERT_COLOR(head, elm);
+    return nullptr;
+}
+
+// Finds the node with the same key as elm
+template <typename Node, typename CompareFunction>
+Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
+    Node* tmp = head->Root();
+
+    while (tmp) {
+        const int comp = cmp(elm, tmp);
+        if (comp < 0) {
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+
+    return nullptr;
+}
+
+// Finds the first node greater than or equal to the search key
+template <typename Node, typename CompareFunction>
+Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
+    Node* tmp = head->Root();
+    Node* res = nullptr;
+
+    while (tmp) {
+        const int comp = cmp(elm, tmp);
+        if (comp < 0) {
+            res = tmp;
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+
+    return res;
+}
+
+// Finds the node with the same key as lelm
+template <typename Node, typename CompareFunction>
+Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) {
+    Node* tmp = head->Root();
+
+    while (tmp) {
+        const int comp = lcmp(lelm, tmp);
+        if (comp < 0) {
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+
+    return nullptr;
+}
+
+// Finds the first node greater than or equal to the search key
+template <typename Node, typename CompareFunction>
+Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) {
+    Node* tmp = head->Root();
+    Node* res = nullptr;
+
+    while (tmp) {
+        const int comp = lcmp(lelm, tmp);
+        if (comp < 0) {
+            res = tmp;
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+
+    return res;
+}
+
+// Returns the in-order successor of elm, or nullptr when the climb passes the root.
+template <typename Node>
+Node* RB_NEXT(Node* elm) {
+    // With a right subtree the successor is that subtree's leftmost node.
+    Node* succ = RB_RIGHT(elm);
+    if (succ != nullptr) {
+        while (Node* lower = RB_LEFT(succ)) {
+            succ = lower;
+        }
+        return succ;
+    }
+    // No right subtree: climb past every right-child link, then step to the parent.
+    Node* up = RB_PARENT(elm);
+    while (up != nullptr && elm == RB_RIGHT(up)) {
+        elm = up;
+        up = RB_PARENT(elm);
+    }
+    return up;
+}
+
+// Returns the in-order predecessor of elm, or nullptr when the climb passes the root.
+template <typename Node>
+Node* RB_PREV(Node* elm) {
+    // With a left subtree the predecessor is that subtree's rightmost node.
+    Node* pred = RB_LEFT(elm);
+    if (pred != nullptr) {
+        while (Node* lower = RB_RIGHT(pred)) {
+            pred = lower;
+        }
+        return pred;
+    }
+    // No left subtree: climb past every left-child link, then step to the parent.
+    Node* up = RB_PARENT(elm);
+    while (up != nullptr && elm == RB_LEFT(up)) {
+        elm = up;
+        up = RB_PARENT(elm);
+    }
+    return up;
+}
+
+// Walks from the root towards one side of the tree and returns the outermost
+// node: the leftmost when is_min is true, the rightmost otherwise. Returns
+// nullptr when the tree has no root.
+template <typename Node>
+Node* RB_MINMAX(RBHead<Node>* head, bool is_min) {
+    Node* last_seen = nullptr;
+
+    // Remember the last real node visited; it is the result once the walk
+    // steps onto a null child.
+    for (Node* cursor = head->Root(); cursor != nullptr;) {
+        last_seen = cursor;
+        cursor = is_min ? RB_LEFT(cursor) : RB_RIGHT(cursor);
+    }
+
+    return last_seen;
+}
+
+template <typename Node>
+Node* RB_MIN(RBHead<Node>* head) {
+    return RB_MINMAX(head, true);
+}
+
+template <typename Node>
+Node* RB_MAX(RBHead<Node>* head) {
+    return RB_MINMAX(head, false);
+}
+} // namespace Common
diff --git a/src/common/uuid.h b/src/common/uuid.h
index 4ab9a25f0..2e7a18405 100644
--- a/src/common/uuid.h
+++ b/src/common/uuid.h
@@ -14,8 +14,8 @@ constexpr u128 INVALID_UUID{{0, 0}};
 
 struct UUID {
     // UUIDs which are 0 are considered invalid!
-    u128 uuid = INVALID_UUID;
-    constexpr UUID() = default;
+    u128 uuid;
+    UUID() = default;
     constexpr explicit UUID(const u128& id) : uuid{id} {}
     constexpr explicit UUID(const u64 lo, const u64 hi) : uuid{{lo, hi}} {}
 
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index eb8a7782f..a65f6b832 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -2,19 +2,74 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
 #include <chrono>
+#include <limits>
 #include <mutex>
 #include <thread>
 
 #ifdef _MSC_VER
 #include <intrin.h>
+
+#pragma intrinsic(__umulh)
+#pragma intrinsic(_udiv128)
 #else
 #include <x86intrin.h>
 #endif
 
+#include "common/atomic_ops.h"
 #include "common/uint128.h"
 #include "common/x64/native_clock.h"
 
+namespace {
+
+[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
+#ifdef __SIZEOF_INT128__
+    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
+    return static_cast<u64>(base / divisor);
+#elif defined(_M_X64) || defined(_M_ARM64)
+    std::array<u64, 2> r = {0, numerator};
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], divisor, &remainder);
+#else
+    return _udiv128(r[1], r[0], divisor, &remainder);
+#endif
+#else
+    // This one is a bit more inaccurate.
+    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
+#endif
+}
+
+[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
+#ifdef __SIZEOF_INT128__
+    return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
+#elif defined(_M_X64) || defined(_M_ARM64)
+    return __umulh(a, b); // MSVC
+#else
+    // Generic fallback
+    const u64 a_lo = u32(a);
+    const u64 a_hi = a >> 32;
+    const u64 b_lo = u32(b);
+    const u64 b_hi = b >> 32;
+
+    const u64 a_x_b_hi = a_hi * b_hi;
+    const u64 a_x_b_mid = a_hi * b_lo;
+    const u64 b_x_a_mid = b_hi * a_lo;
+    const u64 a_x_b_lo = a_lo * b_lo;
+
+    const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
+                           static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
+                          32;
+
+    const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
+
+    return multhi;
+#endif
+}
+
+} // namespace
+
 namespace Common {
 
 u64 EstimateRDTSCFrequency() {
@@ -48,54 +103,71 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
     : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
                                                                                rtsc_frequency_} {
     _mm_mfence();
-    last_measure = __rdtsc();
-    accumulated_ticks = 0U;
+    time_point.inner.last_measure = __rdtsc();
+    time_point.inner.accumulated_ticks = 0U;
+    ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
+    us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
+    ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
+    clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
+    cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
 }
 
 u64 NativeClock::GetRTSC() {
-    std::scoped_lock scope{rtsc_serialize};
-    _mm_mfence();
-    const u64 current_measure = __rdtsc();
-    u64 diff = current_measure - last_measure;
-    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
-    if (current_measure > last_measure) {
-        last_measure = current_measure;
-    }
-    accumulated_ticks += diff;
+    TimePoint new_time_point{};
+    TimePoint current_time_point{};
+    do {
+        current_time_point.pack = time_point.pack;
+        _mm_mfence();
+        const u64 current_measure = __rdtsc();
+        u64 diff = current_measure - current_time_point.inner.last_measure;
+        diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
+        new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
+                                                ? current_measure
+                                                : current_time_point.inner.last_measure;
+        new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
+    } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
+                                           current_time_point.pack));
     /// The clock cannot be more precise than the guest timer, remove the lower bits
-    return accumulated_ticks & inaccuracy_mask;
+    return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
 }
 
 void NativeClock::Pause(bool is_paused) {
     if (!is_paused) {
-        _mm_mfence();
-        last_measure = __rdtsc();
+        TimePoint current_time_point{};
+        TimePoint new_time_point{};
+        do {
+            current_time_point.pack = time_point.pack;
+            new_time_point.pack = current_time_point.pack;
+            _mm_mfence();
+            new_time_point.inner.last_measure = __rdtsc();
+        } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
+                                               current_time_point.pack));
     }
 }
 
 std::chrono::nanoseconds NativeClock::GetTimeNS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
+    return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
 }
 
 std::chrono::microseconds NativeClock::GetTimeUS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
+    return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
 }
 
 std::chrono::milliseconds NativeClock::GetTimeMS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
+    return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
 }
 
 u64 NativeClock::GetClockCycles() {
     const u64 rtsc_value = GetRTSC();
-    return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+    return MultiplyHigh(rtsc_value, clock_rtsc_factor);
 }
 
 u64 NativeClock::GetCPUCycles() {
     const u64 rtsc_value = GetRTSC();
-    return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+    return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
 }
 
 } // namespace X64
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 6d1e32ac8..7cbd400d2 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -6,7 +6,6 @@
 
 #include <optional>
 
-#include "common/spin_lock.h"
 #include "common/wall_clock.h"
 
 namespace Common {
@@ -32,14 +31,28 @@ public:
 private:
     u64 GetRTSC();
 
+    union alignas(16) TimePoint {
+        TimePoint() : pack{} {}
+        u128 pack{};
+        struct Inner {
+            u64 last_measure{};
+            u64 accumulated_ticks{};
+        } inner;
+    };
+
     /// value used to reduce the native clocks accuracy as some apss rely on
     /// undefined behavior where the level of accuracy in the clock shouldn't
     /// be higher.
     static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
 
-    SpinLock rtsc_serialize{};
-    u64 last_measure{};
-    u64 accumulated_ticks{};
+    TimePoint time_point;
+    // Precomputed 64-bit fixed-point factors that scale RDTSC ticks to each target unit
+    u64 clock_rtsc_factor{};
+    u64 cpu_rtsc_factor{};
+    u64 ns_rtsc_factor{};
+    u64 us_rtsc_factor{};
+    u64 ms_rtsc_factor{};
+
     u64 rtsc_frequency;
 };
 } // namespace X64
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 893df433a..2f6b22747 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -142,8 +142,6 @@ add_library(core STATIC
     hardware_interrupt_manager.h
     hle/ipc.h
     hle/ipc_helpers.h
-    hle/kernel/address_arbiter.cpp
-    hle/kernel/address_arbiter.h
     hle/kernel/client_port.cpp
     hle/kernel/client_port.h
     hle/kernel/client_session.cpp
@@ -157,13 +155,19 @@ add_library(core STATIC
     hle/kernel/handle_table.h
     hle/kernel/hle_ipc.cpp
     hle/kernel/hle_ipc.h
+    hle/kernel/k_address_arbiter.cpp
+    hle/kernel/k_address_arbiter.h
     hle/kernel/k_affinity_mask.h
+    hle/kernel/k_condition_variable.cpp
+    hle/kernel/k_condition_variable.h
     hle/kernel/k_priority_queue.h
     hle/kernel/k_scheduler.cpp
     hle/kernel/k_scheduler.h
     hle/kernel/k_scheduler_lock.h
     hle/kernel/k_scoped_lock.h
     hle/kernel/k_scoped_scheduler_lock_and_sleep.h
+    hle/kernel/k_synchronization_object.cpp
+    hle/kernel/k_synchronization_object.h
     hle/kernel/kernel.cpp
     hle/kernel/kernel.h
     hle/kernel/memory/address_space_info.cpp
@@ -183,8 +187,6 @@ add_library(core STATIC
     hle/kernel/memory/slab_heap.h
     hle/kernel/memory/system_control.cpp
     hle/kernel/memory/system_control.h
-    hle/kernel/mutex.cpp
-    hle/kernel/mutex.h
     hle/kernel/object.cpp
     hle/kernel/object.h
     hle/kernel/physical_core.cpp
@@ -210,12 +212,10 @@ add_library(core STATIC
     hle/kernel/shared_memory.h
     hle/kernel/svc.cpp
     hle/kernel/svc.h
+    hle/kernel/svc_common.h
+    hle/kernel/svc_results.h
     hle/kernel/svc_types.h
     hle/kernel/svc_wrap.h
-    hle/kernel/synchronization_object.cpp
-    hle/kernel/synchronization_object.h
-    hle/kernel/synchronization.cpp
-    hle/kernel/synchronization.h
     hle/kernel/thread.cpp
     hle/kernel/thread.h
     hle/kernel/time_manager.cpp
@@ -643,10 +643,9 @@ else()
         -Werror=conversion
         -Werror=ignored-qualifiers
         -Werror=implicit-fallthrough
-        -Werror=reorder
         -Werror=sign-compare
-        -Werror=unused-variable
 
+        $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
 
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 70098c526..9a0151736 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -26,9 +26,10 @@ using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CO
 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
-        : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
-                                                                       uses_wall_clock} {}
+    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers_,
+                           bool uses_wall_clock_)
+        : system{system_}, interrupt_handlers{interrupt_handlers_}, uses_wall_clock{
+                                                                        uses_wall_clock_} {}
     virtual ~ARM_Interface() = default;
 
     struct ThreadContext32 {
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index e6c8461a5..874b5673a 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -49,6 +49,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
     Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
     instance.on_thread_init();
     instance.ThreadLoop();
+    MicroProfileOnThreadExit();
 }
 
 void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp
index a6c0337fa..d12218fc2 100644
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -43,17 +43,17 @@ static_assert(sizeof(IVFCLevel) == 0x18, "IVFCLevel has incorrect size.");
 struct IVFCHeader {
     u32_le magic;
     u32_le magic_number;
-    INSERT_UNION_PADDING_BYTES(8);
+    INSERT_PADDING_BYTES_NOINIT(8);
     std::array<IVFCLevel, 6> levels;
-    INSERT_UNION_PADDING_BYTES(64);
+    INSERT_PADDING_BYTES_NOINIT(64);
 };
 static_assert(sizeof(IVFCHeader) == 0xE0, "IVFCHeader has incorrect size.");
 
 struct NCASectionHeaderBlock {
-    INSERT_UNION_PADDING_BYTES(3);
+    INSERT_PADDING_BYTES_NOINIT(3);
     NCASectionFilesystemType filesystem_type;
     NCASectionCryptoType crypto_type;
-    INSERT_UNION_PADDING_BYTES(3);
+    INSERT_PADDING_BYTES_NOINIT(3);
 };
 static_assert(sizeof(NCASectionHeaderBlock) == 0x8, "NCASectionHeaderBlock has incorrect size.");
 
@@ -61,7 +61,7 @@ struct NCASectionRaw {
     NCASectionHeaderBlock header;
     std::array<u8, 0x138> block_data;
     std::array<u8, 0x8> section_ctr;
-    INSERT_UNION_PADDING_BYTES(0xB8);
+    INSERT_PADDING_BYTES_NOINIT(0xB8);
 };
 static_assert(sizeof(NCASectionRaw) == 0x200, "NCASectionRaw has incorrect size.");
 
@@ -69,19 +69,19 @@ struct PFS0Superblock {
     NCASectionHeaderBlock header_block;
     std::array<u8, 0x20> hash;
     u32_le size;
-    INSERT_UNION_PADDING_BYTES(4);
+    INSERT_PADDING_BYTES_NOINIT(4);
     u64_le hash_table_offset;
     u64_le hash_table_size;
     u64_le pfs0_header_offset;
     u64_le pfs0_size;
-    INSERT_UNION_PADDING_BYTES(0x1B0);
+    INSERT_PADDING_BYTES_NOINIT(0x1B0);
 };
 static_assert(sizeof(PFS0Superblock) == 0x200, "PFS0Superblock has incorrect size.");
 
 struct RomFSSuperblock {
     NCASectionHeaderBlock header_block;
     IVFCHeader ivfc;
-    INSERT_UNION_PADDING_BYTES(0x118);
+    INSERT_PADDING_BYTES_NOINIT(0x118);
 };
 static_assert(sizeof(RomFSSuperblock) == 0x200, "RomFSSuperblock has incorrect size.");
 
@@ -89,19 +89,19 @@ struct BKTRHeader {
     u64_le offset;
     u64_le size;
     u32_le magic;
-    INSERT_UNION_PADDING_BYTES(0x4);
+    INSERT_PADDING_BYTES_NOINIT(0x4);
     u32_le number_entries;
-    INSERT_UNION_PADDING_BYTES(0x4);
+    INSERT_PADDING_BYTES_NOINIT(0x4);
 };
 static_assert(sizeof(BKTRHeader) == 0x20, "BKTRHeader has incorrect size.");
 
 struct BKTRSuperblock {
     NCASectionHeaderBlock header_block;
     IVFCHeader ivfc;
-    INSERT_UNION_PADDING_BYTES(0x18);
+    INSERT_PADDING_BYTES_NOINIT(0x18);
     BKTRHeader relocation;
     BKTRHeader subsection;
-    INSERT_UNION_PADDING_BYTES(0xC0);
+    INSERT_PADDING_BYTES_NOINIT(0xC0);
 };
 static_assert(sizeof(BKTRSuperblock) == 0x200, "BKTRSuperblock has incorrect size.");
 
diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h
index 17f774baa..86c9f5350 100644
--- a/src/core/file_sys/savedata_factory.h
+++ b/src/core/file_sys/savedata_factory.h
@@ -58,7 +58,7 @@ struct SaveDataAttribute {
     SaveDataType type;
     SaveDataRank rank;
     u16 index;
-    INSERT_PADDING_BYTES(4);
+    INSERT_PADDING_BYTES_NOINIT(4);
     u64 zero_1;
     u64 zero_2;
     u64 zero_3;
@@ -72,7 +72,7 @@ struct SaveDataExtraData {
     u64 owner_id;
     s64 timestamp;
     SaveDataFlags flags;
-    INSERT_PADDING_BYTES(4);
+    INSERT_PADDING_BYTES_NOINIT(4);
     s64 available_size;
     s64 journal_size;
     s64 commit_id;
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 8c1193894..ee7a58b1c 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -21,21 +21,18 @@ public:
 
     std::mutex mutex;
 
-    bool touch_pressed = false; ///< True if touchpad area is currently pressed, otherwise false
-
-    float touch_x = 0.0f; ///< Touchpad X-position
-    float touch_y = 0.0f; ///< Touchpad Y-position
+    Input::TouchStatus status;
 
 private:
     class Device : public Input::TouchDevice {
     public:
         explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
-        std::tuple<float, float, bool> GetStatus() const override {
+        Input::TouchStatus GetStatus() const override {
             if (auto state = touch_state.lock()) {
                 std::lock_guard guard{state->mutex};
-                return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
+                return state->status;
             }
-            return std::make_tuple(0.0f, 0.0f, false);
+            return {};
         }
 
     private:
@@ -79,36 +76,44 @@ std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsi
     return std::make_tuple(new_x, new_y);
 }
 
-void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
-    if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
+void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id) {
+    if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y)) {
         return;
+    }
+    if (id >= touch_state->status.size()) {
+        return;
+    }
 
     std::lock_guard guard{touch_state->mutex};
-    touch_state->touch_x =
+    const float x =
         static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
         static_cast<float>(framebuffer_layout.screen.right - framebuffer_layout.screen.left);
-    touch_state->touch_y =
+    const float y =
         static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
         static_cast<float>(framebuffer_layout.screen.bottom - framebuffer_layout.screen.top);
 
-    touch_state->touch_pressed = true;
+    touch_state->status[id] = std::make_tuple(x, y, true);
 }
 
-void EmuWindow::TouchReleased() {
+void EmuWindow::TouchReleased(std::size_t id) {
+    if (id >= touch_state->status.size()) {
+        return;
+    }
     std::lock_guard guard{touch_state->mutex};
-    touch_state->touch_pressed = false;
-    touch_state->touch_x = 0;
-    touch_state->touch_y = 0;
+    touch_state->status[id] = std::make_tuple(0.0f, 0.0f, false);
 }
 
-void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) {
-    if (!touch_state->touch_pressed)
+void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id) {
+    if (id >= touch_state->status.size()) {
+        return;
+    }
+    if (!std::get<2>(touch_state->status[id]))
         return;
 
     if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
         std::tie(framebuffer_x, framebuffer_y) = ClipToTouchScreen(framebuffer_x, framebuffer_y);
 
-    TouchPressed(framebuffer_x, framebuffer_y);
+    TouchPressed(framebuffer_x, framebuffer_y, id);
 }
 
 void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) {
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 276d2b906..2436c6580 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -117,18 +117,23 @@ public:
      * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
      * @param framebuffer_x Framebuffer x-coordinate that was pressed
      * @param framebuffer_y Framebuffer y-coordinate that was pressed
+     * @param id Touch event ID
      */
-    void TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y);
+    void TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id);
 
-    /// Signal that a touch released event has occurred (e.g. mouse click released)
-    void TouchReleased();
+    /**
+     * Signal that a touch released event has occurred (e.g. mouse click released)
+     * @param id Touch event ID
+     */
+    void TouchReleased(std::size_t id);
 
     /**
      * Signal that a touch movement event has occurred (e.g. mouse was moved over the emu window)
      * @param framebuffer_x Framebuffer x-coordinate
      * @param framebuffer_y Framebuffer y-coordinate
+     * @param id Touch event ID
      */
-    void TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y);
+    void TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id);
 
     /**
      * Returns currently active configuration.
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index de51a754e..f014dfea3 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -163,10 +163,11 @@ using MotionStatus = std::tuple<Common::Vec3<float>, Common::Vec3<float>, Common
 using MotionDevice = InputDevice<MotionStatus>;
 
 /**
- * A touch status is an object that returns a tuple of two floats and a bool. The floats are
- * x and y coordinates in the range 0.0 - 1.0, and the bool indicates whether it is pressed.
+ * A touch status is an array of 16 tuples, each consisting of two floats and a bool. The floats
+ * are x and y coordinates in the range 0.0 - 1.0, and the bool indicates whether that entry is
+ * pressed.
  */
-using TouchStatus = std::tuple<float, float, bool>;
+using TouchStatus = std::array<std::tuple<float, float, bool>, 16>;
 
 /**
  * A touch device is an input device that returns a touch status object
diff --git a/src/core/frontend/input_interpreter.cpp b/src/core/frontend/input_interpreter.cpp
index 66ae506cd..ec5fe660e 100644
--- a/src/core/frontend/input_interpreter.cpp
+++ b/src/core/frontend/input_interpreter.cpp
@@ -25,6 +25,10 @@ void InputInterpreter::PollInput() {
     button_states[current_index] = button_state;
 }
 
+bool InputInterpreter::IsButtonPressed(HIDButton button) const {
+    return (button_states[current_index] & (1U << static_cast<u8>(button))) != 0;
+}
+
 bool InputInterpreter::IsButtonPressedOnce(HIDButton button) const {
     const bool current_press =
         (button_states[current_index] & (1U << static_cast<u8>(button))) != 0;
diff --git a/src/core/frontend/input_interpreter.h b/src/core/frontend/input_interpreter.h
index fea9aebe6..73fc47ffb 100644
--- a/src/core/frontend/input_interpreter.h
+++ b/src/core/frontend/input_interpreter.h
@@ -67,6 +67,27 @@ public:
     void PollInput();
 
     /**
+     * Checks whether the button is pressed.
+     *
+     * @param button The button to check.
+     *
+     * @returns True when the button is pressed.
+     */
+    [[nodiscard]] bool IsButtonPressed(HIDButton button) const;
+
+    /**
+     * Checks whether any of the buttons in the parameter list is pressed.
+     *
+     * @tparam T The buttons to check.
+     *
+     * @returns True when at least one of the buttons is pressed.
+     */
+    template <HIDButton... T>
+    [[nodiscard]] bool IsAnyButtonPressed() const {
+        return (IsButtonPressed(T) || ...);
+    }
+
+    /**
      * The specified button is considered to be pressed once
      * if it is currently pressed and not pressed previously.
      *
@@ -79,12 +100,12 @@ public:
     /**
      * Checks whether any of the buttons in the parameter list is pressed once.
      *
-     * @tparam HIDButton The buttons to check.
+     * @tparam T The buttons to check.
      *
      * @returns True when at least one of the buttons is pressed once.
      */
     template <HIDButton... T>
-    [[nodiscard]] bool IsAnyButtonPressedOnce() {
+    [[nodiscard]] bool IsAnyButtonPressedOnce() const {
         return (IsButtonPressedOnce(T) || ...);
     }
 
@@ -100,12 +121,12 @@ public:
     /**
      * Checks whether any of the buttons in the parameter list is held down.
      *
-     * @tparam HIDButton The buttons to check.
+     * @tparam T The buttons to check.
      *
      * @returns True when at least one of the buttons is held down.
      */
     template <HIDButton... T>
-    [[nodiscard]] bool IsAnyButtonHeld() {
+    [[nodiscard]] bool IsAnyButtonHeld() const {
         return (IsButtonHeld(T) || ...);
     }
 
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index 7ce313190..55b1716e4 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -146,7 +146,7 @@ static_assert(sizeof(BufferDescriptorC) == 8, "BufferDescriptorC size is incorre
 
 struct DataPayloadHeader {
     u32_le magic;
-    INSERT_PADDING_WORDS(1);
+    INSERT_PADDING_WORDS_NOINIT(1);
 };
 static_assert(sizeof(DataPayloadHeader) == 8, "DataPayloadHeader size is incorrect");
 
@@ -160,7 +160,7 @@ struct DomainMessageHeader {
         // Used when responding to an IPC request, Server -> Client.
         struct {
             u32_le num_objects;
-            INSERT_UNION_PADDING_WORDS(3);
+            INSERT_PADDING_WORDS_NOINIT(3);
         };
 
         // Used when performing an IPC request, Client -> Server.
@@ -171,10 +171,10 @@ struct DomainMessageHeader {
                 BitField<16, 16, u32> size;
             };
             u32_le object_id;
-            INSERT_UNION_PADDING_WORDS(2);
+            INSERT_PADDING_WORDS_NOINIT(2);
         };
 
-        std::array<u32, 4> raw{};
+        std::array<u32, 4> raw;
     };
 };
 static_assert(sizeof(DomainMessageHeader) == 16, "DomainMessageHeader size is incorrect");
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
deleted file mode 100644
index 20ffa7d47..000000000
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ /dev/null
@@ -1,317 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/arm/exclusive_monitor.h"
-#include "core/core.h"
-#include "core/hle/kernel/address_arbiter.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/time_manager.h"
-#include "core/hle/result.h"
-#include "core/memory.h"
-
-namespace Kernel {
-
-// Wake up num_to_wake (or all) threads in a vector.
-void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
-                                 s32 num_to_wake) {
-    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
-    // them all.
-    std::size_t last = waiting_threads.size();
-    if (num_to_wake > 0) {
-        last = std::min(last, static_cast<std::size_t>(num_to_wake));
-    }
-
-    // Signal the waiting threads.
-    for (std::size_t i = 0; i < last; i++) {
-        waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-        RemoveThread(waiting_threads[i]);
-        waiting_threads[i]->WaitForArbitration(false);
-        waiting_threads[i]->ResumeFromWait();
-    }
-}
-
-AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
-AddressArbiter::~AddressArbiter() = default;
-
-ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
-                                           s32 num_to_wake) {
-    switch (type) {
-    case SignalType::Signal:
-        return SignalToAddressOnly(address, num_to_wake);
-    case SignalType::IncrementAndSignalIfEqual:
-        return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
-    case SignalType::ModifyByWaitingCountAndSignalIfEqual:
-        return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
-    default:
-        return ERR_INVALID_ENUM_VALUE;
-    }
-}
-
-ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
-    KScopedSchedulerLock lock(system.Kernel());
-    const std::vector<std::shared_ptr<Thread>> waiting_threads =
-        GetThreadsWaitingOnAddress(address);
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                              s32 num_to_wake) {
-    KScopedSchedulerLock lock(system.Kernel());
-    auto& memory = system.Memory();
-
-    // Ensure that we can write to the address.
-    if (!memory.IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    const std::size_t current_core = system.CurrentCoreIndex();
-    auto& monitor = system.Monitor();
-    u32 current_value;
-    do {
-        current_value = monitor.ExclusiveRead32(current_core, address);
-
-        if (current_value != static_cast<u32>(value)) {
-            return ERR_INVALID_STATE;
-        }
-        current_value++;
-    } while (!monitor.ExclusiveWrite32(current_core, address, current_value));
-
-    return SignalToAddressOnly(address, num_to_wake);
-}
-
-ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                                         s32 num_to_wake) {
-    KScopedSchedulerLock lock(system.Kernel());
-    auto& memory = system.Memory();
-
-    // Ensure that we can write to the address.
-    if (!memory.IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    // Get threads waiting on the address.
-    const std::vector<std::shared_ptr<Thread>> waiting_threads =
-        GetThreadsWaitingOnAddress(address);
-
-    const std::size_t current_core = system.CurrentCoreIndex();
-    auto& monitor = system.Monitor();
-    s32 updated_value;
-    do {
-        updated_value = monitor.ExclusiveRead32(current_core, address);
-
-        if (updated_value != value) {
-            return ERR_INVALID_STATE;
-        }
-        // Determine the modified value depending on the waiting count.
-        if (num_to_wake <= 0) {
-            if (waiting_threads.empty()) {
-                updated_value = value + 1;
-            } else {
-                updated_value = value - 1;
-            }
-        } else {
-            if (waiting_threads.empty()) {
-                updated_value = value + 1;
-            } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
-                updated_value = value - 1;
-            } else {
-                updated_value = value;
-            }
-        }
-    } while (!monitor.ExclusiveWrite32(current_core, address, updated_value));
-
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
-                                          s64 timeout_ns) {
-    switch (type) {
-    case ArbitrationType::WaitIfLessThan:
-        return WaitForAddressIfLessThan(address, value, timeout_ns, false);
-    case ArbitrationType::DecrementAndWaitIfLessThan:
-        return WaitForAddressIfLessThan(address, value, timeout_ns, true);
-    case ArbitrationType::WaitIfEqual:
-        return WaitForAddressIfEqual(address, value, timeout_ns);
-    default:
-        return ERR_INVALID_ENUM_VALUE;
-    }
-}
-
-ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
-                                                    bool should_decrement) {
-    auto& memory = system.Memory();
-    auto& kernel = system.Kernel();
-    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
-
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
-
-        if (current_thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return ERR_THREAD_TERMINATING;
-        }
-
-        // Ensure that we can read the address.
-        if (!memory.IsValidVirtualAddress(address)) {
-            lock.CancelSleep();
-            return ERR_INVALID_ADDRESS_STATE;
-        }
-
-        s32 current_value = static_cast<s32>(memory.Read32(address));
-        if (current_value >= value) {
-            lock.CancelSleep();
-            return ERR_INVALID_STATE;
-        }
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-
-        s32 decrement_value;
-
-        const std::size_t current_core = system.CurrentCoreIndex();
-        auto& monitor = system.Monitor();
-        do {
-            current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
-            if (should_decrement) {
-                decrement_value = current_value - 1;
-            } else {
-                decrement_value = current_value;
-            }
-        } while (
-            !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value)));
-
-        // Short-circuit without rescheduling, if timeout is zero.
-        if (timeout == 0) {
-            lock.CancelSleep();
-            return RESULT_TIMEOUT;
-        }
-
-        current_thread->SetArbiterWaitAddress(address);
-        InsertThread(SharedFrom(current_thread));
-        current_thread->SetStatus(ThreadStatus::WaitArb);
-        current_thread->WaitForArbitration(true);
-    }
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        if (current_thread->IsWaitingForArbitration()) {
-            RemoveThread(SharedFrom(current_thread));
-            current_thread->WaitForArbitration(false);
-        }
-    }
-
-    return current_thread->GetSignalingResult();
-}
-
-ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
-    auto& memory = system.Memory();
-    auto& kernel = system.Kernel();
-    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
-
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
-
-        if (current_thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return ERR_THREAD_TERMINATING;
-        }
-
-        // Ensure that we can read the address.
-        if (!memory.IsValidVirtualAddress(address)) {
-            lock.CancelSleep();
-            return ERR_INVALID_ADDRESS_STATE;
-        }
-
-        s32 current_value = static_cast<s32>(memory.Read32(address));
-        if (current_value != value) {
-            lock.CancelSleep();
-            return ERR_INVALID_STATE;
-        }
-
-        // Short-circuit without rescheduling, if timeout is zero.
-        if (timeout == 0) {
-            lock.CancelSleep();
-            return RESULT_TIMEOUT;
-        }
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-        current_thread->SetArbiterWaitAddress(address);
-        InsertThread(SharedFrom(current_thread));
-        current_thread->SetStatus(ThreadStatus::WaitArb);
-        current_thread->WaitForArbitration(true);
-    }
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        if (current_thread->IsWaitingForArbitration()) {
-            RemoveThread(SharedFrom(current_thread));
-            current_thread->WaitForArbitration(false);
-        }
-    }
-
-    return current_thread->GetSignalingResult();
-}
-
-void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
-    const VAddr arb_addr = thread->GetArbiterWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-
-    const auto iter =
-        std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
-            return entry->GetPriority() >= thread->GetPriority();
-        });
-
-    if (iter == thread_list.cend()) {
-        thread_list.push_back(std::move(thread));
-    } else {
-        thread_list.insert(iter, std::move(thread));
-    }
-}
-
-void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
-    const VAddr arb_addr = thread->GetArbiterWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-
-    const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
-                                   [&thread](const auto& entry) { return thread == entry; });
-
-    if (iter != thread_list.cend()) {
-        thread_list.erase(iter);
-    }
-}
-
-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
-    VAddr address) const {
-    const auto iter = arb_threads.find(address);
-    if (iter == arb_threads.cend()) {
-        return {};
-    }
-
-    const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
-    return {thread_list.cbegin(), thread_list.cend()};
-}
-} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
deleted file mode 100644
index b91edc67d..000000000
--- a/src/core/hle/kernel/address_arbiter.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <list>
-#include <memory>
-#include <unordered_map>
-#include <vector>
-
-#include "common/common_types.h"
-
-union ResultCode;
-
-namespace Core {
-class System;
-}
-
-namespace Kernel {
-
-class Thread;
-
-class AddressArbiter {
-public:
-    enum class ArbitrationType {
-        WaitIfLessThan = 0,
-        DecrementAndWaitIfLessThan = 1,
-        WaitIfEqual = 2,
-    };
-
-    enum class SignalType {
-        Signal = 0,
-        IncrementAndSignalIfEqual = 1,
-        ModifyByWaitingCountAndSignalIfEqual = 2,
-    };
-
-    explicit AddressArbiter(Core::System& system);
-    ~AddressArbiter();
-
-    AddressArbiter(const AddressArbiter&) = delete;
-    AddressArbiter& operator=(const AddressArbiter&) = delete;
-
-    AddressArbiter(AddressArbiter&&) = default;
-    AddressArbiter& operator=(AddressArbiter&&) = delete;
-
-    /// Signals an address being waited on with a particular signaling type.
-    ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
-
-    /// Waits on an address with a particular arbitration type.
-    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
-
-private:
-    /// Signals an address being waited on.
-    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
-
-    /// Signals an address being waited on and increments its value if equal to the value argument.
-    ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-
-    /// Signals an address being waited on and modifies its value based on waiting thread count if
-    /// equal to the value argument.
-    ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                             s32 num_to_wake);
-
-    /// Waits on an address if the value passed is less than the argument value,
-    /// optionally decrementing.
-    ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
-                                        bool should_decrement);
-
-    /// Waits on an address if the value passed is equal to the argument value.
-    ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
-
-    /// Wake up num_to_wake (or all) threads in a vector.
-    void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
-
-    /// Insert a thread into the address arbiter container
-    void InsertThread(std::shared_ptr<Thread> thread);
-
-    /// Removes a thread from the address arbiter container
-    void RemoveThread(std::shared_ptr<Thread> thread);
-
-    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
-
-    /// List of threads waiting for a address arbiter
-    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
-
-    Core::System& system;
-};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index 8aff2227a..f8f005f15 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -33,9 +33,6 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
         server_port->AppendPendingSession(std::move(server));
     }
 
-    // Wake the threads waiting on the ServerPort
-    server_port->Signal();
-
     return MakeResult(std::move(client));
 }
 
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index be9eba519..e8e52900d 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -12,7 +12,7 @@
 
 namespace Kernel {
 
-ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ClientSession::ClientSession(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 
 ClientSession::~ClientSession() {
     // This destructor will be called automatically when the last ClientSession handle is closed by
@@ -22,15 +22,6 @@ ClientSession::~ClientSession() {
     }
 }
 
-bool ClientSession::ShouldWait(const Thread* thread) const {
-    UNIMPLEMENTED();
-    return {};
-}
-
-void ClientSession::Acquire(Thread* thread) {
-    UNIMPLEMENTED();
-}
-
 bool ClientSession::IsSignaled() const {
     UNIMPLEMENTED();
     return true;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index e5e0690c2..d5c9ebee8 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -7,7 +7,7 @@
 #include <memory>
 #include <string>
 
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/result.h"
 
 union ResultCode;
@@ -26,7 +26,7 @@ class KernelCore;
 class Session;
 class Thread;
 
-class ClientSession final : public SynchronizationObject {
+class ClientSession final : public KSynchronizationObject {
 public:
     explicit ClientSession(KernelCore& kernel);
     ~ClientSession() override;
@@ -49,10 +49,6 @@ public:
     ResultCode SendSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory,
                                Core::Timing::CoreTiming& core_timing);
 
-    bool ShouldWait(const Thread* thread) const override;
-
-    void Acquire(Thread* thread) override;
-
     bool IsSignaled() const override;
 
 private:
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d4e5d88cf..7d32a39f0 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -13,12 +13,14 @@ namespace Kernel {
 constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
+constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
 constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
 constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
+constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
 constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
 constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
@@ -28,6 +30,7 @@ constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
 constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
 constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
 constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
+constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
 constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
 constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
 constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
new file mode 100644
index 000000000..d9e702f13
--- /dev/null
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -0,0 +1,367 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/arm/exclusive_monitor.h"
+#include "core/core.h"
+#include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/svc_results.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
+#include "core/memory.h"
+
+namespace Kernel {
+
+KAddressArbiter::KAddressArbiter(Core::System& system_)
+    : system{system_}, kernel{system.Kernel()} {} // kernel comes from the same System.
+KAddressArbiter::~KAddressArbiter() = default;
+
+namespace {
+
+bool ReadFromUser(Core::System& system, s32* out, VAddr address) { // Reads 32 bits into *out.
+    *out = system.Memory().Read32(address);
+    return true; // Always reports success; no access check is done here.
+}
+
+bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) {
+    auto& monitor = system.Monitor();
+    const auto current_core = system.CurrentCoreIndex();
+
+    // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
+    // TODO(bunnei): We should call CanAccessAtomic(..) here.
+
+    // Exclusively load the value from the address; this is what *out reports on completion.
+    const s32 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
+
+    // Compare it to the desired one.
+    if (current_value < value) {
+        // If less than, we want to try to decrement.
+        const s32 decrement_value = current_value - 1;
+
+        // Decrement and try to store.
+        if (!monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value))) {
+            // Store failed: retry, returning its result so *out isn't clobbered by a stale read.
+            return DecrementIfLessThan(system, out, address, value);
+        }
+    } else {
+        // Otherwise, clear our exclusive hold and finish
+        monitor.ClearExclusive();
+    }
+
+    // We're done: report the value observed before any decrement. Always returns true.
+    *out = current_value;
+    return true;
+}
+
+bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) {
+    auto& monitor = system.Monitor();
+    const auto current_core = system.CurrentCoreIndex();
+
+    // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
+    // TODO(bunnei): We should call CanAccessAtomic(..) here.
+
+    // Exclusively load the value from the address; this is what *out reports on completion.
+    const s32 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
+
+    // Compare it to the desired one.
+    if (current_value == value) {
+        // If equal, we want to try to write the new value.
+
+        // Try to store.
+        if (!monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(new_value))) {
+            // Store failed: retry, returning its result so *out isn't clobbered by a stale read.
+            return UpdateIfEqual(system, out, address, value, new_value);
+        }
+    } else {
+        // Otherwise, clear our exclusive hold and finish.
+        monitor.ClearExclusive();
+    }
+
+    // We're done: callers compare *out with value to learn whether the store happened.
+    *out = current_value;
+    return true;
+}
+
+} // namespace
+
+ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) {
+    // Wake threads waiting on addr: at most count of them, or all of them when count <= 0.
+    s32 woken{};
+    {
+        KScopedSchedulerLock lock(kernel);
+
+        auto node = thread_tree.nfind_light({addr, -1});
+        while ((node != thread_tree.end()) && (count <= 0 || woken < count) &&
+               (node->GetAddressArbiterKey() == addr)) {
+            Thread* const waiter = std::addressof(*node);
+            waiter->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            ASSERT(waiter->IsWaitingForAddressArbiter());
+            waiter->Wakeup();
+            // Unlink the waiter, then clear its arbiter state; erase hands back the next entry.
+            node = thread_tree.erase(node);
+            waiter->ClearAddressArbiter();
+            ++woken;
+        }
+    }
+    return RESULT_SUCCESS;
+}
+
+ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 count) {
+    // Bump the word at addr from value to value + 1, then wake waiters as Signal does.
+    s32 woken{};
+    {
+        KScopedSchedulerLock lock(kernel);
+
+        // The increment only happens when the userspace word still equals value.
+        s32 observed{};
+        R_UNLESS(UpdateIfEqual(system, std::addressof(observed), addr, value, value + 1),
+                 Svc::ResultInvalidCurrentMemory);
+        R_UNLESS(observed == value, Svc::ResultInvalidState);
+
+        auto node = thread_tree.nfind_light({addr, -1});
+        while ((node != thread_tree.end()) && (count <= 0 || woken < count) &&
+               (node->GetAddressArbiterKey() == addr)) {
+            Thread* const waiter = std::addressof(*node);
+            waiter->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            ASSERT(waiter->IsWaitingForAddressArbiter());
+            waiter->Wakeup();
+            // Unlink the waiter, then clear its arbiter state; erase hands back the next entry.
+            node = thread_tree.erase(node);
+            waiter->ClearAddressArbiter();
+            ++woken;
+        }
+    }
+    return RESULT_SUCCESS;
+}
+
+ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 value, s32 count) {
+    // Perform signaling: derive a new value from the waiters on addr, store it, then wake them.
+    s32 num_waiters{};
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        auto it = thread_tree.nfind_light({addr, -1});
+        // Determine the updated value (it depends on whether, and how many, threads wait on addr).
+        s32 new_value{};
+        if (/*GetTargetFirmware() >= TargetFirmware_7_0_0*/ true) { // Hard-coded: always taken.
+            if (count <= 0) {
+                if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) {
+                    new_value = value - 2;
+                } else {
+                    new_value = value + 1;
+                }
+            } else {
+                if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) {
+                    auto tmp_it = it; // Counts the waiters that follow `it`, bounded via count.
+                    s32 tmp_num_waiters{};
+                    while ((++tmp_it != thread_tree.end()) &&
+                           (tmp_it->GetAddressArbiterKey() == addr)) {
+                        if ((tmp_num_waiters++) >= count) {
+                            break;
+                        }
+                    }
+
+                    if (tmp_num_waiters < count) {
+                        new_value = value - 1;
+                    } else {
+                        new_value = value;
+                    }
+                } else {
+                    new_value = value + 1;
+                }
+            }
+        } else { // Not reachable while the condition above is hard-coded to true.
+            if (count <= 0) {
+                if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) {
+                    new_value = value - 1;
+                } else {
+                    new_value = value + 1;
+                }
+            } else {
+                auto tmp_it = it;
+                s32 tmp_num_waiters{};
+                while ((tmp_it != thread_tree.end()) && (tmp_it->GetAddressArbiterKey() == addr) &&
+                       (tmp_num_waiters < count + 1)) {
+                    ++tmp_num_waiters;
+                    ++tmp_it;
+                }
+
+                if (tmp_num_waiters == 0) {
+                    new_value = value + 1;
+                } else if (tmp_num_waiters <= count) {
+                    new_value = value - 1;
+                } else {
+                    new_value = value;
+                }
+            }
+        }
+
+        // Check the userspace value, storing new_value only when it differs from value.
+        s32 user_value{};
+        bool succeeded{};
+        if (value != new_value) {
+            succeeded = UpdateIfEqual(system, std::addressof(user_value), addr, value, new_value);
+        } else {
+            succeeded = ReadFromUser(system, std::addressof(user_value), addr);
+        }
+
+        R_UNLESS(succeeded, Svc::ResultInvalidCurrentMemory);
+        R_UNLESS(user_value == value, Svc::ResultInvalidState);
+
+        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
+               (it->GetAddressArbiterKey() == addr)) { // Wake up to count waiters (all if <= 0).
+            Thread* target_thread = std::addressof(*it);
+            target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            ASSERT(target_thread->IsWaitingForAddressArbiter());
+            target_thread->Wakeup();
+
+            it = thread_tree.erase(it);
+            target_thread->ClearAddressArbiter();
+            ++num_waiters;
+        }
+    }
+    return RESULT_SUCCESS;
+}
+
+ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement, s64 timeout) {
+    // Prepare to wait on addr; the wait only starts if the value read there is < value.
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout);
+
+        // Check that the thread isn't terminating.
+        if (cur_thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Set the synced object: the result stays timed-out unless a signaler overwrites it.
+        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+
+        // Read the value from userspace (via DecrementIfLessThan when decrement is set).
+        s32 user_value{};
+        bool succeeded{};
+        if (decrement) {
+            succeeded = DecrementIfLessThan(system, std::addressof(user_value), addr, value);
+        } else {
+            succeeded = ReadFromUser(system, std::addressof(user_value), addr);
+        }
+
+        if (!succeeded) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidCurrentMemory;
+        }
+
+        // Check that the value is less than the specified one.
+        if (user_value >= value) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidState;
+        }
+
+        // Check that the timeout is non-zero.
+        if (timeout == 0) {
+            slp.CancelSleep();
+            return Svc::ResultTimedOut;
+        }
+
+        // Set the arbiter: queue the thread in the tree under addr and mark it as waiting.
+        cur_thread->SetAddressArbiter(std::addressof(thread_tree), addr);
+        thread_tree.insert(*cur_thread);
+        cur_thread->SetState(ThreadState::Waiting);
+        cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Arbitration);
+    }
+
+    // Cancel the timer wait, if one was scheduled.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Remove from the address arbiter, if still queued (signalers unlink the threads they wake).
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        if (cur_thread->IsWaitingForAddressArbiter()) {
+            thread_tree.erase(thread_tree.iterator_to(*cur_thread));
+            cur_thread->ClearAddressArbiter();
+        }
+    }
+
+    // Get the result recorded on the thread; the synced object itself is unused.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
+    // Prepare to wait on addr; the wait only starts if the value read there equals value.
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout);
+
+        // Check that the thread isn't terminating.
+        if (cur_thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Set the synced object: the result stays timed-out unless a signaler overwrites it.
+        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+
+        // Read the value from userspace.
+        s32 user_value{};
+        if (!ReadFromUser(system, std::addressof(user_value), addr)) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidCurrentMemory;
+        }
+
+        // Check that the value is equal.
+        if (value != user_value) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidState;
+        }
+
+        // Check that the timeout is non-zero.
+        if (timeout == 0) {
+            slp.CancelSleep();
+            return Svc::ResultTimedOut;
+        }
+
+        // Set the arbiter: queue the thread in the tree under addr and mark it as waiting.
+        cur_thread->SetAddressArbiter(std::addressof(thread_tree), addr);
+        thread_tree.insert(*cur_thread);
+        cur_thread->SetState(ThreadState::Waiting);
+        cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Arbitration);
+    }
+
+    // Cancel the timer wait, if one was scheduled.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Remove from the address arbiter, if still queued (signalers unlink the threads they wake).
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        if (cur_thread->IsWaitingForAddressArbiter()) {
+            thread_tree.erase(thread_tree.iterator_to(*cur_thread));
+            cur_thread->ClearAddressArbiter();
+        }
+    }
+
+    // Get the result recorded on the thread; the synced object itself is unused.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_address_arbiter.h b/src/core/hle/kernel/k_address_arbiter.h
new file mode 100644
index 000000000..8d379b524
--- /dev/null
+++ b/src/core/hle/kernel/k_address_arbiter.h
@@ -0,0 +1,70 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/svc_types.h"
+
+union ResultCode;
+
+namespace Core {
+class System;
+}
+
+namespace Kernel {
+
+class KernelCore;
+
+class KAddressArbiter {
+public:
+    using ThreadTree = KConditionVariable::ThreadTree;
+
+    explicit KAddressArbiter(Core::System& system_);
+    ~KAddressArbiter();
+
+    [[nodiscard]] ResultCode SignalToAddress(VAddr addr, Svc::SignalType type, s32 value,
+                                             s32 count) {
+        switch (type) { // Dispatch to the private handler for this signal type.
+        case Svc::SignalType::Signal:
+            return Signal(addr, count);
+        case Svc::SignalType::SignalAndIncrementIfEqual:
+            return SignalAndIncrementIfEqual(addr, value, count);
+        case Svc::SignalType::SignalAndModifyByWaitingCountIfEqual:
+            return SignalAndModifyByWaitingCountIfEqual(addr, value, count);
+        }
+        UNREACHABLE(); // Reached only if type matched none of the cases above.
+        return RESULT_UNKNOWN;
+    }
+
+    [[nodiscard]] ResultCode WaitForAddress(VAddr addr, Svc::ArbitrationType type, s32 value,
+                                            s64 timeout) {
+        switch (type) { // Dispatch to the private handler for this arbitration type.
+        case Svc::ArbitrationType::WaitIfLessThan:
+            return WaitIfLessThan(addr, value, false, timeout);
+        case Svc::ArbitrationType::DecrementAndWaitIfLessThan:
+            return WaitIfLessThan(addr, value, true, timeout); // Same handler, decrement on.
+        case Svc::ArbitrationType::WaitIfEqual:
+            return WaitIfEqual(addr, value, timeout);
+        }
+        UNREACHABLE(); // Reached only if type matched none of the cases above.
+        return RESULT_UNKNOWN;
+    }
+
+private:
+    [[nodiscard]] ResultCode Signal(VAddr addr, s32 count);
+    [[nodiscard]] ResultCode SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 count);
+    [[nodiscard]] ResultCode SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 value, s32 count);
+    [[nodiscard]] ResultCode WaitIfLessThan(VAddr addr, s32 value, bool decrement, s64 timeout);
+    [[nodiscard]] ResultCode WaitIfEqual(VAddr addr, s32 value, s64 timeout);
+
+    ThreadTree thread_tree; // Threads currently waiting, looked up by address key.
+
+    Core::System& system;
+    KernelCore& kernel;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
new file mode 100644
index 000000000..49a068310
--- /dev/null
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -0,0 +1,349 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "core/arm/exclusive_monitor.h"
+#include "core/core.h"
+#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_synchronization_object.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_common.h"
+#include "core/hle/kernel/svc_results.h"
+#include "core/hle/kernel/thread.h"
+#include "core/memory.h"
+
+namespace Kernel {
+
+namespace {
+
+bool ReadFromUser(Core::System& system, u32* out, VAddr address) { // Reads 32 bits into *out.
+    *out = system.Memory().Read32(address);
+    return true; // Always reports success; no access check is done here.
+}
+
+bool WriteToUser(Core::System& system, VAddr address, const u32* p) { // Writes *p to address.
+    system.Memory().Write32(address, *p);
+    return true; // Always reports success; no access check is done here.
+}
+
+bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero,
+                      u32 new_orr_mask) {
+    auto& monitor = system.Monitor();
+    const auto current_core = system.CurrentCoreIndex();
+
+    // Exclusively load the current value from the address.
+    const auto expected = monitor.ExclusiveRead32(current_core, address);
+
+    // Orr in the new mask.
+    u32 value = expected | new_orr_mask;
+
+    // If the value is zero, use the if_zero value, otherwise use the newly orr'd value.
+    if (!expected) {
+        value = if_zero;
+    }
+
+    // Try to store.
+    if (!monitor.ExclusiveWrite32(current_core, address, value)) {
+        // If we failed to store, try again from the load and return that attempt's result.
+        return UpdateLockAtomic(system, out, address, if_zero, new_orr_mask);
+    }
+
+    // We're done: report the value that was read before the update. Always returns true.
+    *out = expected;
+    return true;
+}
+
+} // namespace
+
+KConditionVariable::KConditionVariable(Core::System& system_)
+    : system{system_}, kernel{system.Kernel()} {} // kernel comes from the same System.
+
+KConditionVariable::~KConditionVariable() = default;
+
+ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
+    Thread* owner_thread = kernel.CurrentScheduler()->GetCurrentThread();
+
+    // Signal the address: hand the lock word at addr over to the next waiter, if there is one.
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        // Remove waiter thread (num_waiters is filled in by RemoveWaiterByKey).
+        s32 num_waiters{};
+        Thread* next_owner_thread =
+            owner_thread->RemoveWaiterByKey(std::addressof(num_waiters), addr);
+
+        // Determine the next tag; it stays 0 when no thread takes over.
+        u32 next_value{};
+        if (next_owner_thread) {
+            next_value = next_owner_thread->GetAddressKeyValue();
+            if (num_waiters > 1) {
+                next_value |= Svc::HandleWaitMask;
+            }
+
+            next_owner_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+            next_owner_thread->Wakeup();
+        }
+
+        // Write the value to userspace (WriteToUser currently always reports success).
+        if (!WriteToUser(system, addr, std::addressof(next_value))) {
+            if (next_owner_thread) {
+                next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+            }
+
+            return Svc::ResultInvalidCurrentMemory;
+        }
+    }
+
+    return RESULT_SUCCESS;
+}
+
+ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 value) {
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+
+    // Wait for the address: queue the current thread on the owner thread named by handle.
+    {
+        std::shared_ptr<Thread> owner_thread;
+        ASSERT(!owner_thread);
+        {
+            KScopedSchedulerLock sl(kernel);
+            cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            // Check if the thread should terminate.
+            R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+
+            {
+                // Read the tag from userspace.
+                u32 test_tag{};
+                R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
+                         Svc::ResultInvalidCurrentMemory);
+
+                // If the tag isn't the handle (with wait mask), we're done.
+                R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);
+
+                // Get the lock owner thread.
+                owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<Thread>(handle);
+                R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+
+                // Update the lock: register as a waiter on the owner and enter the waiting state.
+                cur_thread->SetAddressKey(addr, value);
+                owner_thread->AddWaiter(cur_thread);
+                cur_thread->SetState(ThreadState::Waiting);
+                cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::ConditionVar);
+                cur_thread->SetMutexWaitAddressForDebugging(addr);
+            }
+        }
+        ASSERT(owner_thread); // Only reached when none of the early returns above fired.
+    }
+
+    // Remove the thread as a waiter from the lock owner, if it still has one.
+    {
+        KScopedSchedulerLock sl(kernel);
+        Thread* owner_thread = cur_thread->GetLockOwner();
+        if (owner_thread != nullptr) {
+            owner_thread->RemoveWaiter(cur_thread);
+        }
+    }
+
+    // Get the wait result recorded on the thread; the synced object itself is unused.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+Thread* KConditionVariable::SignalImpl(Thread* thread) {
+    // Check pre-conditions: the caller must hold the global scheduler lock.
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    // Update the tag stored at the thread's address key.
+    VAddr address = thread->GetAddressKey();
+    u32 own_tag = thread->GetAddressKeyValue();
+
+    u32 prev_tag{};
+    bool can_access{};
+    {
+        // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
+        // TODO(bunnei): We should call CanAccessAtomic(..) here.
+        can_access = true;
+        if (can_access) { // Store own_tag if the word was 0, else orr in the wait mask.
+            UpdateLockAtomic(system, std::addressof(prev_tag), address, own_tag,
+                             Svc::HandleWaitMask);
+        }
+    }
+
+    Thread* thread_to_close = nullptr; // Stays nullptr unless the thread queues on an owner.
+    if (can_access) {
+        if (prev_tag == InvalidHandle) {
+            // If nobody held the lock previously, we're all good.
+            thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+            thread->Wakeup();
+        } else {
+            // Get the previous owner (the tag with the wait mask stripped is its handle).
+            auto owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<Thread>(
+                prev_tag & ~Svc::HandleWaitMask);
+
+            if (owner_thread) {
+                // Add the thread as a waiter on the owner.
+                owner_thread->AddWaiter(thread);
+                thread_to_close = owner_thread.get();
+            } else {
+                // The lock was tagged with a thread that doesn't exist.
+                thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+                thread->Wakeup();
+            }
+        }
+    } else {
+        // If the address wasn't accessible, note so (not reachable while can_access is true).
+        thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+        thread->Wakeup();
+    }
+
+    return thread_to_close;
+}
+
+void KConditionVariable::Signal(u64 cv_key, s32 count) {
+    // Prepare for signaling; MaxThreads is how many threads-to-close fit in the fixed array.
+    constexpr int MaxThreads = 16;
+
+    // TODO(bunnei): This should just be Thread once we implement KAutoObject instead of using
+    // std::shared_ptr.
+    std::vector<std::shared_ptr<Thread>> thread_list; // Overflow once thread_array is full.
+    std::array<Thread*, MaxThreads> thread_array;
+    s32 num_to_close{};
+
+    // Perform signaling: process up to count waiters on cv_key, or all of them when count <= 0.
+    s32 num_waiters{};
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        auto it = thread_tree.nfind_light({cv_key, -1});
+        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
+               (it->GetConditionVariableKey() == cv_key)) {
+            Thread* target_thread = std::addressof(*it);
+
+            if (Thread* thread = SignalImpl(target_thread); thread != nullptr) {
+                if (num_to_close < MaxThreads) {
+                    thread_array[num_to_close++] = thread;
+                } else {
+                    thread_list.push_back(SharedFrom(thread));
+                }
+            }
+
+            it = thread_tree.erase(it);
+            target_thread->ClearConditionVariable();
+            ++num_waiters;
+        }
+
+        // If we have no waiters left for cv_key, clear the has waiter flag by writing 0 to it.
+        if (it == thread_tree.end() || it->GetConditionVariableKey() != cv_key) {
+            const u32 has_waiter_flag{};
+            WriteToUser(system, cv_key, std::addressof(has_waiter_flag));
+        }
+    }
+
+    // Close threads in the array (done after the scheduler lock scope has ended).
+    for (auto i = 0; i < num_to_close; ++i) {
+        thread_array[i]->Close();
+    }
+
+    // Close threads in the list, erasing each entry as it is closed.
+    for (auto it = thread_list.begin(); it != thread_list.end(); it = thread_list.erase(it)) {
+        (*it)->Close();
+    }
+}
+
+ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout) {
+    // Prepare to wait: release the lock word at addr, then queue on the condition variable key.
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout);
+
+        // Set the synced object: the result stays timed-out unless a signaler overwrites it.
+        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+
+        // Check that the thread isn't terminating.
+        if (cur_thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Update the value and process for the next owner.
+        {
+            // Remove waiter thread (num_waiters is filled in by RemoveWaiterByKey).
+            s32 num_waiters{};
+            Thread* next_owner_thread =
+                cur_thread->RemoveWaiterByKey(std::addressof(num_waiters), addr);
+
+            // Update for the next owner thread; next_value stays 0 when there is none.
+            u32 next_value{};
+            if (next_owner_thread != nullptr) {
+                // Get the next tag value.
+                next_value = next_owner_thread->GetAddressKeyValue();
+                if (num_waiters > 1) {
+                    next_value |= Svc::HandleWaitMask;
+                }
+
+                // Wake up the next owner.
+                next_owner_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+                next_owner_thread->Wakeup();
+            }
+
+            // Write to the cv key: flag that it now has a waiter.
+            {
+                const u32 has_waiter_flag = 1;
+                WriteToUser(system, key, std::addressof(has_waiter_flag));
+                // TODO(bunnei): We should call DataMemoryBarrier(..) here.
+            }
+
+            // Write the value to userspace (WriteToUser currently always reports success).
+            if (!WriteToUser(system, addr, std::addressof(next_value))) {
+                slp.CancelSleep();
+                return Svc::ResultInvalidCurrentMemory;
+            }
+        }
+
+        // Update condition variable tracking: queue the current thread in the tree.
+        {
+            cur_thread->SetConditionVariable(std::addressof(thread_tree), addr, key, value);
+            thread_tree.insert(*cur_thread);
+        }
+
+        // If the timeout is non-zero, set the thread as waiting.
+        if (timeout != 0) {
+            cur_thread->SetState(ThreadState::Waiting);
+            cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::ConditionVar);
+            cur_thread->SetMutexWaitAddressForDebugging(addr);
+        }
+    }
+
+    // Cancel the timer wait, if one was scheduled.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Remove from the condition variable (and from any lock owner's waiters) if still queued.
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        if (Thread* owner = cur_thread->GetLockOwner(); owner != nullptr) {
+            owner->RemoveWaiter(cur_thread);
+        }
+
+        if (cur_thread->IsWaitingForConditionVariable()) {
+            thread_tree.erase(thread_tree.iterator_to(*cur_thread));
+            cur_thread->ClearConditionVariable();
+        }
+    }
+
+    // Get the result recorded on the thread; the synced object itself is unused.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_condition_variable.h b/src/core/hle/kernel/k_condition_variable.h
new file mode 100644
index 000000000..98ed5b323
--- /dev/null
+++ b/src/core/hle/kernel/k_condition_variable.h
@@ -0,0 +1,59 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/assert.h"
+#include "common/common_types.h"
+
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/result.h"
+
+namespace Core {
+class System;
+}
+
+namespace Kernel {
+
+class KConditionVariable {
+public:
+    using ThreadTree = typename Thread::ConditionVariableThreadTreeType;
+
+    explicit KConditionVariable(Core::System& system_);
+    ~KConditionVariable();
+
+    // Arbitration: hand over / wait on the lock word stored at a userspace address.
+    [[nodiscard]] ResultCode SignalToAddress(VAddr addr);
+    [[nodiscard]] ResultCode WaitForAddress(Handle handle, VAddr addr, u32 value);
+
+    // Condition variable: Signal processes up to count waiters on cv_key (all when count <= 0).
+    void Signal(u64 cv_key, s32 count);
+    [[nodiscard]] ResultCode Wait(VAddr addr, u64 key, u32 value, s64 timeout);
+
+private:
+    [[nodiscard]] Thread* SignalImpl(Thread* thread); // Returns the prior owner or nullptr.
+
+    ThreadTree thread_tree; // Threads waiting on a condition variable key.
+
+    Core::System& system;
+    KernelCore& kernel;
+};
+
+inline void BeforeUpdatePriority(const KernelCore& kernel, KConditionVariable::ThreadTree* tree,
+                                 Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // Scheduler lock must be held.
+    auto position = tree->iterator_to(*thread);
+    tree->erase(position); // Unlink the thread from the tree.
+}
+
+inline void AfterUpdatePriority(const KernelCore& kernel, KConditionVariable::ThreadTree* tree,
+                                Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // Scheduler lock must be held.
+    Thread& node = *thread;
+    tree->insert(node); // Link the thread back into the tree.
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h
index 99fb8fe93..0dc929040 100644
--- a/src/core/hle/kernel/k_priority_queue.h
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -8,11 +8,11 @@
 #pragma once
 
 #include <array>
+#include <bit>
 #include <concepts>
 
 #include "common/assert.h"
 #include "common/bit_set.h"
-#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/concepts.h"
 
@@ -268,7 +268,7 @@ private:
     }
 
     constexpr s32 GetNextCore(u64& affinity) {
-        const s32 core = Common::CountTrailingZeroes64(affinity);
+        const s32 core = std::countr_zero(affinity);
         ClearAffinityBit(affinity, core);
         return core;
     }
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index c5fd82a6b..12b5619fb 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -5,6 +5,8 @@
 // This file references various implementation details from Atmosphere, an open-source firmware for
 // the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
 
+#include <bit>
+
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/fiber.h"
@@ -31,12 +33,12 @@ static void IncrementScheduledCount(Kernel::Thread* thread) {
 
 void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule,
                                  Core::EmuThreadHandle global_thread) {
-    u32 current_core = global_thread.host_handle;
+    const u32 current_core = global_thread.host_handle;
     bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
                                (current_core < Core::Hardware::NUM_CPU_CORES);
 
     while (cores_pending_reschedule != 0) {
-        u32 core = Common::CountTrailingZeroes64(cores_pending_reschedule);
+        const auto core = static_cast<u32>(std::countr_zero(cores_pending_reschedule));
         ASSERT(core < Core::Hardware::NUM_CPU_CORES);
         if (!must_context_switch || core != current_core) {
             auto& phys_core = kernel.PhysicalCore(core);
@@ -109,7 +111,7 @@ u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) {
 
     // Idle cores are bad. We're going to try to migrate threads to each idle core in turn.
     while (idle_cores != 0) {
-        u32 core_id = Common::CountTrailingZeroes64(idle_cores);
+        const auto core_id = static_cast<u32>(std::countr_zero(idle_cores));
         if (Thread* suggested = priority_queue.GetSuggestedFront(core_id); suggested != nullptr) {
             s32 migration_candidates[Core::Hardware::NUM_CPU_CORES];
             size_t num_candidates = 0;
@@ -180,22 +182,22 @@ u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) {
     return cores_needing_scheduling;
 }
 
-void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state) {
+void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, ThreadState old_state) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // Check if the state has changed, because if it hasn't there's nothing to do.
-    const auto cur_state = thread->scheduling_state;
+    const auto cur_state = thread->GetRawState();
     if (cur_state == old_state) {
         return;
     }
 
     // Update the priority queues.
-    if (old_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    if (old_state == ThreadState::Runnable) {
         // If we were previously runnable, then we're not runnable now, and we should remove.
         GetPriorityQueue(kernel).Remove(thread);
         IncrementScheduledCount(thread);
         SetSchedulerUpdateNeeded(kernel);
-    } else if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    } else if (cur_state == ThreadState::Runnable) {
         // If we're now runnable, then we weren't previously, and we should add.
         GetPriorityQueue(kernel).PushBack(thread);
         IncrementScheduledCount(thread);
@@ -203,13 +205,11 @@ void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 ol
     }
 }
 
-void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
-                                         u32 old_priority) {
-
+void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, s32 old_priority) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // If the thread is runnable, we want to change its priority in the queue.
-    if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    if (thread->GetRawState() == ThreadState::Runnable) {
         GetPriorityQueue(kernel).ChangePriority(
             old_priority, thread == kernel.CurrentScheduler()->GetCurrentThread(), thread);
         IncrementScheduledCount(thread);
@@ -222,7 +222,7 @@ void KScheduler::OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // If the thread is runnable, we want to change its affinity in the queue.
-    if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    if (thread->GetRawState() == ThreadState::Runnable) {
         GetPriorityQueue(kernel).ChangeAffinityMask(old_core, old_affinity, thread);
         IncrementScheduledCount(thread);
         SetSchedulerUpdateNeeded(kernel);
@@ -292,7 +292,7 @@ void KScheduler::RotateScheduledQueue(s32 core_id, s32 priority) {
 
         // If the best thread we can choose has a priority the same or worse than ours, try to
         // migrate a higher priority thread.
-        if (best_thread != nullptr && best_thread->GetPriority() >= static_cast<u32>(priority)) {
+        if (best_thread != nullptr && best_thread->GetPriority() >= priority) {
             Thread* suggested = priority_queue.GetSuggestedFront(core_id);
             while (suggested != nullptr) {
                 // If the suggestion's priority is the same as ours, don't bother.
@@ -395,8 +395,8 @@ void KScheduler::YieldWithoutCoreMigration() {
     {
         KScopedSchedulerLock lock(kernel);
 
-        const auto cur_state = cur_thread.scheduling_state;
-        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        const auto cur_state = cur_thread.GetRawState();
+        if (cur_state == ThreadState::Runnable) {
             // Put the current thread at the back of the queue.
             Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread));
             IncrementScheduledCount(std::addressof(cur_thread));
@@ -436,8 +436,8 @@ void KScheduler::YieldWithCoreMigration() {
     {
         KScopedSchedulerLock lock(kernel);
 
-        const auto cur_state = cur_thread.scheduling_state;
-        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        const auto cur_state = cur_thread.GetRawState();
+        if (cur_state == ThreadState::Runnable) {
             // Get the current active core.
             const s32 core_id = cur_thread.GetActiveCore();
 
@@ -526,8 +526,8 @@ void KScheduler::YieldToAnyThread() {
     {
         KScopedSchedulerLock lock(kernel);
 
-        const auto cur_state = cur_thread.scheduling_state;
-        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        const auto cur_state = cur_thread.GetRawState();
+        if (cur_state == ThreadState::Runnable) {
             // Get the current active core.
             const s32 core_id = cur_thread.GetActiveCore();
 
@@ -645,8 +645,7 @@ void KScheduler::Unload(Thread* thread) {
 
 void KScheduler::Reload(Thread* thread) {
     if (thread) {
-        ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
-                   "Thread must be runnable.");
+        ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");
 
         // Cancel any outstanding wakeup events for this thread
         thread->SetIsRunning(true);
@@ -725,7 +724,7 @@ void KScheduler::SwitchToCurrent() {
         do {
             if (current_thread != nullptr && !current_thread->IsHLEThread()) {
                 current_thread->context_guard.lock();
-                if (!current_thread->IsRunnable()) {
+                if (current_thread->GetRawState() != ThreadState::Runnable) {
                     current_thread->context_guard.unlock();
                     break;
                 }
@@ -772,7 +771,7 @@ void KScheduler::Initialize() {
 
     {
         KScopedSchedulerLock lock{system.Kernel()};
-        idle_thread->SetStatus(ThreadStatus::Ready);
+        idle_thread->SetState(ThreadState::Runnable);
     }
 }
 
diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h
index e84abc84c..783665123 100644
--- a/src/core/hle/kernel/k_scheduler.h
+++ b/src/core/hle/kernel/k_scheduler.h
@@ -100,11 +100,10 @@ public:
     void YieldToAnyThread();
 
     /// Notify the scheduler a thread's status has changed.
-    static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state);
+    static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, ThreadState old_state);
 
     /// Notify the scheduler a thread's priority has changed.
-    static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
-                                        u32 old_priority);
+    static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, s32 old_priority);
 
     /// Notify the scheduler a thread's core and/or affinity mask has changed.
     static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h
index 2f1c1f691..9b40bd22c 100644
--- a/src/core/hle/kernel/k_scheduler_lock.h
+++ b/src/core/hle/kernel/k_scheduler_lock.h
@@ -19,7 +19,7 @@ class KernelCore;
 template <typename SchedulerType>
 class KAbstractSchedulerLock {
 public:
-    explicit KAbstractSchedulerLock(KernelCore& kernel) : kernel{kernel} {}
+    explicit KAbstractSchedulerLock(KernelCore& kernel_) : kernel{kernel_} {}
 
     bool IsLockedByCurrentThread() const {
         return this->owner_thread == kernel.GetCurrentEmuThreadID();
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
new file mode 100644
index 000000000..1c508cb55
--- /dev/null
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -0,0 +1,172 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_synchronization_object.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/svc_results.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+/// Sleeps the current thread on `objects` unless one is already signaled, `timeout` is zero, or
+/// the thread has a pending termination/cancel request. Returns the thread's wait result, which
+/// is preset to Svc::ResultTimedOut and overwritten by NotifyAvailable(). `*out_index` is the
+/// signaled object's index, or -1 if none matched; the early error returns leave it unwritten.
+ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
+                                        KSynchronizationObject** objects, const s32 num_objects,
+                                        s64 timeout) {
+    // Allocate one waiter-list node per object.
+    std::vector<ThreadListNode> thread_nodes(num_objects);
+
+    // Prepare for wait.
+    Thread* thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        // Setup the scheduling lock and sleep.
+        KScopedSchedulerLockAndSleep slp(kernel, timer, thread, timeout);
+
+        // Check if any of the objects are already signaled.
+        for (auto i = 0; i < num_objects; ++i) {
+            ASSERT(objects[i] != nullptr);
+
+            if (objects[i]->IsSignaled()) {
+                *out_index = i;
+                slp.CancelSleep();
+                return RESULT_SUCCESS;
+            }
+        }
+
+        // Check if the timeout is zero.
+        if (timeout == 0) {
+            slp.CancelSleep();
+            return Svc::ResultTimedOut;
+        }
+
+        // Check if the thread should terminate.
+        if (thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Check if waiting was canceled.
+        if (thread->IsWaitCancelled()) {
+            slp.CancelSleep();
+            thread->ClearWaitCancelled();
+            return Svc::ResultCancelled;
+        }
+
+        // Add the waiters.
+        for (auto i = 0; i < num_objects; ++i) {
+            thread_nodes[i].thread = thread;
+            thread_nodes[i].next = nullptr;
+
+            if (objects[i]->thread_list_tail == nullptr) {
+                objects[i]->thread_list_head = std::addressof(thread_nodes[i]);
+            } else {
+                objects[i]->thread_list_tail->next = std::addressof(thread_nodes[i]);
+            }
+
+            objects[i]->thread_list_tail = std::addressof(thread_nodes[i]);
+        }
+
+        // For debugging only
+        thread->SetWaitObjectsForDebugging({objects, static_cast<std::size_t>(num_objects)});
+
+        // Mark the thread as waiting.
+        thread->SetCancellable();
+        thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        thread->SetState(ThreadState::Waiting);
+        thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
+    }
+
+    // The lock/sleep is done, so we should be able to get our result.
+    // Thread is no longer cancellable.
+    thread->ClearCancellable();
+
+    // For debugging only
+    thread->SetWaitObjectsForDebugging({});
+
+    // Cancel the timer as needed.
+    if (timer != InvalidHandle) {
+        kernel.TimeManager().UnscheduleTimeEvent(timer);
+    }
+
+    // Get the wait result.
+    ResultCode wait_result{RESULT_SUCCESS};
+    s32 sync_index = -1;
+    {
+        KScopedSchedulerLock lock(kernel);
+        KSynchronizationObject* synced_obj{};
+        wait_result = thread->GetWaitResult(std::addressof(synced_obj));
+
+        for (auto i = 0; i < num_objects; ++i) {
+            // Unlink this thread's node from the object's waiter list. `link` is the pointer
+            // that refers to the node being inspected: the list head first, then each `next`.
+            ThreadListNode* const node = std::addressof(thread_nodes[i]);
+            ThreadListNode** link = std::addressof(objects[i]->thread_list_head);
+            ThreadListNode* tail_prev = nullptr;
+            while (*link != node) {
+                tail_prev = *link;
+                link = std::addressof((*link)->next);
+            }
+
+            // If the node was the tail, its predecessor (nullptr if none) becomes the tail.
+            if (objects[i]->thread_list_tail == node) {
+                objects[i]->thread_list_tail = tail_prev;
+            }
+
+            *link = node->next;
+
+            if (objects[i] == synced_obj) {
+                sync_index = i;
+            }
+        }
+    }
+
+    // Set output.
+    *out_index = sync_index;
+    return wait_result;
+}
+
+KSynchronizationObject::KSynchronizationObject(KernelCore& kernel) : Object{kernel} {}
+
+KSynchronizationObject::~KSynchronizationObject() = default;
+
+/// If signaled, hands `result` to every waiter still in the Waiting state and makes it Runnable.
+void KSynchronizationObject::NotifyAvailable(ResultCode result) {
+    KScopedSchedulerLock scheduler_lock(kernel);
+
+    // Nothing to wake unless the object is currently signaled.
+    if (IsSignaled()) {
+        // Walk the waiter list; nodes whose thread is no longer Waiting are skipped.
+        for (const ThreadListNode* node = thread_list_head; node; node = node->next) {
+            Thread* const waiter = node->thread;
+            if (waiter->GetState() != ThreadState::Waiting) {
+                continue;
+            }
+            waiter->SetSyncedObject(this, result);
+            waiter->SetState(ThreadState::Runnable);
+        }
+    }
+}
+
+/// Returns a snapshot of the threads currently linked into this object's waiter list.
+std::vector<Thread*> KSynchronizationObject::GetWaitingThreadsForDebugging() const {
+    std::vector<Thread*> waiters;
+    {
+        // The scheduler lock is held only while the list is walked.
+        KScopedSchedulerLock scheduler_lock(kernel);
+        for (const ThreadListNode* node = thread_list_head; node; node = node->next) {
+            waiters.push_back(node->thread);
+        }
+    }
+
+    return waiters;
+}
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_synchronization_object.h b/src/core/hle/kernel/k_synchronization_object.h
new file mode 100644
index 000000000..14d80ebf1
--- /dev/null
+++ b/src/core/hle/kernel/k_synchronization_object.h
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+class KernelCore;
+class Synchronization;
+class Thread;
+
+/// Base class for kernel objects that threads can wait on; keeps a list of waiting threads.
+class KSynchronizationObject : public Object {
+public:
+    struct ThreadListNode {
+        ThreadListNode* next{}; // Next node in the waiter list (nullptr for the last node).
+        Thread* thread{};       // The waiting thread this node belongs to.
+    };
+
+    [[nodiscard]] static ResultCode Wait(KernelCore& kernel, s32* out_index,
+                                         KSynchronizationObject** objects, const s32 num_objects,
+                                         s64 timeout);
+
+    [[nodiscard]] virtual bool IsSignaled() const = 0;
+
+    [[nodiscard]] std::vector<Thread*> GetWaitingThreadsForDebugging() const;
+
+protected:
+    explicit KSynchronizationObject(KernelCore& kernel);
+    virtual ~KSynchronizationObject();
+
+    void NotifyAvailable(ResultCode result);
+    void NotifyAvailable() {
+        return this->NotifyAvailable(RESULT_SUCCESS);
+    }
+
+private:
+    ThreadListNode* thread_list_head{}; // First waiter node, or nullptr if nobody is waiting.
+    ThreadListNode* thread_list_tail{}; // Last waiter node; Wait() appends new nodes after it.
+};
+
+// DynamicObjectCast specialization: only waitable objects convert to KSynchronizationObject.
+template <>
+inline std::shared_ptr<KSynchronizationObject> DynamicObjectCast<KSynchronizationObject>(
+    std::shared_ptr<Object> object) {
+    if (!object || !object->IsWaitable()) {
+        return nullptr;
+    }
+    return std::static_pointer_cast<KSynchronizationObject>(object);
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e8ece8164..c0ff287a6 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -38,7 +38,6 @@
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/service_thread.h"
 #include "core/hle/kernel/shared_memory.h"
-#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/lock.h"
@@ -51,8 +50,7 @@ namespace Kernel {
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
-        : synchronization{system}, time_manager{system}, global_handle_table{kernel}, system{
-                                                                                          system} {}
+        : time_manager{system}, global_handle_table{kernel}, system{system} {}
 
     void SetMulticore(bool is_multicore) {
         this->is_multicore = is_multicore;
@@ -307,7 +305,6 @@ struct KernelCore::Impl {
     std::vector<std::shared_ptr<Process>> process_list;
     Process* current_process = nullptr;
     std::unique_ptr<Kernel::GlobalSchedulerContext> global_scheduler_context;
-    Kernel::Synchronization synchronization;
     Kernel::TimeManager time_manager;
 
     std::shared_ptr<ResourceLimit> system_resource_limit;
@@ -461,14 +458,6 @@ const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Kern
     return impl->interrupts;
 }
 
-Kernel::Synchronization& KernelCore::Synchronization() {
-    return impl->synchronization;
-}
-
-const Kernel::Synchronization& KernelCore::Synchronization() const {
-    return impl->synchronization;
-}
-
 Kernel::TimeManager& KernelCore::TimeManager() {
     return impl->time_manager;
 }
@@ -613,9 +602,11 @@ void KernelCore::Suspend(bool in_suspention) {
     const bool should_suspend = exception_exited || in_suspention;
     {
         KScopedSchedulerLock lock(*this);
-        ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep;
+        const auto state = should_suspend ? ThreadState::Runnable : ThreadState::Waiting;
         for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
-            impl->suspend_threads[i]->SetStatus(status);
+            impl->suspend_threads[i]->SetState(state);
+            impl->suspend_threads[i]->SetWaitReasonForDebugging(
+                ThreadWaitReasonForDebugging::Suspended);
         }
     }
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index e3169f5a7..933d9a7d6 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -33,7 +33,6 @@ template <typename T>
 class SlabHeap;
 } // namespace Memory
 
-class AddressArbiter;
 class ClientPort;
 class GlobalSchedulerContext;
 class HandleTable;
@@ -129,12 +128,6 @@ public:
     /// Gets the an instance of the current physical CPU core.
     const Kernel::PhysicalCore& CurrentPhysicalCore() const;
 
-    /// Gets the an instance of the Synchronization Interface.
-    Kernel::Synchronization& Synchronization();
-
-    /// Gets the an instance of the Synchronization Interface.
-    const Kernel::Synchronization& Synchronization() const;
-
     /// Gets the an instance of the TimeManager Interface.
     Kernel::TimeManager& TimeManager();
 
diff --git a/src/core/hle/kernel/memory/memory_layout.h b/src/core/hle/kernel/memory/memory_layout.h
index 9b3d6267a..c7c0b2f49 100644
--- a/src/core/hle/kernel/memory/memory_layout.h
+++ b/src/core/hle/kernel/memory/memory_layout.h
@@ -5,9 +5,28 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "core/device_memory.h"
 
 namespace Kernel::Memory {
 
+constexpr std::size_t KernelAslrAlignment = 2 * 1024 * 1024;        // 2 MiB
+constexpr std::size_t KernelVirtualAddressSpaceWidth = 1ULL << 39;  // 512 GiB (39-bit)
+constexpr std::size_t KernelPhysicalAddressSpaceWidth = 1ULL << 48; // 256 TiB (48-bit)
+constexpr std::size_t KernelVirtualAddressSpaceBase = 0ULL - KernelVirtualAddressSpaceWidth;
+constexpr std::size_t KernelVirtualAddressSpaceEnd =
+    KernelVirtualAddressSpaceBase + (KernelVirtualAddressSpaceWidth - KernelAslrAlignment);
+constexpr std::size_t KernelVirtualAddressSpaceLast = KernelVirtualAddressSpaceEnd - 1;
+constexpr std::size_t KernelVirtualAddressSpaceSize =
+    KernelVirtualAddressSpaceEnd - KernelVirtualAddressSpaceBase;
+
+constexpr bool IsKernelAddressKey(VAddr key) {
+    return key >= KernelVirtualAddressSpaceBase && key <= KernelVirtualAddressSpaceLast;
+}
+
+constexpr bool IsKernelAddress(VAddr address) {
+    return address >= KernelVirtualAddressSpaceBase && address < KernelVirtualAddressSpaceEnd;
+}
+
 class MemoryRegion final {
     friend class MemoryLayout;
 
diff --git a/src/core/hle/kernel/memory/page_heap.h b/src/core/hle/kernel/memory/page_heap.h
index 22b0de860..131093284 100644
--- a/src/core/hle/kernel/memory/page_heap.h
+++ b/src/core/hle/kernel/memory/page_heap.h
@@ -8,11 +8,11 @@
 #pragma once
 
 #include <array>
+#include <bit>
 #include <vector>
 
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "common/bit_util.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "core/hle/kernel/memory/memory_types.h"
@@ -105,7 +105,7 @@ private:
                         ASSERT(depth == 0);
                         return -1;
                     }
-                    offset = offset * 64 + Common::CountTrailingZeroes64(v);
+                    offset = offset * 64 + static_cast<u32>(std::countr_zero(v));
                     ++depth;
                 } while (depth < static_cast<s32>(used_depths));
 
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
deleted file mode 100644
index 4f8075e0e..000000000
--- a/src/core/hle/kernel/mutex.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/mutex.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/result.h"
-#include "core/memory.h"
-
-namespace Kernel {
-
-/// Returns the number of threads that are waiting for a mutex, and the highest priority one among
-/// those.
-static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThread(
-    const std::shared_ptr<Thread>& current_thread, VAddr mutex_addr) {
-
-    std::shared_ptr<Thread> highest_priority_thread;
-    u32 num_waiters = 0;
-
-    for (const auto& thread : current_thread->GetMutexWaitingThreads()) {
-        if (thread->GetMutexWaitAddress() != mutex_addr)
-            continue;
-
-        ++num_waiters;
-        if (highest_priority_thread == nullptr ||
-            thread->GetPriority() < highest_priority_thread->GetPriority()) {
-            highest_priority_thread = thread;
-        }
-    }
-
-    return {highest_priority_thread, num_waiters};
-}
-
-/// Update the mutex owner field of all threads waiting on the mutex to point to the new owner.
-static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread,
-                                   std::shared_ptr<Thread> new_owner) {
-    current_thread->RemoveMutexWaiter(new_owner);
-    const auto threads = current_thread->GetMutexWaitingThreads();
-    for (const auto& thread : threads) {
-        if (thread->GetMutexWaitAddress() != mutex_addr)
-            continue;
-
-        ASSERT(thread->GetLockOwner() == current_thread.get());
-        current_thread->RemoveMutexWaiter(thread);
-        if (new_owner != thread)
-            new_owner->AddMutexWaiter(thread);
-    }
-}
-
-Mutex::Mutex(Core::System& system) : system{system} {}
-Mutex::~Mutex() = default;
-
-ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
-                             Handle requesting_thread_handle) {
-    // The mutex address must be 4-byte aligned
-    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
-    }
-
-    auto& kernel = system.Kernel();
-    std::shared_ptr<Thread> current_thread =
-        SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
-    {
-        KScopedSchedulerLock lock(kernel);
-        // The mutex address must be 4-byte aligned
-        if ((address % sizeof(u32)) != 0) {
-            return ERR_INVALID_ADDRESS;
-        }
-
-        const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
-        std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
-        std::shared_ptr<Thread> requesting_thread =
-            handle_table.Get<Thread>(requesting_thread_handle);
-
-        // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of
-        // another thread.
-        ASSERT(requesting_thread == current_thread);
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-
-        const u32 addr_value = system.Memory().Read32(address);
-
-        // If the mutex isn't being held, just return success.
-        if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
-            return RESULT_SUCCESS;
-        }
-
-        if (holding_thread == nullptr) {
-            return ERR_INVALID_HANDLE;
-        }
-
-        // Wait until the mutex is released
-        current_thread->SetMutexWaitAddress(address);
-        current_thread->SetWaitHandle(requesting_thread_handle);
-
-        current_thread->SetStatus(ThreadStatus::WaitMutex);
-
-        // Update the lock holder thread's priority to prevent priority inversion.
-        holding_thread->AddMutexWaiter(current_thread);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        auto* owner = current_thread->GetLockOwner();
-        if (owner != nullptr) {
-            owner->RemoveMutexWaiter(current_thread);
-        }
-    }
-    return current_thread->GetSignalingResult();
-}
-
-std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner,
-                                                             VAddr address) {
-    // The mutex address must be 4-byte aligned
-    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
-        return {ERR_INVALID_ADDRESS, nullptr};
-    }
-
-    auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
-    if (new_owner == nullptr) {
-        system.Memory().Write32(address, 0);
-        return {RESULT_SUCCESS, nullptr};
-    }
-    // Transfer the ownership of the mutex from the previous owner to the new one.
-    TransferMutexOwnership(address, owner, new_owner);
-    u32 mutex_value = new_owner->GetWaitHandle();
-    if (num_waiters >= 2) {
-        // Notify the guest that there are still some threads waiting for the mutex
-        mutex_value |= Mutex::MutexHasWaitersFlag;
-    }
-    new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-    new_owner->SetLockOwner(nullptr);
-    new_owner->ResumeFromWait();
-
-    system.Memory().Write32(address, mutex_value);
-    return {RESULT_SUCCESS, new_owner};
-}
-
-ResultCode Mutex::Release(VAddr address) {
-    auto& kernel = system.Kernel();
-    KScopedSchedulerLock lock(kernel);
-
-    std::shared_ptr<Thread> current_thread =
-        SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
-
-    auto [result, new_owner] = Unlock(current_thread, address);
-
-    if (result != RESULT_SUCCESS && new_owner != nullptr) {
-        new_owner->SetSynchronizationResults(nullptr, result);
-    }
-
-    return result;
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
deleted file mode 100644
index 3b81dc3df..000000000
--- a/src/core/hle/kernel/mutex.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-union ResultCode;
-
-namespace Core {
-class System;
-}
-
-namespace Kernel {
-
-class Mutex final {
-public:
-    explicit Mutex(Core::System& system);
-    ~Mutex();
-
-    /// Flag that indicates that a mutex still has threads waiting for it.
-    static constexpr u32 MutexHasWaitersFlag = 0x40000000;
-    /// Mask of the bits in a mutex address value that contain the mutex owner.
-    static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
-
-    /// Attempts to acquire a mutex at the specified address.
-    ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
-                          Handle requesting_thread_handle);
-
-    /// Unlocks a mutex for owner at address
-    std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner,
-                                                          VAddr address);
-
-    /// Releases the mutex at the specified address.
-    ResultCode Release(VAddr address);
-
-private:
-    Core::System& system;
-};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index e3391e2af..27124ef67 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -50,6 +50,11 @@ public:
     }
     virtual HandleType GetHandleType() const = 0;
 
+    void Close() {
+        // TODO(bunnei): This is a placeholder to decrement the reference count, which we will use
+        // when we implement KAutoObject instead of using shared_ptr. Currently a no-op.
+    }
+
     /**
      * Check if a thread can wait on the object
      * @return True if a thread can wait on the object, otherwise false
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b905b486a..37b77fa6e 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -55,7 +55,7 @@ void SetupMainThread(Core::System& system, Process& owner_process, u32 priority,
     // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
     {
         KScopedSchedulerLock lock{kernel};
-        thread->SetStatus(ThreadStatus::Ready);
+        thread->SetState(ThreadState::Runnable);
     }
 }
 } // Anonymous namespace
@@ -162,48 +162,6 @@ u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
     return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
 }
 
-void Process::InsertConditionVariableThread(std::shared_ptr<Thread> thread) {
-    VAddr cond_var_addr = thread->GetCondVarWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread> current_thread = *it;
-        if (current_thread->GetPriority() > thread->GetPriority()) {
-            thread_list.insert(it, thread);
-            return;
-        }
-        ++it;
-    }
-    thread_list.push_back(thread);
-}
-
-void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) {
-    VAddr cond_var_addr = thread->GetCondVarWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread> current_thread = *it;
-        if (current_thread.get() == thread.get()) {
-            thread_list.erase(it);
-            return;
-        }
-        ++it;
-    }
-}
-
-std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads(
-    const VAddr cond_var_addr) {
-    std::vector<std::shared_ptr<Thread>> result{};
-    std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        std::shared_ptr<Thread> current_thread = *it;
-        result.push_back(current_thread);
-        ++it;
-    }
-    return result;
-}
-
 void Process::RegisterThread(const Thread* thread) {
     thread_list.push_back(thread);
 }
@@ -318,7 +276,7 @@ void Process::PrepareForTermination() {
                 continue;
 
             // TODO(Subv): When are the other running/ready threads terminated?
-            ASSERT_MSG(thread->GetStatus() == ThreadStatus::WaitSynch,
+            ASSERT_MSG(thread->GetState() == ThreadState::Waiting,
                        "Exiting processes with non-waiting threads is currently unimplemented");
 
             thread->Stop();
@@ -406,21 +364,18 @@ void Process::LoadModule(CodeSet code_set, VAddr base_addr) {
     ReprotectSegment(code_set.DataSegment(), Memory::MemoryPermission::ReadAndWrite);
 }
 
+bool Process::IsSignaled() const {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+    return is_signaled;
+}
+
 Process::Process(Core::System& system)
-    : SynchronizationObject{system.Kernel()}, page_table{std::make_unique<Memory::PageTable>(
-                                                  system)},
-      handle_table{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
+    : KSynchronizationObject{system.Kernel()},
+      page_table{std::make_unique<Memory::PageTable>(system)}, handle_table{system.Kernel()},
+      address_arbiter{system}, condition_var{system}, system{system} {}
 
 Process::~Process() = default;
 
-void Process::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
-}
-
-bool Process::ShouldWait(const Thread* thread) const {
-    return !is_signaled;
-}
-
 void Process::ChangeStatus(ProcessStatus new_status) {
     if (status == new_status) {
         return;
@@ -428,7 +383,7 @@ void Process::ChangeStatus(ProcessStatus new_status) {
 
     status = new_status;
     is_signaled = true;
-    Signal();
+    NotifyAvailable();
 }
 
 ResultCode Process::AllocateMainThreadStack(std::size_t stack_size) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index e412e58aa..564e1f27d 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -11,11 +11,11 @@
 #include <unordered_map>
 #include <vector>
 #include "common/common_types.h"
-#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/process_capability.h"
-#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
 namespace Core {
@@ -63,7 +63,7 @@ enum class ProcessStatus {
     DebugBreak,
 };
 
-class Process final : public SynchronizationObject {
+class Process final : public KSynchronizationObject {
 public:
     explicit Process(Core::System& system);
     ~Process() override;
@@ -123,24 +123,30 @@ public:
         return handle_table;
     }
 
-    /// Gets a reference to the process' address arbiter.
-    AddressArbiter& GetAddressArbiter() {
-        return address_arbiter;
+    ResultCode SignalToAddress(VAddr address) {
+        return condition_var.SignalToAddress(address);
     }
 
-    /// Gets a const reference to the process' address arbiter.
-    const AddressArbiter& GetAddressArbiter() const {
-        return address_arbiter;
+    ResultCode WaitForAddress(Handle handle, VAddr address, u32 tag) {
+        return condition_var.WaitForAddress(handle, address, tag);
     }
 
-    /// Gets a reference to the process' mutex lock.
-    Mutex& GetMutex() {
-        return mutex;
+    void SignalConditionVariable(u64 cv_key, int32_t count) {
+        return condition_var.Signal(cv_key, count);
     }
 
-    /// Gets a const reference to the process' mutex lock
-    const Mutex& GetMutex() const {
-        return mutex;
+    ResultCode WaitConditionVariable(VAddr address, u64 cv_key, u32 tag, s64 ns) {
+        return condition_var.Wait(address, cv_key, tag, ns);
+    }
+
+    ResultCode SignalAddressArbiter(VAddr address, Svc::SignalType signal_type, s32 value,
+                                    s32 count) {
+        return address_arbiter.SignalToAddress(address, signal_type, value, count);
+    }
+
+    ResultCode WaitAddressArbiter(VAddr address, Svc::ArbitrationType arb_type, s32 value,
+                                  s64 timeout) {
+        return address_arbiter.WaitForAddress(address, arb_type, value, timeout);
     }
 
     /// Gets the address to the process' dedicated TLS region.
@@ -250,15 +256,6 @@ public:
         return thread_list;
     }
 
-    /// Insert a thread into the condition variable wait container
-    void InsertConditionVariableThread(std::shared_ptr<Thread> thread);
-
-    /// Remove a thread from the condition variable wait container
-    void RemoveConditionVariableThread(std::shared_ptr<Thread> thread);
-
-    /// Obtain all condition variable threads waiting for some address
-    std::vector<std::shared_ptr<Thread>> GetConditionVariableThreads(VAddr cond_var_addr);
-
     /// Registers a thread as being created under this process,
     /// adding it to this process' thread list.
     void RegisterThread(const Thread* thread);
@@ -304,6 +301,8 @@ public:
 
     void LoadModule(CodeSet code_set, VAddr base_addr);
 
+    bool IsSignaled() const override;
+
     ///////////////////////////////////////////////////////////////////////////////////////////////
     // Thread-local storage management
 
@@ -314,12 +313,6 @@ public:
     void FreeTLSRegion(VAddr tls_address);
 
 private:
-    /// Checks if the specified thread should wait until this process is available.
-    bool ShouldWait(const Thread* thread) const override;
-
-    /// Acquires/locks this process for the specified thread if it's available.
-    void Acquire(Thread* thread) override;
-
     /// Changes the process status. If the status is different
     /// from the current process status, then this will trigger
     /// a process signal.
@@ -373,12 +366,12 @@ private:
     HandleTable handle_table;
 
     /// Per-process address arbiter.
-    AddressArbiter address_arbiter;
+    KAddressArbiter address_arbiter;
 
     /// The per-process mutex lock instance used for handling various
     /// forms of services, such as lock arbitration, and condition
     /// variable related facilities.
-    Mutex mutex;
+    KConditionVariable condition_var;
 
     /// Address indicating the location of the process' dedicated TLS region.
     VAddr tls_region_address = 0;
@@ -389,9 +382,6 @@ private:
     /// List of threads that are running with this process as their owner.
     std::list<const Thread*> thread_list;
 
-    /// List of threads waiting for a condition variable
-    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> cond_var_threads;
-
     /// Address of the top of the main thread's stack
     VAddr main_thread_stack_top{};
 
@@ -410,6 +400,8 @@ private:
     /// Schedule count of this process
     s64 schedule_count{};
 
+    bool is_signaled{};
+
     /// System context
     Core::System& system;
 };
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 0f128c586..0566311b6 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <bit>
+
 #include "common/bit_util.h"
 #include "common/logging/log.h"
 #include "core/hle/kernel/errors.h"
@@ -60,7 +62,7 @@ constexpr CapabilityType GetCapabilityType(u32 value) {
 
 u32 GetFlagBitOffset(CapabilityType type) {
     const auto value = static_cast<u32>(type);
-    return static_cast<u32>(Common::BitSize<u32>() - Common::CountLeadingZeroes32(value));
+    return static_cast<u32>(Common::BitSize<u32>() - static_cast<u32>(std::countl_zero(value)));
 }
 
 } // Anonymous namespace
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index cea262ce0..99ed0857e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -14,24 +14,22 @@
 
 namespace Kernel {
 
-ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ReadableEvent::ReadableEvent(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 ReadableEvent::~ReadableEvent() = default;
 
-bool ReadableEvent::ShouldWait(const Thread* thread) const {
-    return !is_signaled;
-}
-
-void ReadableEvent::Acquire(Thread* thread) {
-    ASSERT_MSG(IsSignaled(), "object unavailable!");
-}
-
 void ReadableEvent::Signal() {
     if (is_signaled) {
         return;
     }
 
     is_signaled = true;
-    SynchronizationObject::Signal();
+    NotifyAvailable();
+}
+
+bool ReadableEvent::IsSignaled() const {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    return is_signaled;
 }
 
 void ReadableEvent::Clear() {
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 3264dd066..34e477274 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/synchronization_object.h"
 
 union ResultCode;
 
@@ -14,7 +14,7 @@ namespace Kernel {
 class KernelCore;
 class WritableEvent;
 
-class ReadableEvent final : public SynchronizationObject {
+class ReadableEvent final : public KSynchronizationObject {
     friend class WritableEvent;
 
 public:
@@ -32,9 +32,6 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(const Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-
     /// Unconditionally clears the readable event's state.
     void Clear();
 
@@ -46,11 +43,14 @@ public:
     ///      then ERR_INVALID_STATE will be returned.
     ResultCode Reset();
 
-    void Signal() override;
+    void Signal();
+
+    bool IsSignaled() const override;
 
 private:
     explicit ReadableEvent(KernelCore& kernel);
 
+    bool is_signaled{};
     std::string name; ///< Name of event (optional)
 };
 
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index a549ae9d7..82857f93b 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -13,7 +13,7 @@
 
 namespace Kernel {
 
-ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ServerPort::ServerPort(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 ServerPort::~ServerPort() = default;
 
 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
@@ -28,15 +28,9 @@ ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
 
 void ServerPort::AppendPendingSession(std::shared_ptr<ServerSession> pending_session) {
     pending_sessions.push_back(std::move(pending_session));
-}
-
-bool ServerPort::ShouldWait(const Thread* thread) const {
-    // If there are no pending sessions, we wait until a new one is added.
-    return pending_sessions.empty();
-}
-
-void ServerPort::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+    if (pending_sessions.size() == 1) {
+        NotifyAvailable();
+    }
 }
 
 bool ServerPort::IsSignaled() const {
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index 41b191b86..6470df993 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -9,8 +9,8 @@
 #include <utility>
 #include <vector>
 #include "common/common_types.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
 namespace Kernel {
@@ -20,7 +20,7 @@ class KernelCore;
 class ServerSession;
 class SessionRequestHandler;
 
-class ServerPort final : public SynchronizationObject {
+class ServerPort final : public KSynchronizationObject {
 public:
     explicit ServerPort(KernelCore& kernel);
     ~ServerPort() override;
@@ -79,9 +79,6 @@ public:
     /// waiting to be accepted by this port.
     void AppendPendingSession(std::shared_ptr<ServerSession> pending_session);
 
-    bool ShouldWait(const Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-
     bool IsSignaled() const override;
 
 private:
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index b40fe3916..4f2bb7822 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -24,7 +24,7 @@
 
 namespace Kernel {
 
-ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ServerSession::ServerSession(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 
 ServerSession::~ServerSession() {
     kernel.ReleaseServiceThread(service_thread);
@@ -42,16 +42,6 @@ ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kern
     return MakeResult(std::move(session));
 }
 
-bool ServerSession::ShouldWait(const Thread* thread) const {
-    // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
-    if (!parent->Client()) {
-        return false;
-    }
-
-    // Wait if we have no pending requests, or if we're currently handling a request.
-    return pending_requesting_threads.empty() || currently_handling != nullptr;
-}
-
 bool ServerSession::IsSignaled() const {
     // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
     if (!parent->Client()) {
@@ -62,15 +52,6 @@ bool ServerSession::IsSignaled() const {
     return !pending_requesting_threads.empty() && currently_handling == nullptr;
 }
 
-void ServerSession::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
-    // We are now handling a request, pop it from the stack.
-    // TODO(Subv): What happens if the client endpoint is closed before any requests are made?
-    ASSERT(!pending_requesting_threads.empty());
-    currently_handling = pending_requesting_threads.back();
-    pending_requesting_threads.pop_back();
-}
-
 void ServerSession::ClientDisconnected() {
     // We keep a shared pointer to the hle handler to keep it alive throughout
     // the call to ClientDisconnected, as ClientDisconnected invalidates the
@@ -172,7 +153,7 @@ ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) {
     {
         KScopedSchedulerLock lock(kernel);
         if (!context.IsThreadWaiting()) {
-            context.GetThread().ResumeFromWait();
+            context.GetThread().Wakeup();
             context.GetThread().SetSynchronizationResults(nullptr, result);
         }
     }
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e8d1d99ea..9155cf7f5 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,8 +10,8 @@
 #include <vector>
 
 #include "common/threadsafe_queue.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/service_thread.h"
-#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
 namespace Core::Memory {
@@ -43,7 +43,7 @@ class Thread;
  * After the server replies to the request, the response is marshalled back to the caller's
  * TLS buffer and control is transferred back to it.
  */
-class ServerSession final : public SynchronizationObject {
+class ServerSession final : public KSynchronizationObject {
     friend class ServiceThread;
 
 public:
@@ -77,8 +77,6 @@ public:
         return parent.get();
     }
 
-    bool IsSignaled() const override;
-
     /**
      * Sets the HLE handler for the session. This handler will be called to service IPC requests
      * instead of the regular IPC machinery. (The regular IPC machinery is currently not
@@ -100,10 +98,6 @@ public:
     ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory,
                                  Core::Timing::CoreTiming& core_timing);
 
-    bool ShouldWait(const Thread* thread) const override;
-
-    void Acquire(Thread* thread) override;
-
     /// Called when a client disconnection occurs.
     void ClientDisconnected();
 
@@ -130,6 +124,8 @@ public:
         convert_to_domain = true;
     }
 
+    bool IsSignaled() const override;
+
 private:
     /// Queues a sync request from the emulated application.
     ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory);
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index e4dd53e24..75304b961 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -9,7 +9,7 @@
 
 namespace Kernel {
 
-Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {}
+Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 Session::~Session() = default;
 
 Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
@@ -24,18 +24,9 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
     return std::make_pair(std::move(client_session), std::move(server_session));
 }
 
-bool Session::ShouldWait(const Thread* thread) const {
-    UNIMPLEMENTED();
-    return {};
-}
-
 bool Session::IsSignaled() const {
     UNIMPLEMENTED();
     return true;
 }
 
-void Session::Acquire(Thread* thread) {
-    UNIMPLEMENTED();
-}
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h
index 7cd9c0d77..f6dd2c1d2 100644
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <utility>
 
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 
 namespace Kernel {
 
@@ -19,7 +19,7 @@ class ServerSession;
  * Parent structure to link the client and server endpoints of a session with their associated
  * client port.
  */
-class Session final : public SynchronizationObject {
+class Session final : public KSynchronizationObject {
 public:
     explicit Session(KernelCore& kernel);
     ~Session() override;
@@ -37,12 +37,8 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(const Thread* thread) const override;
-
     bool IsSignaled() const override;
 
-    void Acquire(Thread* thread) override;
-
     std::shared_ptr<ClientSession> Client() {
         if (auto result{client.lock()}) {
             return result;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index de3ed25da..cc8b661af 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -10,6 +10,7 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/common_funcs.h"
 #include "common/fiber.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
@@ -19,26 +20,28 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/cpu_manager.h"
-#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block.h"
+#include "core/hle/kernel/memory/memory_layout.h"
 #include "core/hle/kernel/memory/page_table.h"
-#include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/svc.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/svc_types.h"
 #include "core/hle/kernel/svc_wrap.h"
-#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/kernel/transfer_memory.h"
@@ -343,27 +346,11 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
     auto thread = kernel.CurrentScheduler()->GetCurrentThread();
     {
         KScopedSchedulerLock lock(kernel);
-        thread->InvalidateHLECallback();
-        thread->SetStatus(ThreadStatus::WaitIPC);
+        thread->SetState(ThreadState::Waiting);
+        thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC);
         session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming());
     }
 
-    if (thread->HasHLECallback()) {
-        Handle event_handle = thread->GetHLETimeEvent();
-        if (event_handle != InvalidHandle) {
-            auto& time_manager = kernel.TimeManager();
-            time_manager.UnscheduleTimeEvent(event_handle);
-        }
-
-        {
-            KScopedSchedulerLock lock(kernel);
-            auto* sync_object = thread->GetHLESyncObject();
-            sync_object->RemoveWaitingThread(SharedFrom(thread));
-        }
-
-        thread->InvokeHLECallback(SharedFrom(thread));
-    }
-
     return thread->GetSignalingResult();
 }
 
@@ -436,7 +423,7 @@ static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32*
 }
 
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
-static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
+static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr handles_address,
                                       u64 handle_count, s64 nano_seconds) {
     LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}",
               handles_address, handle_count, nano_seconds);
@@ -458,28 +445,26 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
     }
 
     auto& kernel = system.Kernel();
-    Thread::ThreadSynchronizationObjects objects(handle_count);
+    std::vector<KSynchronizationObject*> objects(handle_count);
     const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
 
     for (u64 i = 0; i < handle_count; ++i) {
         const Handle handle = memory.Read32(handles_address + i * sizeof(Handle));
-        const auto object = handle_table.Get<SynchronizationObject>(handle);
+        const auto object = handle_table.Get<KSynchronizationObject>(handle);
 
         if (object == nullptr) {
             LOG_ERROR(Kernel_SVC, "Object is a nullptr");
             return ERR_INVALID_HANDLE;
         }
 
-        objects[i] = object;
+        objects[i] = object.get();
     }
-    auto& synchronization = kernel.Synchronization();
-    const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds);
-    *index = handle_result;
-    return result;
+    return KSynchronizationObject::Wait(kernel, index, objects.data(),
+                                        static_cast<s32>(objects.size()), nano_seconds);
 }
 
 static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address,
-                                        s32 handle_count, u32 timeout_high, Handle* index) {
+                                        s32 handle_count, u32 timeout_high, s32* index) {
     const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)};
     return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds);
 }
@@ -504,56 +489,37 @@ static ResultCode CancelSynchronization32(Core::System& system, Handle thread_ha
     return CancelSynchronization(system, thread_handle);
 }
 
-/// Attempts to locks a mutex, creating it if it does not already exist
-static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
-                                VAddr mutex_addr, Handle requesting_thread_handle) {
-    LOG_TRACE(Kernel_SVC,
-              "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, "
-              "requesting_current_thread_handle=0x{:08X}",
-              holding_thread_handle, mutex_addr, requesting_thread_handle);
-
-    if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}",
-                  mutex_addr);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+/// Attempts to lock a mutex
+static ResultCode ArbitrateLock(Core::System& system, Handle thread_handle, VAddr address,
+                                u32 tag) {
+    LOG_TRACE(Kernel_SVC, "called thread_handle=0x{:08X}, address=0x{:X}, tag=0x{:08X}",
+              thread_handle, address, tag);
 
-    if (!Common::IsWordAligned(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is not word aligned, mutex_addr={:016X}", mutex_addr);
-        return ERR_INVALID_ADDRESS;
-    }
+    // Validate the input address.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(u32)), Svc::ResultInvalidAddress);
 
-    auto* const current_process = system.Kernel().CurrentProcess();
-    return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
-                                                  requesting_thread_handle);
+    return system.Kernel().CurrentProcess()->WaitForAddress(thread_handle, address, tag);
 }
 
-static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle,
-                                  u32 mutex_addr, Handle requesting_thread_handle) {
-    return ArbitrateLock(system, holding_thread_handle, mutex_addr, requesting_thread_handle);
+static ResultCode ArbitrateLock32(Core::System& system, Handle thread_handle, u32 address,
+                                  u32 tag) {
+    return ArbitrateLock(system, thread_handle, address, tag);
 }
 
 /// Unlock a mutex
-static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
-    LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
-
-    if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}",
-                  mutex_addr);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
+    LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);
 
-    if (!Common::IsWordAligned(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is not word aligned, mutex_addr={:016X}", mutex_addr);
-        return ERR_INVALID_ADDRESS;
-    }
+    // Validate the input address.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(u32)), Svc::ResultInvalidAddress);
 
-    auto* const current_process = system.Kernel().CurrentProcess();
-    return current_process->GetMutex().Release(mutex_addr);
+    return system.Kernel().CurrentProcess()->SignalToAddress(address);
 }
 
-static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) {
-    return ArbitrateUnlock(system, mutex_addr);
+static ResultCode ArbitrateUnlock32(Core::System& system, u32 address) {
+    return ArbitrateUnlock(system, address);
 }
 
 enum class BreakType : u32 {
@@ -1180,7 +1146,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
         return ERR_INVALID_HANDLE;
     }
 
-    thread->SetPriority(priority);
+    thread->SetBasePriority(priority);
 
     return RESULT_SUCCESS;
 }
@@ -1559,7 +1525,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
         return ERR_INVALID_HANDLE;
     }
 
-    ASSERT(thread->GetStatus() == ThreadStatus::Dormant);
+    ASSERT(thread->GetState() == ThreadState::Initialized);
 
     return thread->Start();
 }
@@ -1620,224 +1586,135 @@ static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanosec
 }
 
 /// Wait process wide key atomic
-static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr,
-                                           VAddr condition_variable_addr, Handle thread_handle,
-                                           s64 nano_seconds) {
-    LOG_TRACE(
-        Kernel_SVC,
-        "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
-        mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
-
-    if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) {
-        LOG_ERROR(
-            Kernel_SVC,
-            "Given mutex address must not be within the kernel address space. address=0x{:016X}",
-            mutex_addr);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    if (!Common::IsWordAligned(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}",
-                  mutex_addr);
-        return ERR_INVALID_ADDRESS;
-    }
-
-    ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
-    auto& kernel = system.Kernel();
-    Handle event_handle;
-    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
-    auto* const current_process = kernel.CurrentProcess();
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
-        const auto& handle_table = current_process->GetHandleTable();
-        std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
-        ASSERT(thread);
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-
-        if (thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return ERR_THREAD_TERMINATING;
-        }
-
-        const auto release_result = current_process->GetMutex().Release(mutex_addr);
-        if (release_result.IsError()) {
-            lock.CancelSleep();
-            return release_result;
-        }
-
-        if (nano_seconds == 0) {
-            lock.CancelSleep();
-            return RESULT_TIMEOUT;
-        }
-
-        current_thread->SetCondVarWaitAddress(condition_variable_addr);
-        current_thread->SetMutexWaitAddress(mutex_addr);
-        current_thread->SetWaitHandle(thread_handle);
-        current_thread->SetStatus(ThreadStatus::WaitCondVar);
-        current_process->InsertConditionVariableThread(SharedFrom(current_thread));
-    }
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-
-        auto* owner = current_thread->GetLockOwner();
-        if (owner != nullptr) {
-            owner->RemoveMutexWaiter(SharedFrom(current_thread));
+static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr address, VAddr cv_key,
+                                           u32 tag, s64 timeout_ns) {
+    LOG_TRACE(Kernel_SVC, "called address={:X}, cv_key={:X}, tag=0x{:08X}, timeout_ns={}", address,
+              cv_key, tag, timeout_ns);
+
+    // Validate input.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(int32_t)), Svc::ResultInvalidAddress);
+
+    // Convert timeout from nanoseconds to ticks.
+    s64 timeout{};
+    if (timeout_ns > 0) {
+        const s64 offset_tick(timeout_ns);
+        if (offset_tick > 0) {
+            timeout = offset_tick + 2;
+            if (timeout <= 0) {
+                timeout = std::numeric_limits<s64>::max();
+            }
+        } else {
+            timeout = std::numeric_limits<s64>::max();
         }
-
-        current_process->RemoveConditionVariableThread(SharedFrom(current_thread));
+    } else {
+        timeout = timeout_ns;
     }
-    // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    return current_thread->GetSignalingResult();
+    // Wait on the condition variable.
+    return system.Kernel().CurrentProcess()->WaitConditionVariable(
+        address, Common::AlignDown(cv_key, sizeof(u32)), tag, timeout);
 }
 
-static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr,
-                                             u32 condition_variable_addr, Handle thread_handle,
-                                             u32 nanoseconds_low, u32 nanoseconds_high) {
-    const auto nanoseconds = static_cast<s64>(nanoseconds_low | (u64{nanoseconds_high} << 32));
-    return WaitProcessWideKeyAtomic(system, mutex_addr, condition_variable_addr, thread_handle,
-                                    nanoseconds);
+static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 address, u32 cv_key, u32 tag,
+                                             u32 timeout_ns_low, u32 timeout_ns_high) {
+    const auto timeout_ns = static_cast<s64>(timeout_ns_low | (u64{timeout_ns_high} << 32));
+    return WaitProcessWideKeyAtomic(system, address, cv_key, tag, timeout_ns);
 }
 
 /// Signal process wide key
-static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
-    LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
-              condition_variable_addr, target);
+static void SignalProcessWideKey(Core::System& system, VAddr cv_key, s32 count) {
+    LOG_TRACE(Kernel_SVC, "called, cv_key=0x{:X}, count=0x{:08X}", cv_key, count);
 
-    ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
+    // Signal the condition variable.
+    return system.Kernel().CurrentProcess()->SignalConditionVariable(
+        Common::AlignDown(cv_key, sizeof(u32)), count);
+}
 
-    // Retrieve a list of all threads that are waiting for this condition variable.
-    auto& kernel = system.Kernel();
-    KScopedSchedulerLock lock(kernel);
-    auto* const current_process = kernel.CurrentProcess();
-    std::vector<std::shared_ptr<Thread>> waiting_threads =
-        current_process->GetConditionVariableThreads(condition_variable_addr);
-
-    // Only process up to 'target' threads, unless 'target' is less equal 0, in which case process
-    // them all.
-    std::size_t last = waiting_threads.size();
-    if (target > 0) {
-        last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
-    }
-    for (std::size_t index = 0; index < last; ++index) {
-        auto& thread = waiting_threads[index];
-
-        ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
-
-        // liberate Cond Var Thread.
-        current_process->RemoveConditionVariableThread(thread);
-
-        const std::size_t current_core = system.CurrentCoreIndex();
-        auto& monitor = system.Monitor();
-
-        // Atomically read the value of the mutex.
-        u32 mutex_val = 0;
-        u32 update_val = 0;
-        const VAddr mutex_address = thread->GetMutexWaitAddress();
-        do {
-            // If the mutex is not yet acquired, acquire it.
-            mutex_val = monitor.ExclusiveRead32(current_core, mutex_address);
-
-            if (mutex_val != 0) {
-                update_val = mutex_val | Mutex::MutexHasWaitersFlag;
-            } else {
-                update_val = thread->GetWaitHandle();
-            }
-        } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val));
-        monitor.ClearExclusive();
-        if (mutex_val == 0) {
-            // We were able to acquire the mutex, resume this thread.
-            auto* const lock_owner = thread->GetLockOwner();
-            if (lock_owner != nullptr) {
-                lock_owner->RemoveMutexWaiter(thread);
-            }
+static void SignalProcessWideKey32(Core::System& system, u32 cv_key, s32 count) {
+    SignalProcessWideKey(system, cv_key, count);
+}
 
-            thread->SetLockOwner(nullptr);
-            thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-            thread->ResumeFromWait();
-        } else {
-            // The mutex is already owned by some other thread, make this thread wait on it.
-            const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
-            const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
-            auto owner = handle_table.Get<Thread>(owner_handle);
-            ASSERT(owner);
-            if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
-                thread->SetStatus(ThreadStatus::WaitMutex);
-            }
+namespace {
 
-            owner->AddMutexWaiter(thread);
-        }
+constexpr bool IsValidSignalType(Svc::SignalType type) {
+    switch (type) {
+    case Svc::SignalType::Signal:
+    case Svc::SignalType::SignalAndIncrementIfEqual:
+    case Svc::SignalType::SignalAndModifyByWaitingCountIfEqual:
+        return true;
+    default:
+        return false;
     }
 }
 
-static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) {
-    SignalProcessWideKey(system, condition_variable_addr, target);
+constexpr bool IsValidArbitrationType(Svc::ArbitrationType type) {
+    switch (type) {
+    case Svc::ArbitrationType::WaitIfLessThan:
+    case Svc::ArbitrationType::DecrementAndWaitIfLessThan:
+    case Svc::ArbitrationType::WaitIfEqual:
+        return true;
+    default:
+        return false;
+    }
 }
 
-// Wait for an address (via Address Arbiter)
-static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
-                                 s64 timeout) {
-    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
-              type, value, timeout);
-
-    // If the passed address is a kernel virtual address, return invalid memory state.
-    if (Core::Memory::IsKernelVirtualAddress(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+} // namespace
 
-    // If the address is not properly aligned to 4 bytes, return invalid address.
-    if (!Common::IsWordAligned(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
+// Wait for an address (via Address Arbiter)
+static ResultCode WaitForAddress(Core::System& system, VAddr address, Svc::ArbitrationType arb_type,
+                                 s32 value, s64 timeout_ns) {
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, arb_type=0x{:X}, value=0x{:X}, timeout_ns={}",
+              address, arb_type, value, timeout_ns);
+
+    // Validate input.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(int32_t)), Svc::ResultInvalidAddress);
+    R_UNLESS(IsValidArbitrationType(arb_type), Svc::ResultInvalidEnumValue);
+
+    // Convert timeout from nanoseconds to ticks.
+    s64 timeout{};
+    if (timeout_ns > 0) {
+        const s64 offset_tick(timeout_ns);
+        if (offset_tick > 0) {
+            timeout = offset_tick + 2;
+            if (timeout <= 0) {
+                timeout = std::numeric_limits<s64>::max();
+            }
+        } else {
+            timeout = std::numeric_limits<s64>::max();
+        }
+    } else {
+        timeout = timeout_ns;
     }
 
-    const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
-    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
-    const ResultCode result =
-        address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
-    return result;
+    return system.Kernel().CurrentProcess()->WaitAddressArbiter(address, arb_type, value, timeout);
 }
 
-static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value,
-                                   u32 timeout_low, u32 timeout_high) {
-    const auto timeout = static_cast<s64>(timeout_low | (u64{timeout_high} << 32));
-    return WaitForAddress(system, address, type, value, timeout);
+static ResultCode WaitForAddress32(Core::System& system, u32 address, Svc::ArbitrationType arb_type,
+                                   s32 value, u32 timeout_ns_low, u32 timeout_ns_high) {
+    const auto timeout = static_cast<s64>(timeout_ns_low | (u64{timeout_ns_high} << 32));
+    return WaitForAddress(system, address, arb_type, value, timeout);
 }
 
 // Signals to an address (via Address Arbiter)
-static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
-                                  s32 num_to_wake) {
-    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
-              address, type, value, num_to_wake);
-
-    // If the passed address is a kernel virtual address, return invalid memory state.
-    if (Core::Memory::IsKernelVirtualAddress(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::SignalType signal_type,
+                                  s32 value, s32 count) {
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, signal_type=0x{:X}, value=0x{:X}, count=0x{:X}",
+              address, signal_type, value, count);
 
-    // If the address is not properly aligned to 4 bytes, return invalid address.
-    if (!Common::IsWordAligned(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
-    }
+    // Validate input.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(s32)), Svc::ResultInvalidAddress);
+    R_UNLESS(IsValidSignalType(signal_type), Svc::ResultInvalidEnumValue);
 
-    const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
-    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
-    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
+    return system.Kernel().CurrentProcess()->SignalAddressArbiter(address, signal_type, value,
+                                                                  count);
 }
 
-static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value,
-                                    s32 num_to_wake) {
-    return SignalToAddress(system, address, type, value, num_to_wake);
+static ResultCode SignalToAddress32(Core::System& system, u32 address, Svc::SignalType signal_type,
+                                    s32 value, s32 count) {
+    return SignalToAddress(system, address, signal_type, value, count);
 }
 
 static void KernelDebug([[maybe_unused]] Core::System& system,
diff --git a/src/core/hle/kernel/svc_common.h b/src/core/hle/kernel/svc_common.h
new file mode 100644
index 000000000..4af049551
--- /dev/null
+++ b/src/core/hle/kernel/svc_common.h
@@ -0,0 +1,14 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel::Svc {
+
+constexpr s32 ArgumentHandleCountMax = 0x40;
+constexpr u32 HandleWaitMask{1u << 30};
+
+} // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
new file mode 100644
index 000000000..78282f021
--- /dev/null
+++ b/src/core/hle/kernel/svc_results.h
@@ -0,0 +1,20 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Kernel::Svc {
+
+constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
+constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};
+constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
+constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
+constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
+constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
+constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120};
+constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
+
+} // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h
index 11e1d8e2d..d623f7a50 100644
--- a/src/core/hle/kernel/svc_types.h
+++ b/src/core/hle/kernel/svc_types.h
@@ -65,4 +65,16 @@ struct MemoryInfo {
     u32 padding{};
 };
 
+enum class SignalType : u32 {
+    Signal = 0,
+    SignalAndIncrementIfEqual = 1,
+    SignalAndModifyByWaitingCountIfEqual = 2,
+};
+
+enum class ArbitrationType : u32 {
+    WaitIfLessThan = 0,
+    DecrementAndWaitIfLessThan = 1,
+    WaitIfEqual = 2,
+};
+
 } // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 0b6dd9df0..a32750ed7 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -7,6 +7,7 @@
 #include "common/common_types.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/hle/kernel/svc_types.h"
 #include "core/hle/result.h"
 
 namespace Kernel {
@@ -215,9 +216,10 @@ void SvcWrap64(Core::System& system) {
         func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
 }
 
-template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
+// Used by WaitSynchronization
+template <ResultCode func(Core::System&, s32*, u64, u64, s64)>
 void SvcWrap64(Core::System& system) {
-    u32 param_1 = 0;
+    s32 param_1 = 0;
     const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
                             static_cast<s64>(Param(system, 3)))
                            .raw;
@@ -276,18 +278,22 @@ void SvcWrap64(Core::System& system) {
     FuncReturn(system, retval);
 }
 
-template <ResultCode func(Core::System&, u64, u32, s32, s64)>
+// Used by WaitForAddress
+template <ResultCode func(Core::System&, u64, Svc::ArbitrationType, s32, s64)>
 void SvcWrap64(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
-                            static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
-                           .raw);
+    FuncReturn(system,
+               func(system, Param(system, 0), static_cast<Svc::ArbitrationType>(Param(system, 1)),
+                    static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                   .raw);
 }
 
-template <ResultCode func(Core::System&, u64, u32, s32, s32)>
+// Used by SignalToAddress
+template <ResultCode func(Core::System&, u64, Svc::SignalType, s32, s32)>
 void SvcWrap64(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
-                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
-                           .raw);
+    FuncReturn(system,
+               func(system, Param(system, 0), static_cast<Svc::SignalType>(Param(system, 1)),
+                    static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                   .raw);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -503,22 +509,23 @@ void SvcWrap32(Core::System& system) {
 }
 
 // Used by WaitForAddress32
-template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)>
+template <ResultCode func(Core::System&, u32, Svc::ArbitrationType, s32, u32, u32)>
 void SvcWrap32(Core::System& system) {
     const u32 retval = func(system, static_cast<u32>(Param(system, 0)),
-                            static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)),
-                            static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4)))
+                            static_cast<Svc::ArbitrationType>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<u32>(Param(system, 3)),
+                            static_cast<u32>(Param(system, 4)))
                            .raw;
     FuncReturn(system, retval);
 }
 
 // Used by SignalToAddress32
-template <ResultCode func(Core::System&, u32, u32, s32, s32)>
+template <ResultCode func(Core::System&, u32, Svc::SignalType, s32, s32)>
 void SvcWrap32(Core::System& system) {
-    const u32 retval =
-        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
-             static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
-            .raw;
+    const u32 retval = func(system, static_cast<u32>(Param(system, 0)),
+                            static_cast<Svc::SignalType>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                           .raw;
     FuncReturn(system, retval);
 }
 
@@ -539,9 +546,9 @@ void SvcWrap32(Core::System& system) {
 }
 
 // Used by WaitSynchronization32
-template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>
+template <ResultCode func(Core::System&, u32, u32, s32, u32, s32*)>
 void SvcWrap32(Core::System& system) {
-    u32 param_1 = 0;
+    s32 param_1 = 0;
     const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2),
                             Param32(system, 3), &param_1)
                            .raw;
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
deleted file mode 100644
index d3f520ea2..000000000
--- a/src/core/hle/kernel/synchronization.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/synchronization.h"
-#include "core/hle/kernel/synchronization_object.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/time_manager.h"
-
-namespace Kernel {
-
-Synchronization::Synchronization(Core::System& system) : system{system} {}
-
-void Synchronization::SignalObject(SynchronizationObject& obj) const {
-    auto& kernel = system.Kernel();
-    KScopedSchedulerLock lock(kernel);
-    if (obj.IsSignaled()) {
-        for (auto thread : obj.GetWaitingThreads()) {
-            if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
-                if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) {
-                    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-                    ASSERT(thread->IsWaitingSync());
-                }
-                thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
-                thread->ResumeFromWait();
-            }
-        }
-        obj.ClearWaitingThreads();
-    }
-}
-
-std::pair<ResultCode, Handle> Synchronization::WaitFor(
-    std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
-    auto& kernel = system.Kernel();
-    auto* const thread = kernel.CurrentScheduler()->GetCurrentThread();
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
-        const auto itr =
-            std::find_if(sync_objects.begin(), sync_objects.end(),
-                         [thread](const std::shared_ptr<SynchronizationObject>& object) {
-                             return object->IsSignaled();
-                         });
-
-        if (itr != sync_objects.end()) {
-            // We found a ready object, acquire it and set the result value
-            SynchronizationObject* object = itr->get();
-            object->Acquire(thread);
-            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
-            lock.CancelSleep();
-            return {RESULT_SUCCESS, index};
-        }
-
-        if (nano_seconds == 0) {
-            lock.CancelSleep();
-            return {RESULT_TIMEOUT, InvalidHandle};
-        }
-
-        if (thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return {ERR_THREAD_TERMINATING, InvalidHandle};
-        }
-
-        if (thread->IsSyncCancelled()) {
-            thread->SetSyncCancelled(false);
-            lock.CancelSleep();
-            return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
-        }
-
-        for (auto& object : sync_objects) {
-            object->AddWaitingThread(SharedFrom(thread));
-        }
-
-        thread->SetSynchronizationObjects(&sync_objects);
-        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-        thread->SetStatus(ThreadStatus::WaitSynch);
-        thread->SetWaitingSync(true);
-    }
-    thread->SetWaitingSync(false);
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        ResultCode signaling_result = thread->GetSignalingResult();
-        SynchronizationObject* signaling_object = thread->GetSignalingObject();
-        thread->SetSynchronizationObjects(nullptr);
-        auto shared_thread = SharedFrom(thread);
-        for (auto& obj : sync_objects) {
-            obj->RemoveWaitingThread(shared_thread);
-        }
-        if (signaling_object != nullptr) {
-            const auto itr = std::find_if(
-                sync_objects.begin(), sync_objects.end(),
-                [signaling_object](const std::shared_ptr<SynchronizationObject>& object) {
-                    return object.get() == signaling_object;
-                });
-            ASSERT(itr != sync_objects.end());
-            signaling_object->Acquire(thread);
-            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
-            return {signaling_result, index};
-        }
-        return {signaling_result, -1};
-    }
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization.h b/src/core/hle/kernel/synchronization.h
deleted file mode 100644
index 379f4b1d3..000000000
--- a/src/core/hle/kernel/synchronization.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "core/hle/kernel/object.h"
-#include "core/hle/result.h"
-
-namespace Core {
-class System;
-} // namespace Core
-
-namespace Kernel {
-
-class SynchronizationObject;
-
-/**
- * The 'Synchronization' class is an interface for handling synchronization methods
- * used by Synchronization objects and synchronization SVCs. This centralizes processing of
- * such
- */
-class Synchronization {
-public:
-    explicit Synchronization(Core::System& system);
-
-    /// Signals a synchronization object, waking up all its waiting threads
-    void SignalObject(SynchronizationObject& obj) const;
-
-    /// Tries to see if waiting for any of the sync_objects is necessary, if not
-    /// it returns Success and the handle index of the signaled sync object. In
-    /// case not, the current thread will be locked and wait for nano_seconds or
-    /// for a synchronization object to signal.
-    std::pair<ResultCode, Handle> WaitFor(
-        std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds);
-
-private:
-    Core::System& system;
-};
-} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
deleted file mode 100644
index ba4d39157..000000000
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/synchronization.h"
-#include "core/hle/kernel/synchronization_object.h"
-#include "core/hle/kernel/thread.h"
-
-namespace Kernel {
-
-SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {}
-SynchronizationObject::~SynchronizationObject() = default;
-
-void SynchronizationObject::Signal() {
-    kernel.Synchronization().SignalObject(*this);
-}
-
-void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
-    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
-    if (itr == waiting_threads.end())
-        waiting_threads.push_back(std::move(thread));
-}
-
-void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
-    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
-    // If a thread passed multiple handles to the same object,
-    // the kernel might attempt to remove the thread from the object's
-    // waiting threads list multiple times.
-    if (itr != waiting_threads.end())
-        waiting_threads.erase(itr);
-}
-
-void SynchronizationObject::ClearWaitingThreads() {
-    waiting_threads.clear();
-}
-
-const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
-    return waiting_threads;
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
deleted file mode 100644
index 7408ed51f..000000000
--- a/src/core/hle/kernel/synchronization_object.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <memory>
-#include <vector>
-
-#include "core/hle/kernel/object.h"
-
-namespace Kernel {
-
-class KernelCore;
-class Synchronization;
-class Thread;
-
-/// Class that represents a Kernel object that a thread can be waiting on
-class SynchronizationObject : public Object {
-public:
-    explicit SynchronizationObject(KernelCore& kernel);
-    ~SynchronizationObject() override;
-
-    /**
-     * Check if the specified thread should wait until the object is available
-     * @param thread The thread about which we're deciding.
-     * @return True if the current thread should wait due to this object being unavailable
-     */
-    virtual bool ShouldWait(const Thread* thread) const = 0;
-
-    /// Acquire/lock the object for the specified thread if it is available
-    virtual void Acquire(Thread* thread) = 0;
-
-    /// Signal this object
-    virtual void Signal();
-
-    virtual bool IsSignaled() const {
-        return is_signaled;
-    }
-
-    /**
-     * Add a thread to wait on this object
-     * @param thread Pointer to thread to add
-     */
-    void AddWaitingThread(std::shared_ptr<Thread> thread);
-
-    /**
-     * Removes a thread from waiting on this object (e.g. if it was resumed already)
-     * @param thread Pointer to thread to remove
-     */
-    void RemoveWaitingThread(std::shared_ptr<Thread> thread);
-
-    /// Get a const reference to the waiting threads list for debug use
-    const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
-
-    void ClearWaitingThreads();
-
-protected:
-    std::atomic_bool is_signaled{}; // Tells if this sync object is signaled
-
-private:
-    /// Threads waiting for this object to become available
-    std::vector<std::shared_ptr<Thread>> waiting_threads;
-};
-
-// Specialization of DynamicObjectCast for SynchronizationObjects
-template <>
-inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>(
-    std::shared_ptr<Object> object) {
-    if (object != nullptr && object->IsWaitable()) {
-        return std::static_pointer_cast<SynchronizationObject>(object);
-    }
-    return nullptr;
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index a4f9e0d97..d97323255 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -17,9 +17,11 @@
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/memory/memory_layout.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/thread.h"
@@ -34,26 +36,19 @@
 
 namespace Kernel {
 
-bool Thread::ShouldWait(const Thread* thread) const {
-    return status != ThreadStatus::Dead;
-}
-
 bool Thread::IsSignaled() const {
-    return status == ThreadStatus::Dead;
-}
-
-void Thread::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+    return signaled;
 }
 
-Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
+Thread::Thread(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 Thread::~Thread() = default;
 
 void Thread::Stop() {
     {
         KScopedSchedulerLock lock(kernel);
-        SetStatus(ThreadStatus::Dead);
-        Signal();
+        SetState(ThreadState::Terminated);
+        signaled = true;
+        NotifyAvailable();
         kernel.GlobalHandleTable().Close(global_handle);
 
         if (owner_process) {
@@ -67,59 +62,27 @@ void Thread::Stop() {
     global_handle = 0;
 }
 
-void Thread::ResumeFromWait() {
+void Thread::Wakeup() {
     KScopedSchedulerLock lock(kernel);
-    switch (status) {
-    case ThreadStatus::Paused:
-    case ThreadStatus::WaitSynch:
-    case ThreadStatus::WaitHLEEvent:
-    case ThreadStatus::WaitSleep:
-    case ThreadStatus::WaitIPC:
-    case ThreadStatus::WaitMutex:
-    case ThreadStatus::WaitCondVar:
-    case ThreadStatus::WaitArb:
-    case ThreadStatus::Dormant:
-        break;
-
-    case ThreadStatus::Ready:
-        // The thread's wakeup callback must have already been cleared when the thread was first
-        // awoken.
-        ASSERT(hle_callback == nullptr);
-        // If the thread is waiting on multiple wait objects, it might be awoken more than once
-        // before actually resuming. We can ignore subsequent wakeups if the thread status has
-        // already been set to ThreadStatus::Ready.
-        return;
-    case ThreadStatus::Dead:
-        // This should never happen, as threads must complete before being stopped.
-        DEBUG_ASSERT_MSG(false, "Thread with object id {} cannot be resumed because it's DEAD.",
-                         GetObjectId());
-        return;
-    }
-
-    SetStatus(ThreadStatus::Ready);
-}
-
-void Thread::OnWakeUp() {
-    KScopedSchedulerLock lock(kernel);
-    SetStatus(ThreadStatus::Ready);
+    SetState(ThreadState::Runnable);
 }
 
 ResultCode Thread::Start() {
     KScopedSchedulerLock lock(kernel);
-    SetStatus(ThreadStatus::Ready);
+    SetState(ThreadState::Runnable);
     return RESULT_SUCCESS;
 }
 
 void Thread::CancelWait() {
     KScopedSchedulerLock lock(kernel);
-    if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {
+    if (GetState() != ThreadState::Waiting || !is_cancellable) {
         is_sync_cancelled = true;
         return;
     }
     // TODO(Blinkhawk): Implement cancel of server session
     is_sync_cancelled = false;
     SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
-    SetStatus(ThreadStatus::Ready);
+    SetState(ThreadState::Runnable);
 }
 
 static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
@@ -183,25 +146,24 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel);
 
     thread->thread_id = kernel.CreateNewThreadID();
-    thread->status = ThreadStatus::Dormant;
+    thread->thread_state = ThreadState::Initialized;
     thread->entry_point = entry_point;
     thread->stack_top = stack_top;
     thread->disable_count = 1;
     thread->tpidr_el0 = 0;
-    thread->nominal_priority = thread->current_priority = priority;
+    thread->current_priority = priority;
+    thread->base_priority = priority;
+    thread->lock_owner = nullptr;
     thread->schedule_count = -1;
     thread->last_scheduled_tick = 0;
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask.SetAffinity(processor_id, true);
-    thread->wait_objects = nullptr;
-    thread->mutex_wait_address = 0;
-    thread->condvar_wait_address = 0;
-    thread->wait_handle = 0;
     thread->name = std::move(name);
     thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();
     thread->owner_process = owner_process;
     thread->type = type_flags;
+    thread->signaled = false;
     if ((type_flags & THREADTYPE_IDLE) == 0) {
         auto& scheduler = kernel.GlobalSchedulerContext();
         scheduler.AddThread(thread);
@@ -226,153 +188,185 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
 }
 
-void Thread::SetPriority(u32 priority) {
-    KScopedSchedulerLock lock(kernel);
+void Thread::SetBasePriority(u32 priority) {
     ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,
                "Invalid priority value.");
-    nominal_priority = priority;
-    UpdatePriority();
+
+    KScopedSchedulerLock lock(kernel);
+
+    // Change our base priority.
+    base_priority = priority;
+
+    // Perform a priority restoration.
+    RestorePriority(kernel, this);
 }
 
-void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) {
+void Thread::SetSynchronizationResults(KSynchronizationObject* object, ResultCode result) {
     signaling_object = object;
     signaling_result = result;
 }
 
-s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
-    ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything");
-    const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object);
-    return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1);
-}
-
 VAddr Thread::GetCommandBufferAddress() const {
     // Offset from the start of TLS at which the IPC command buffer begins.
     constexpr u64 command_header_offset = 0x80;
     return GetTLSAddress() + command_header_offset;
 }
 
-void Thread::SetStatus(ThreadStatus new_status) {
-    if (new_status == status) {
-        return;
-    }
+void Thread::SetState(ThreadState state) {
+    KScopedSchedulerLock sl(kernel);
 
-    switch (new_status) {
-    case ThreadStatus::Ready:
-        SetSchedulingStatus(ThreadSchedStatus::Runnable);
-        break;
-    case ThreadStatus::Dormant:
-        SetSchedulingStatus(ThreadSchedStatus::None);
-        break;
-    case ThreadStatus::Dead:
-        SetSchedulingStatus(ThreadSchedStatus::Exited);
-        break;
-    default:
-        SetSchedulingStatus(ThreadSchedStatus::Paused);
-        break;
-    }
+    // Clear debugging state
+    SetMutexWaitAddressForDebugging({});
+    SetWaitReasonForDebugging({});
 
-    status = new_status;
+    const ThreadState old_state = thread_state;
+    thread_state =
+        static_cast<ThreadState>((old_state & ~ThreadState::Mask) | (state & ThreadState::Mask));
+    if (thread_state != old_state) {
+        KScheduler::OnThreadStateChanged(kernel, this, old_state);
+    }
 }
 
-void Thread::AddMutexWaiter(std::shared_ptr<Thread> thread) {
-    if (thread->lock_owner.get() == this) {
-        // If the thread is already waiting for this thread to release the mutex, ensure that the
-        // waiters list is consistent and return without doing anything.
-        const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-        ASSERT(iter != wait_mutex_threads.end());
-        return;
+void Thread::AddWaiterImpl(Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    // Find the right spot to insert the waiter.
+    auto it = waiter_list.begin();
+    while (it != waiter_list.end()) {
+        if (it->GetPriority() > thread->GetPriority()) {
+            break;
+        }
+        it++;
     }
 
-    // A thread can't wait on two different mutexes at the same time.
-    ASSERT(thread->lock_owner == nullptr);
+    // Keep track of how many kernel waiters we have.
+    if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+        ASSERT((num_kernel_waiters++) >= 0);
+    }
 
-    // Ensure that the thread is not already in the list of mutex waiters
-    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(iter == wait_mutex_threads.end());
+    // Insert the waiter.
+    waiter_list.insert(it, *thread);
+    thread->SetLockOwner(this);
+}
 
-    // Keep the list in an ordered fashion
-    const auto insertion_point = std::find_if(
-        wait_mutex_threads.begin(), wait_mutex_threads.end(),
-        [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
-    wait_mutex_threads.insert(insertion_point, thread);
-    thread->lock_owner = SharedFrom(this);
+void Thread::RemoveWaiterImpl(Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
-    UpdatePriority();
-}
+    // Keep track of how many kernel waiters we have.
+    if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+        ASSERT((num_kernel_waiters--) > 0);
+    }
 
-void Thread::RemoveMutexWaiter(std::shared_ptr<Thread> thread) {
-    ASSERT(thread->lock_owner.get() == this);
+    // Remove the waiter.
+    waiter_list.erase(waiter_list.iterator_to(*thread));
+    thread->SetLockOwner(nullptr);
+}
 
-    // Ensure that the thread is in the list of mutex waiters
-    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(iter != wait_mutex_threads.end());
+void Thread::RestorePriority(KernelCore& kernel, Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
-    wait_mutex_threads.erase(iter);
+    while (true) {
+        // We want to inherit priority where possible.
+        s32 new_priority = thread->GetBasePriority();
+        if (thread->HasWaiters()) {
+            new_priority = std::min(new_priority, thread->waiter_list.front().GetPriority());
+        }
 
-    thread->lock_owner = nullptr;
-    UpdatePriority();
-}
+        // If the priority we would inherit is not different from ours, don't do anything.
+        if (new_priority == thread->GetPriority()) {
+            return;
+        }
 
-void Thread::UpdatePriority() {
-    // If any of the threads waiting on the mutex have a higher priority
-    // (taking into account priority inheritance), then this thread inherits
-    // that thread's priority.
-    u32 new_priority = nominal_priority;
-    if (!wait_mutex_threads.empty()) {
-        if (wait_mutex_threads.front()->current_priority < new_priority) {
-            new_priority = wait_mutex_threads.front()->current_priority;
+        // Ensure we don't violate condition variable red black tree invariants.
+        if (auto* cv_tree = thread->GetConditionVariableTree(); cv_tree != nullptr) {
+            BeforeUpdatePriority(kernel, cv_tree, thread);
         }
-    }
 
-    if (new_priority == current_priority) {
-        return;
-    }
+        // Change the priority.
+        const s32 old_priority = thread->GetPriority();
+        thread->SetPriority(new_priority);
 
-    if (GetStatus() == ThreadStatus::WaitCondVar) {
-        owner_process->RemoveConditionVariableThread(SharedFrom(this));
-    }
+        // Restore the condition variable, if relevant.
+        if (auto* cv_tree = thread->GetConditionVariableTree(); cv_tree != nullptr) {
+            AfterUpdatePriority(kernel, cv_tree, thread);
+        }
 
-    SetCurrentPriority(new_priority);
+        // Update the scheduler.
+        KScheduler::OnThreadPriorityChanged(kernel, thread, old_priority);
 
-    if (GetStatus() == ThreadStatus::WaitCondVar) {
-        owner_process->InsertConditionVariableThread(SharedFrom(this));
-    }
+        // Keep the lock owner up to date.
+        Thread* lock_owner = thread->GetLockOwner();
+        if (lock_owner == nullptr) {
+            return;
+        }
 
-    if (!lock_owner) {
-        return;
+        // Update the thread in the lock owner's sorted list, and continue inheriting.
+        lock_owner->RemoveWaiterImpl(thread);
+        lock_owner->AddWaiterImpl(thread);
+        thread = lock_owner;
     }
+}
 
-    // Ensure that the thread is within the correct location in the waiting list.
-    auto old_owner = lock_owner;
-    lock_owner->RemoveMutexWaiter(SharedFrom(this));
-    old_owner->AddMutexWaiter(SharedFrom(this));
-
-    // Recursively update the priority of the thread that depends on the priority of this one.
-    lock_owner->UpdatePriority();
+void Thread::AddWaiter(Thread* thread) {
+    AddWaiterImpl(thread);
+    RestorePriority(kernel, this);
 }
 
-bool Thread::AllSynchronizationObjectsReady() const {
-    return std::none_of(wait_objects->begin(), wait_objects->end(),
-                        [this](const std::shared_ptr<SynchronizationObject>& object) {
-                            return object->ShouldWait(this);
-                        });
+void Thread::RemoveWaiter(Thread* thread) {
+    RemoveWaiterImpl(thread);
+    RestorePriority(kernel, this);
 }
 
-bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
-    ASSERT(hle_callback);
-    return hle_callback(std::move(thread));
+Thread* Thread::RemoveWaiterByKey(s32* out_num_waiters, VAddr key) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    s32 num_waiters{};
+    Thread* next_lock_owner{};
+    auto it = waiter_list.begin();
+    while (it != waiter_list.end()) {
+        if (it->GetAddressKey() == key) {
+            Thread* thread = std::addressof(*it);
+
+            // Keep track of how many kernel waiters we have.
+            if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+                ASSERT((num_kernel_waiters--) > 0);
+            }
+            it = waiter_list.erase(it);
+
+            // Update the next lock owner.
+            if (next_lock_owner == nullptr) {
+                next_lock_owner = thread;
+                next_lock_owner->SetLockOwner(nullptr);
+            } else {
+                next_lock_owner->AddWaiterImpl(thread);
+            }
+            num_waiters++;
+        } else {
+            it++;
+        }
+    }
+
+    // Do priority updates, if we have a next owner.
+    if (next_lock_owner) {
+        RestorePriority(kernel, this);
+        RestorePriority(kernel, next_lock_owner);
+    }
+
+    // Return output.
+    *out_num_waiters = num_waiters;
+    return next_lock_owner;
 }
 
 ResultCode Thread::SetActivity(ThreadActivity value) {
     KScopedSchedulerLock lock(kernel);
 
-    auto sched_status = GetSchedulingStatus();
+    auto sched_status = GetState();
 
-    if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) {
+    if (sched_status != ThreadState::Runnable && sched_status != ThreadState::Waiting) {
         return ERR_INVALID_STATE;
     }
 
-    if (IsPendingTermination()) {
+    if (IsTerminationRequested()) {
         return RESULT_SUCCESS;
     }
 
@@ -394,7 +388,8 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
     Handle event_handle{};
     {
         KScopedSchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
-        SetStatus(ThreadStatus::WaitSleep);
+        SetState(ThreadState::Waiting);
+        SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Sleep);
     }
 
     if (event_handle != InvalidHandle) {
@@ -405,34 +400,21 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
 }
 
 void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
-    const u32 old_state = scheduling_state;
+    const auto old_state = GetRawState();
     pausing_state |= static_cast<u32>(flag);
-    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
-    scheduling_state = base_scheduling | pausing_state;
+    const auto base_scheduling = GetState();
+    thread_state = base_scheduling | static_cast<ThreadState>(pausing_state);
     KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
 void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
-    const u32 old_state = scheduling_state;
+    const auto old_state = GetRawState();
     pausing_state &= ~static_cast<u32>(flag);
-    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
-    scheduling_state = base_scheduling | pausing_state;
+    const auto base_scheduling = GetState();
+    thread_state = base_scheduling | static_cast<ThreadState>(pausing_state);
     KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
-void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
-    const u32 old_state = scheduling_state;
-    scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
-                       static_cast<u32>(new_status);
-    KScheduler::OnThreadStateChanged(kernel, this, old_state);
-}
-
-void Thread::SetCurrentPriority(u32 new_priority) {
-    const u32 old_priority = std::exchange(current_priority, new_priority);
-    KScheduler::OnThreadPriorityChanged(kernel, this, kernel.CurrentScheduler()->GetCurrentThread(),
-                                        old_priority);
-}
-
 ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
     KScopedSchedulerLock lock(kernel);
     const auto HighestSetCore = [](u64 mask, u32 max_cores) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 11ef29888..6b66c9a0e 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -6,16 +6,21 @@
 
 #include <array>
 #include <functional>
+#include <span>
 #include <string>
 #include <utility>
 #include <vector>
 
+#include <boost/intrusive/list.hpp>
+
 #include "common/common_types.h"
+#include "common/intrusive_red_black_tree.h"
 #include "common/spin_lock.h"
 #include "core/arm/arm_interface.h"
 #include "core/hle/kernel/k_affinity_mask.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/svc_common.h"
 #include "core/hle/result.h"
 
 namespace Common {
@@ -73,19 +78,24 @@ enum ThreadProcessorId : s32 {
                                      (1 << THREADPROCESSORID_2) | (1 << THREADPROCESSORID_3)
 };
 
-enum class ThreadStatus {
-    Ready,        ///< Ready to run
-    Paused,       ///< Paused by SetThreadActivity or debug
-    WaitHLEEvent, ///< Waiting for hle event to finish
-    WaitSleep,    ///< Waiting due to a SleepThread SVC
-    WaitIPC,      ///< Waiting for the reply from an IPC request
-    WaitSynch,    ///< Waiting due to WaitSynchronization
-    WaitMutex,    ///< Waiting due to an ArbitrateLock svc
-    WaitCondVar,  ///< Waiting due to an WaitProcessWideKey svc
-    WaitArb,      ///< Waiting due to a SignalToAddress/WaitForAddress svc
-    Dormant,      ///< Created but not yet made ready
-    Dead          ///< Run to completion, or forcefully terminated
+enum class ThreadState : u16 {
+    Initialized = 0,
+    Waiting = 1,
+    Runnable = 2,
+    Terminated = 3,
+
+    SuspendShift = 4,
+    Mask = (1 << SuspendShift) - 1,
+
+    ProcessSuspended = (1 << (0 + SuspendShift)),
+    ThreadSuspended = (1 << (1 + SuspendShift)),
+    DebugSuspended = (1 << (2 + SuspendShift)),
+    BacktraceSuspended = (1 << (3 + SuspendShift)),
+    InitSuspended = (1 << (4 + SuspendShift)),
+
+    SuspendFlagMask = ((1 << 5) - 1) << SuspendShift,
 };
+DECLARE_ENUM_FLAG_OPERATORS(ThreadState);
 
 enum class ThreadWakeupReason {
     Signal, // The thread was woken up by WakeupAllWaitingThreads due to an object signal.
@@ -97,13 +107,6 @@ enum class ThreadActivity : u32 {
     Paused = 1,
 };
 
-enum class ThreadSchedStatus : u32 {
-    None = 0,
-    Paused = 1,
-    Runnable = 2,
-    Exited = 3,
-};
-
 enum class ThreadSchedFlags : u32 {
     ProcessPauseFlag = 1 << 4,
     ThreadPauseFlag = 1 << 5,
@@ -111,13 +114,20 @@ enum class ThreadSchedFlags : u32 {
     KernelInitPauseFlag = 1 << 8,
 };
 
-enum class ThreadSchedMasks : u32 {
-    LowMask = 0x000f,
-    HighMask = 0xfff0,
-    ForcePauseMask = 0x0070,
+enum class ThreadWaitReasonForDebugging : u32 {
+    None,            ///< Thread is not waiting
+    Sleep,           ///< Thread is waiting due to a SleepThread SVC
+    IPC,             ///< Thread is waiting for the reply from an IPC request
+    Synchronization, ///< Thread is waiting due to a WaitSynchronization SVC
+    ConditionVar,    ///< Thread is waiting due to a WaitProcessWideKey SVC
+    Arbitration,     ///< Thread is waiting due to a SignalToAddress/WaitForAddress SVC
+    Suspended,       ///< Thread is waiting due to process suspension
 };
 
-class Thread final : public SynchronizationObject {
+class Thread final : public KSynchronizationObject, public boost::intrusive::list_base_hook<> {
+    friend class KScheduler;
+    friend class Process;
+
 public:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;
@@ -127,10 +137,6 @@ public:
     using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
     using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
 
-    using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
-
-    using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>;
-
     /**
      * Creates and returns a new thread. The new thread is immediately scheduled
      * @param system The instance of the whole system
@@ -186,59 +192,54 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(const Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-    bool IsSignaled() const override;
-
     /**
      * Gets the thread's current priority
      * @return The current thread's priority
      */
-    u32 GetPriority() const {
+    [[nodiscard]] s32 GetPriority() const {
         return current_priority;
     }
 
     /**
+     * Sets the thread's current priority.
+     * @param priority The new priority.
+     */
+    void SetPriority(s32 priority) {
+        current_priority = priority;
+    }
+
+    /**
      * Gets the thread's nominal priority.
      * @return The current thread's nominal priority.
      */
-    u32 GetNominalPriority() const {
-        return nominal_priority;
+    [[nodiscard]] s32 GetBasePriority() const {
+        return base_priority;
     }
 
     /**
-     * Sets the thread's current priority
-     * @param priority The new priority
+     * Sets the thread's base (nominal) priority, then recomputes its effective priority.
+     * @param priority The new priority.
      */
-    void SetPriority(u32 priority);
-
-    /// Adds a thread to the list of threads that are waiting for a lock held by this thread.
-    void AddMutexWaiter(std::shared_ptr<Thread> thread);
-
-    /// Removes a thread from the list of threads that are waiting for a lock held by this thread.
-    void RemoveMutexWaiter(std::shared_ptr<Thread> thread);
-
-    /// Recalculates the current priority taking into account priority inheritance.
-    void UpdatePriority();
+    void SetBasePriority(u32 priority);
 
     /// Changes the core that the thread is running or scheduled to run on.
-    ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
+    [[nodiscard]] ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
 
     /**
      * Gets the thread's thread ID
      * @return The thread's ID
      */
-    u64 GetThreadID() const {
+    [[nodiscard]] u64 GetThreadID() const {
         return thread_id;
     }
 
     /// Resumes a thread from waiting
-    void ResumeFromWait();
-
-    void OnWakeUp();
+    void Wakeup();
 
     ResultCode Start();
 
+    bool IsSignaled() const override;
+
     /// Cancels a waiting operation that this thread may or may not be within.
     ///
     /// When the thread is within a waiting state, this will set the thread's
@@ -247,29 +248,20 @@ public:
     ///
     void CancelWait();
 
-    void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
+    void SetSynchronizationResults(KSynchronizationObject* object, ResultCode result);
 
-    SynchronizationObject* GetSignalingObject() const {
-        return signaling_object;
+    void SetSyncedObject(KSynchronizationObject* object, ResultCode result) {
+        SetSynchronizationResults(object, result);
     }
 
-    ResultCode GetSignalingResult() const {
+    ResultCode GetWaitResult(KSynchronizationObject** out) const {
+        *out = signaling_object;
         return signaling_result;
     }
 
-    /**
-     * Retrieves the index that this particular object occupies in the list of objects
-     * that the thread passed to WaitSynchronization, starting the search from the last element.
-     *
-     * It is used to set the output index of WaitSynchronization when the thread is awakened.
-     *
-     * When a thread wakes up due to an object signal, the kernel will use the index of the last
-     * matching object in the wait objects list in case of having multiple instances of the same
-     * object in the list.
-     *
-     * @param object Object to query the index of.
-     */
-    s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const;
+    ResultCode GetSignalingResult() const {
+        return signaling_result;
+    }
 
     /**
      * Stops a thread, invalidating it from further use
@@ -341,18 +333,22 @@ public:
 
     std::shared_ptr<Common::Fiber>& GetHostContext();
 
-    ThreadStatus GetStatus() const {
-        return status;
+    ThreadState GetState() const {
+        return thread_state & ThreadState::Mask;
+    }
+
+    ThreadState GetRawState() const {
+        return thread_state;
     }
 
-    void SetStatus(ThreadStatus new_status);
+    void SetState(ThreadState state);
 
     s64 GetLastScheduledTick() const {
-        return this->last_scheduled_tick;
+        return last_scheduled_tick;
     }
 
     void SetLastScheduledTick(s64 tick) {
-        this->last_scheduled_tick = tick;
+        last_scheduled_tick = tick;
     }
 
     u64 GetTotalCPUTimeTicks() const {
@@ -387,98 +383,18 @@ public:
         return owner_process;
     }
 
-    const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
-        return *wait_objects;
-    }
-
-    void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) {
-        wait_objects = objects;
-    }
-
-    void ClearSynchronizationObjects() {
-        for (const auto& waiting_object : *wait_objects) {
-            waiting_object->RemoveWaitingThread(SharedFrom(this));
-        }
-        wait_objects->clear();
-    }
-
-    /// Determines whether all the objects this thread is waiting on are ready.
-    bool AllSynchronizationObjectsReady() const;
-
     const MutexWaitingThreads& GetMutexWaitingThreads() const {
         return wait_mutex_threads;
     }
 
     Thread* GetLockOwner() const {
-        return lock_owner.get();
-    }
-
-    void SetLockOwner(std::shared_ptr<Thread> owner) {
-        lock_owner = std::move(owner);
-    }
-
-    VAddr GetCondVarWaitAddress() const {
-        return condvar_wait_address;
-    }
-
-    void SetCondVarWaitAddress(VAddr address) {
-        condvar_wait_address = address;
-    }
-
-    VAddr GetMutexWaitAddress() const {
-        return mutex_wait_address;
-    }
-
-    void SetMutexWaitAddress(VAddr address) {
-        mutex_wait_address = address;
-    }
-
-    Handle GetWaitHandle() const {
-        return wait_handle;
-    }
-
-    void SetWaitHandle(Handle handle) {
-        wait_handle = handle;
-    }
-
-    VAddr GetArbiterWaitAddress() const {
-        return arb_wait_address;
-    }
-
-    void SetArbiterWaitAddress(VAddr address) {
-        arb_wait_address = address;
-    }
-
-    bool HasHLECallback() const {
-        return hle_callback != nullptr;
-    }
-
-    void SetHLECallback(HLECallback callback) {
-        hle_callback = std::move(callback);
-    }
-
-    void SetHLETimeEvent(Handle time_event) {
-        hle_time_event = time_event;
-    }
-
-    void SetHLESyncObject(SynchronizationObject* object) {
-        hle_object = object;
-    }
-
-    Handle GetHLETimeEvent() const {
-        return hle_time_event;
-    }
-
-    SynchronizationObject* GetHLESyncObject() const {
-        return hle_object;
+        return lock_owner;
     }
 
-    void InvalidateHLECallback() {
-        SetHLECallback(nullptr);
+    void SetLockOwner(Thread* owner) {
+        lock_owner = owner;
     }
 
-    bool InvokeHLECallback(std::shared_ptr<Thread> thread);
-
     u32 GetIdealCore() const {
         return ideal_core;
     }
@@ -493,20 +409,11 @@ public:
     ResultCode Sleep(s64 nanoseconds);
 
     s64 GetYieldScheduleCount() const {
-        return this->schedule_count;
+        return schedule_count;
     }
 
     void SetYieldScheduleCount(s64 count) {
-        this->schedule_count = count;
-    }
-
-    ThreadSchedStatus GetSchedulingStatus() const {
-        return static_cast<ThreadSchedStatus>(scheduling_state &
-                                              static_cast<u32>(ThreadSchedMasks::LowMask));
-    }
-
-    bool IsRunnable() const {
-        return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable);
+        schedule_count = count;
     }
 
     bool IsRunning() const {
@@ -517,36 +424,32 @@ public:
         is_running = value;
     }
 
-    bool IsSyncCancelled() const {
+    bool IsWaitCancelled() const {
         return is_sync_cancelled;
     }
 
-    void SetSyncCancelled(bool value) {
-        is_sync_cancelled = value;
+    void ClearWaitCancelled() {
+        is_sync_cancelled = false;
     }
 
     Handle GetGlobalHandle() const {
         return global_handle;
     }
 
-    bool IsWaitingForArbitration() const {
-        return waiting_for_arbitration;
+    bool IsCancellable() const {
+        return is_cancellable;
     }
 
-    void WaitForArbitration(bool set) {
-        waiting_for_arbitration = set;
+    void SetCancellable() {
+        is_cancellable = true;
     }
 
-    bool IsWaitingSync() const {
-        return is_waiting_on_sync;
+    void ClearCancellable() {
+        is_cancellable = false;
     }
 
-    void SetWaitingSync(bool is_waiting) {
-        is_waiting_on_sync = is_waiting;
-    }
-
-    bool IsPendingTermination() const {
-        return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited;
+    bool IsTerminationRequested() const {
+        return will_be_terminated || GetRawState() == ThreadState::Terminated;
     }
 
     bool IsPaused() const {
@@ -578,21 +481,21 @@ public:
         constexpr QueueEntry() = default;
 
         constexpr void Initialize() {
-            this->prev = nullptr;
-            this->next = nullptr;
+            prev = nullptr;
+            next = nullptr;
         }
 
         constexpr Thread* GetPrev() const {
-            return this->prev;
+            return prev;
         }
         constexpr Thread* GetNext() const {
-            return this->next;
+            return next;
         }
         constexpr void SetPrev(Thread* thread) {
-            this->prev = thread;
+            prev = thread;
         }
         constexpr void SetNext(Thread* thread) {
-            this->next = thread;
+            next = thread;
         }
 
     private:
@@ -601,11 +504,11 @@ public:
     };
 
     QueueEntry& GetPriorityQueueEntry(s32 core) {
-        return this->per_core_priority_queue_entry[core];
+        return per_core_priority_queue_entry[core];
     }
 
     const QueueEntry& GetPriorityQueueEntry(s32 core) const {
-        return this->per_core_priority_queue_entry[core];
+        return per_core_priority_queue_entry[core];
     }
 
     s32 GetDisableDispatchCount() const {
@@ -622,24 +525,170 @@ public:
         disable_count--;
     }
 
+    void SetWaitReasonForDebugging(ThreadWaitReasonForDebugging reason) {
+        wait_reason_for_debugging = reason;
+    }
+
+    [[nodiscard]] ThreadWaitReasonForDebugging GetWaitReasonForDebugging() const {
+        return wait_reason_for_debugging;
+    }
+
+    void SetWaitObjectsForDebugging(const std::span<KSynchronizationObject*>& objects) {
+        wait_objects_for_debugging.clear();
+        wait_objects_for_debugging.reserve(objects.size());
+        for (const auto& object : objects) {
+            wait_objects_for_debugging.emplace_back(object);
+        }
+    }
+
+    [[nodiscard]] const std::vector<KSynchronizationObject*>& GetWaitObjectsForDebugging() const {
+        return wait_objects_for_debugging;
+    }
+
+    void SetMutexWaitAddressForDebugging(VAddr address) {
+        mutex_wait_address_for_debugging = address;
+    }
+
+    [[nodiscard]] VAddr GetMutexWaitAddressForDebugging() const {
+        return mutex_wait_address_for_debugging;
+    }
+
+    void AddWaiter(Thread* thread);
+
+    void RemoveWaiter(Thread* thread);
+
+    [[nodiscard]] Thread* RemoveWaiterByKey(s32* out_num_waiters, VAddr key);
+
+    [[nodiscard]] VAddr GetAddressKey() const {
+        return address_key;
+    }
+
+    [[nodiscard]] u32 GetAddressKeyValue() const {
+        return address_key_value;
+    }
+
+    void SetAddressKey(VAddr key) {
+        address_key = key;
+    }
+
+    void SetAddressKey(VAddr key, u32 val) {
+        address_key = key;
+        address_key_value = val;
+    }
+
 private:
-    friend class GlobalSchedulerContext;
-    friend class KScheduler;
-    friend class Process;
+    static constexpr size_t PriorityInheritanceCountMax = 10;
+    union SyncObjectBuffer {
+        std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> sync_objects{};
+        std::array<Handle,
+                   Svc::ArgumentHandleCountMax*(sizeof(KSynchronizationObject*) / sizeof(Handle))>
+            handles;
+        constexpr SyncObjectBuffer() {}
+    };
+    static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles));
+
+    struct ConditionVariableComparator {
+        struct LightCompareType {
+            u64 cv_key{};
+            s32 priority{};
+
+            [[nodiscard]] constexpr u64 GetConditionVariableKey() const {
+                return cv_key;
+            }
+
+            [[nodiscard]] constexpr s32 GetPriority() const {
+                return priority;
+            }
+        };
+
+        template <typename T>
+        requires(
+            std::same_as<T, Thread> ||
+            std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs,
+                                                                            const Thread& rhs) {
+            const uintptr_t l_key = lhs.GetConditionVariableKey();
+            const uintptr_t r_key = rhs.GetConditionVariableKey();
+
+            if (l_key < r_key) {
+                // Sort first by key
+                return -1;
+            } else if (l_key == r_key && lhs.GetPriority() < rhs.GetPriority()) {
+                // And then by priority.
+                return -1;
+            } else {
+                return 1;
+            }
+        }
+    };
+
+    Common::IntrusiveRedBlackTreeNode condvar_arbiter_tree_node{};
+
+    using ConditionVariableThreadTreeTraits =
+        Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&Thread::condvar_arbiter_tree_node>;
+    using ConditionVariableThreadTree =
+        ConditionVariableThreadTreeTraits::TreeType<ConditionVariableComparator>;
+
+public:
+    using ConditionVariableThreadTreeType = ConditionVariableThreadTree;
+
+    [[nodiscard]] uintptr_t GetConditionVariableKey() const {
+        return condvar_key;
+    }
+
+    [[nodiscard]] uintptr_t GetAddressArbiterKey() const {
+        return condvar_key;
+    }
 
-    void SetSchedulingStatus(ThreadSchedStatus new_status);
+    void SetConditionVariable(ConditionVariableThreadTree* tree, VAddr address, uintptr_t cv_key,
+                              u32 value) {
+        condvar_tree = tree;
+        condvar_key = cv_key;
+        address_key = address;
+        address_key_value = value;
+    }
+
+    void ClearConditionVariable() {
+        condvar_tree = nullptr;
+    }
+
+    [[nodiscard]] bool IsWaitingForConditionVariable() const {
+        return condvar_tree != nullptr;
+    }
+
+    void SetAddressArbiter(ConditionVariableThreadTree* tree, uintptr_t address) {
+        condvar_tree = tree;
+        condvar_key = address;
+    }
+
+    void ClearAddressArbiter() {
+        condvar_tree = nullptr;
+    }
+
+    [[nodiscard]] bool IsWaitingForAddressArbiter() const {
+        return condvar_tree != nullptr;
+    }
+
+    [[nodiscard]] ConditionVariableThreadTree* GetConditionVariableTree() const {
+        return condvar_tree;
+    }
+
+    [[nodiscard]] bool HasWaiters() const {
+        return !waiter_list.empty();
+    }
+
+private:
     void AddSchedulingFlag(ThreadSchedFlags flag);
     void RemoveSchedulingFlag(ThreadSchedFlags flag);
-
-    void SetCurrentPriority(u32 new_priority);
+    void AddWaiterImpl(Thread* thread);
+    void RemoveWaiterImpl(Thread* thread);
+    static void RestorePriority(KernelCore& kernel, Thread* thread);
 
     Common::SpinLock context_guard{};
     ThreadContext32 context_32{};
     ThreadContext64 context_64{};
     std::shared_ptr<Common::Fiber> host_context{};
 
-    ThreadStatus status = ThreadStatus::Dormant;
-    u32 scheduling_state = 0;
+    ThreadState thread_state = ThreadState::Initialized;
 
     u64 thread_id = 0;
 
@@ -652,11 +701,11 @@ private:
     /// Nominal thread priority, as set by the emulated application.
     /// The nominal priority is the thread priority without priority
     /// inheritance taken into account.
-    u32 nominal_priority = 0;
+    s32 base_priority{};
 
     /// Current thread priority. This may change over the course of the
     /// thread's lifetime in order to facilitate priority inheritance.
-    u32 current_priority = 0;
+    s32 current_priority{};
 
     u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
     s64 schedule_count{};
@@ -671,37 +720,27 @@ private:
     Process* owner_process;
 
     /// Objects that the thread is waiting on, in the same order as they were
-    /// passed to WaitSynchronization.
-    ThreadSynchronizationObjects* wait_objects;
+    /// passed to WaitSynchronization. This is used for debugging only.
+    std::vector<KSynchronizationObject*> wait_objects_for_debugging;
 
-    SynchronizationObject* signaling_object;
+    /// The current mutex wait address. This is used for debugging only.
+    VAddr mutex_wait_address_for_debugging{};
+
+    /// The reason the thread is waiting. This is used for debugging only.
+    ThreadWaitReasonForDebugging wait_reason_for_debugging{};
+
+    KSynchronizationObject* signaling_object;
     ResultCode signaling_result{RESULT_SUCCESS};
 
     /// List of threads that are waiting for a mutex that is held by this thread.
     MutexWaitingThreads wait_mutex_threads;
 
     /// Thread that owns the lock that this thread is waiting for.
-    std::shared_ptr<Thread> lock_owner;
-
-    /// If waiting on a ConditionVariable, this is the ConditionVariable address
-    VAddr condvar_wait_address = 0;
-    /// If waiting on a Mutex, this is the mutex address
-    VAddr mutex_wait_address = 0;
-    /// The handle used to wait for the mutex.
-    Handle wait_handle = 0;
-
-    /// If waiting for an AddressArbiter, this is the address being waited on.
-    VAddr arb_wait_address{0};
-    bool waiting_for_arbitration{};
+    Thread* lock_owner{};
 
     /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
     Handle global_handle = 0;
 
-    /// Callback for HLE Events
-    HLECallback hle_callback;
-    Handle hle_time_event;
-    SynchronizationObject* hle_object;
-
     KScheduler* scheduler = nullptr;
 
     std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> per_core_priority_queue_entry{};
@@ -714,7 +753,7 @@ private:
 
     u32 pausing_state = 0;
     bool is_running = false;
-    bool is_waiting_on_sync = false;
+    bool is_cancellable = false;
     bool is_sync_cancelled = false;
 
     bool is_continuous_on_svc = false;
@@ -725,6 +764,18 @@ private:
 
     bool was_running = false;
 
+    bool signaled{};
+
+    ConditionVariableThreadTree* condvar_tree{};
+    uintptr_t condvar_key{};
+    VAddr address_key{};
+    u32 address_key_value{};
+    s32 num_kernel_waiters{};
+
+    using WaiterList = boost::intrusive::list<Thread>;
+    WaiterList waiter_list{};
+    WaiterList pinned_waiter_list{};
+
     std::string name;
 };
 
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 79628e2b4..832edd629 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -18,12 +18,10 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
     time_manager_event_type = Core::Timing::CreateEvent(
         "Kernel::TimeManagerCallback",
         [this](std::uintptr_t thread_handle, std::chrono::nanoseconds) {
-            const KScopedSchedulerLock lock(system.Kernel());
-            const auto proper_handle = static_cast<Handle>(thread_handle);
-
             std::shared_ptr<Thread> thread;
             {
                 std::lock_guard lock{mutex};
+                const auto proper_handle = static_cast<Handle>(thread_handle);
                 if (cancelled_events[proper_handle]) {
                     return;
                 }
@@ -32,7 +30,7 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
 
             if (thread) {
                 // Thread can be null if process has exited
-                thread->OnWakeUp();
+                thread->Wakeup();
             }
         });
 }
@@ -42,8 +40,7 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64
     event_handle = timetask->GetGlobalHandle();
     if (nanoseconds > 0) {
         ASSERT(timetask);
-        ASSERT(timetask->GetStatus() != ThreadStatus::Ready);
-        ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex);
+        ASSERT(timetask->GetState() != ThreadState::Runnable);
         system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{nanoseconds},
                                           time_manager_event_type, event_handle);
     } else {
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 6981f8ee7..3ec0e1eca 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -32,9 +32,15 @@
 
 namespace Service::Account {
 
-constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 30};
+constexpr ResultCode ERR_INVALID_USER_ID{ErrorModule::Account, 20};
+constexpr ResultCode ERR_INVALID_APPLICATION_ID{ErrorModule::Account, 22};
+constexpr ResultCode ERR_INVALID_BUFFER{ErrorModule::Account, 30};
+constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 31};
 constexpr ResultCode ERR_FAILED_SAVE_DATA{ErrorModule::Account, 100};
 
+// Thumbnails are hard coded to be exactly this size
+constexpr std::size_t THUMBNAIL_SIZE = 0x24000;
+
 static std::string GetImagePath(Common::UUID uuid) {
     return Common::FS::GetUserPath(Common::FS::UserPath::NANDDir) +
            "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg";
@@ -369,7 +375,7 @@ protected:
         if (user_data.size() < sizeof(ProfileData)) {
             LOG_ERROR(Service_ACC, "ProfileData buffer too small!");
             IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_INVALID_BUFFER_SIZE);
+            rb.Push(ERR_INVALID_BUFFER);
             return;
         }
 
@@ -402,7 +408,7 @@ protected:
         if (user_data.size() < sizeof(ProfileData)) {
             LOG_ERROR(Service_ACC, "ProfileData buffer too small!");
             IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_INVALID_BUFFER_SIZE);
+            rb.Push(ERR_INVALID_BUFFER);
             return;
         }
 
@@ -534,7 +540,7 @@ private:
         rb.Push(RESULT_SUCCESS);
     }
 
-    Common::UUID user_id;
+    Common::UUID user_id{Common::INVALID_UUID};
 };
 
 // 6.0.0+
@@ -811,6 +817,55 @@ void Module::Interface::ListOpenContextStoredUsers(Kernel::HLERequestContext& ct
     rb.Push(RESULT_SUCCESS);
 }
 
+void Module::Interface::StoreSaveDataThumbnailApplication(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto uuid = rp.PopRaw<Common::UUID>();
+
+    LOG_WARNING(Service_ACC, "(STUBBED) called, uuid={}", uuid.Format());
+
+    // TODO(ogniK): Check if application ID is zero on acc initialize. As we don't have a reliable
+    // way of confirming things like the TID, we're going to assume a non zero value for the time
+    // being.
+    constexpr u64 tid{1};
+    StoreSaveDataThumbnail(ctx, uuid, tid);
+}
+
+void Module::Interface::StoreSaveDataThumbnailSystem(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto uuid = rp.PopRaw<Common::UUID>();
+    const auto tid = rp.Pop<u64_le>();
+
+    LOG_WARNING(Service_ACC, "(STUBBED) called, uuid={}, tid={:016X}", uuid.Format(), tid);
+    StoreSaveDataThumbnail(ctx, uuid, tid);
+}
+
+void Module::Interface::StoreSaveDataThumbnail(Kernel::HLERequestContext& ctx,
+                                               const Common::UUID& uuid, const u64 tid) {
+    IPC::ResponseBuilder rb{ctx, 2};
+
+    if (tid == 0) {
+        LOG_ERROR(Service_ACC, "TitleID is not valid!");
+        rb.Push(ERR_INVALID_APPLICATION_ID);
+        return;
+    }
+
+    if (!uuid) {
+        LOG_ERROR(Service_ACC, "User ID is not valid!");
+        rb.Push(ERR_INVALID_USER_ID);
+        return;
+    }
+    const auto thumbnail_size = ctx.GetReadBufferSize();
+    if (thumbnail_size != THUMBNAIL_SIZE) {
+        LOG_ERROR(Service_ACC, "Buffer size is empty! size={:X} expecting {:X}", thumbnail_size,
+                  THUMBNAIL_SIZE);
+        rb.Push(ERR_INVALID_BUFFER_SIZE);
+        return;
+    }
+
+    // TODO(ogniK): Construct save data thumbnail
+    rb.Push(RESULT_SUCCESS);
+}
+
 void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_ACC, "called");
     // A u8 is passed into this function which we can safely ignore. It's to determine if we have
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h
index ab8edc049..0e3ad8ec6 100644
--- a/src/core/hle/service/acc/acc.h
+++ b/src/core/hle/service/acc/acc.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include "common/uuid.h"
 #include "core/hle/service/glue/manager.h"
 #include "core/hle/service/service.h"
 
@@ -36,9 +37,13 @@ public:
         void ListQualifiedUsers(Kernel::HLERequestContext& ctx);
         void LoadOpenContext(Kernel::HLERequestContext& ctx);
         void ListOpenContextStoredUsers(Kernel::HLERequestContext& ctx);
+        void StoreSaveDataThumbnailApplication(Kernel::HLERequestContext& ctx);
+        void StoreSaveDataThumbnailSystem(Kernel::HLERequestContext& ctx);
 
     private:
         ResultCode InitializeApplicationInfoBase();
+        void StoreSaveDataThumbnail(Kernel::HLERequestContext& ctx, const Common::UUID& uuid,
+                                    const u64 tid);
 
         enum class ApplicationType : u32_le {
             GameCard = 0,
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index d2bb8c2c8..49b22583e 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -29,7 +29,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {104, nullptr, "GetProfileUpdateNotifier"},
         {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
         {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
-        {110, nullptr, "StoreSaveDataThumbnail"},
+        {110, &ACC_SU::StoreSaveDataThumbnailSystem, "StoreSaveDataThumbnail"},
         {111, nullptr, "ClearSaveDataThumbnail"},
         {112, nullptr, "LoadSaveDataThumbnail"},
         {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp
index 75a24f8f5..8d66d180d 100644
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -26,7 +26,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
         {102, nullptr, "AuthenticateApplicationAsync"},
         {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
-        {110, nullptr, "StoreSaveDataThumbnail"},
+        {110, &ACC_U0::StoreSaveDataThumbnailApplication, "StoreSaveDataThumbnail"},
         {111, nullptr, "ClearSaveDataThumbnail"},
         {120, nullptr, "CreateGuestLoginRequest"},
         {130, &ACC_U0::LoadOpenContext, "LoadOpenContext"}, // 5.0.0+
diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp
index a4aa5316a..951081cd0 100644
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -29,7 +29,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {104, nullptr, "GetProfileUpdateNotifier"},
         {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
         {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
-        {110, nullptr, "StoreSaveDataThumbnail"},
+        {110, &ACC_U1::StoreSaveDataThumbnailApplication, "StoreSaveDataThumbnail"},
         {111, nullptr, "ClearSaveDataThumbnail"},
         {112, nullptr, "LoadSaveDataThumbnail"},
         {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp
index 9b829e957..d9865d56f 100644
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -227,17 +227,17 @@ void ProfileManager::CloseUser(UUID uuid) {
 
 /// Gets all valid user ids on the system
 UserIDArray ProfileManager::GetAllUsers() const {
-    UserIDArray output;
-    std::transform(profiles.begin(), profiles.end(), output.begin(),
-                   [](const ProfileInfo& p) { return p.user_uuid; });
+    UserIDArray output{};
+    std::ranges::transform(profiles, output.begin(),
+                           [](const ProfileInfo& p) { return p.user_uuid; });
     return output;
 }
 
 /// Get all the open users on the system and zero out the rest of the data. This is specifically
 /// needed for GetOpenUsers and we need to ensure the rest of the output buffer is zero'd out
 UserIDArray ProfileManager::GetOpenUsers() const {
-    UserIDArray output;
-    std::transform(profiles.begin(), profiles.end(), output.begin(), [](const ProfileInfo& p) {
+    UserIDArray output{};
+    std::ranges::transform(profiles, output.begin(), [](const ProfileInfo& p) {
         if (p.is_open)
             return p.user_uuid;
         return UUID{Common::INVALID_UUID};
diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h
index 5310637a6..71b9d5518 100644
--- a/src/core/hle/service/acc/profile_manager.h
+++ b/src/core/hle/service/acc/profile_manager.h
@@ -23,12 +23,12 @@ using UserIDArray = std::array<Common::UUID, MAX_USERS>;
 /// Contains extra data related to a user.
 /// TODO: RE this structure
 struct ProfileData {
-    INSERT_PADDING_WORDS(1);
-    u32 icon_id{};
-    u8 bg_color_id{};
-    INSERT_PADDING_BYTES(0x7);
-    INSERT_PADDING_BYTES(0x10);
-    INSERT_PADDING_BYTES(0x60);
+    INSERT_PADDING_WORDS_NOINIT(1);
+    u32 icon_id;
+    u8 bg_color_id;
+    INSERT_PADDING_BYTES_NOINIT(0x7);
+    INSERT_PADDING_BYTES_NOINIT(0x10);
+    INSERT_PADDING_BYTES_NOINIT(0x60);
 };
 static_assert(sizeof(ProfileData) == 0x80, "ProfileData structure has incorrect size");
 
@@ -43,9 +43,9 @@ struct ProfileInfo {
 };
 
 struct ProfileBase {
-    Common::UUID user_uuid{Common::INVALID_UUID};
-    u64_le timestamp{};
-    ProfileUsername username{};
+    Common::UUID user_uuid;
+    u64_le timestamp;
+    ProfileUsername username;
 
     // Zero out all the fields to make the profile slot considered "Empty"
     void Invalidate() {
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp
index d85505082..0c8b632e8 100644
--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -20,9 +20,9 @@ namespace Service::AM::Applets {
 struct ShowError {
     u8 mode;
     bool jump;
-    INSERT_UNION_PADDING_BYTES(4);
+    INSERT_PADDING_BYTES_NOINIT(4);
     bool use_64bit_error_code;
-    INSERT_UNION_PADDING_BYTES(1);
+    INSERT_PADDING_BYTES_NOINIT(1);
     u64 error_code_64;
     u32 error_code_32;
 };
@@ -32,7 +32,7 @@ static_assert(sizeof(ShowError) == 0x14, "ShowError has incorrect size.");
 struct ShowErrorRecord {
     u8 mode;
     bool jump;
-    INSERT_UNION_PADDING_BYTES(6);
+    INSERT_PADDING_BYTES_NOINIT(6);
     u64 error_code_64;
     u64 posix_time;
 };
@@ -41,7 +41,7 @@ static_assert(sizeof(ShowErrorRecord) == 0x18, "ShowErrorRecord has incorrect si
 struct SystemErrorArg {
     u8 mode;
     bool jump;
-    INSERT_UNION_PADDING_BYTES(6);
+    INSERT_PADDING_BYTES_NOINIT(6);
     u64 error_code_64;
     std::array<char, 8> language_code;
     std::array<char, 0x800> main_text;
@@ -52,7 +52,7 @@ static_assert(sizeof(SystemErrorArg) == 0x1018, "SystemErrorArg has incorrect si
 struct ApplicationErrorArg {
     u8 mode;
     bool jump;
-    INSERT_UNION_PADDING_BYTES(6);
+    INSERT_PADDING_BYTES_NOINIT(6);
     u32 error_code;
     std::array<char, 8> language_code;
     std::array<char, 0x800> main_text;
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 0cd797109..02ca711fb 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -29,7 +29,7 @@ constexpr int DefaultSampleRate{48000};
 struct AudoutParams {
     s32_le sample_rate;
     u16_le channel_count;
-    INSERT_PADDING_BYTES(2);
+    INSERT_PADDING_BYTES_NOINIT(2);
 };
 static_assert(sizeof(AudoutParams) == 0x8, "AudoutParams is an invalid size");
 
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index d280e7caf..ff783b3cc 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -141,7 +141,9 @@ bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) {
            device_handle.device_index < DeviceIndex::MaxDeviceIndex;
 }
 
-Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) {}
+Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) {
+    latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE});
+}
 
 Controller_NPad::~Controller_NPad() {
     OnRelease();
@@ -732,7 +734,7 @@ bool Controller_NPad::VibrateControllerAtIndex(std::size_t npad_index, std::size
             // Send an empty vibration to stop any vibrations.
             vibrations[npad_index][device_index]->SetRumblePlay(0.0f, 160.0f, 0.0f, 320.0f);
             // Then reset the vibration value to its default value.
-            latest_vibration_values[npad_index][device_index] = {};
+            latest_vibration_values[npad_index][device_index] = DEFAULT_VIBRATION_VALUE;
         }
 
         return false;
@@ -890,7 +892,7 @@ void Controller_NPad::UpdateControllerAt(NPadControllerType controller, std::siz
         return;
     }
 
-    if (controller == NPadControllerType::Handheld) {
+    if (controller == NPadControllerType::Handheld && npad_index == HANDHELD_INDEX) {
         Settings::values.players.GetValue()[HANDHELD_INDEX].controller_type =
             MapNPadToSettingsType(controller);
         Settings::values.players.GetValue()[HANDHELD_INDEX].connected = true;
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index e2e826623..bc85ca4df 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -97,10 +97,10 @@ public:
     };
 
     struct DeviceHandle {
-        NpadType npad_type{};
-        u8 npad_id{};
-        DeviceIndex device_index{};
-        INSERT_PADDING_BYTES(1);
+        NpadType npad_type;
+        u8 npad_id;
+        DeviceIndex device_index;
+        INSERT_PADDING_BYTES_NOINIT(1);
     };
     static_assert(sizeof(DeviceHandle) == 4, "DeviceHandle is an invalid size");
 
@@ -120,13 +120,20 @@ public:
     static_assert(sizeof(NpadStyleSet) == 4, "NpadStyleSet is an invalid size");
 
     struct VibrationValue {
-        f32 amp_low{0.0f};
-        f32 freq_low{160.0f};
-        f32 amp_high{0.0f};
-        f32 freq_high{320.0f};
+        f32 amp_low;
+        f32 freq_low;
+        f32 amp_high;
+        f32 freq_high;
     };
     static_assert(sizeof(VibrationValue) == 0x10, "Vibration is an invalid size");
 
+    static constexpr VibrationValue DEFAULT_VIBRATION_VALUE{
+        .amp_low = 0.0f,
+        .freq_low = 160.0f,
+        .amp_high = 0.0f,
+        .freq_high = 320.0f,
+    };
+
     struct LedPattern {
         explicit LedPattern(u64 light1, u64 light2, u64 light3, u64 light4) {
             position1.Assign(light1);
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index 0df395e85..5219f2dad 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstring>
 #include "common/common_types.h"
 #include "core/core_timing.h"
@@ -16,7 +17,13 @@ constexpr std::size_t SHARED_MEMORY_OFFSET = 0x400;
 Controller_Touchscreen::Controller_Touchscreen(Core::System& system) : ControllerBase(system) {}
 Controller_Touchscreen::~Controller_Touchscreen() = default;
 
-void Controller_Touchscreen::OnInit() {}
+void Controller_Touchscreen::OnInit() {
+    for (std::size_t id = 0; id < MAX_FINGERS; ++id) {
+        mouse_finger_id[id] = MAX_FINGERS;
+        keyboard_finger_id[id] = MAX_FINGERS;
+        udp_finger_id[id] = MAX_FINGERS;
+    }
+}
 
 void Controller_Touchscreen::OnRelease() {}
 
@@ -40,38 +47,106 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin
     cur_entry.sampling_number = last_entry.sampling_number + 1;
     cur_entry.sampling_number2 = cur_entry.sampling_number;
 
-    bool pressed = false;
-    float x, y;
-    std::tie(x, y, pressed) = touch_device->GetStatus();
-    auto& touch_entry = cur_entry.states[0];
-    touch_entry.attribute.raw = 0;
-    if (!pressed && touch_btn_device) {
-        std::tie(x, y, pressed) = touch_btn_device->GetStatus();
+    const Input::TouchStatus& mouse_status = touch_mouse_device->GetStatus();
+    const Input::TouchStatus& udp_status = touch_udp_device->GetStatus();
+    for (std::size_t id = 0; id < mouse_status.size(); ++id) {
+        mouse_finger_id[id] = UpdateTouchInputEvent(mouse_status[id], mouse_finger_id[id]);
+        udp_finger_id[id] = UpdateTouchInputEvent(udp_status[id], udp_finger_id[id]);
     }
-    if (pressed && Settings::values.touchscreen.enabled) {
-        touch_entry.x = static_cast<u16>(x * Layout::ScreenUndocked::Width);
-        touch_entry.y = static_cast<u16>(y * Layout::ScreenUndocked::Height);
-        touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
-        touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
-        touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
-        const u64 tick = core_timing.GetCPUTicks();
-        touch_entry.delta_time = tick - last_touch;
-        last_touch = tick;
-        touch_entry.finger = Settings::values.touchscreen.finger;
-        cur_entry.entry_count = 1;
-    } else {
-        cur_entry.entry_count = 0;
+
+    if (Settings::values.use_touch_from_button) {
+        const Input::TouchStatus& keyboard_status = touch_btn_device->GetStatus();
+        for (std::size_t id = 0; id < mouse_status.size(); ++id) {
+            keyboard_finger_id[id] =
+                UpdateTouchInputEvent(keyboard_status[id], keyboard_finger_id[id]);
+        }
     }
 
+    std::array<Finger, 16> active_fingers;
+    const auto end_iter = std::copy_if(fingers.begin(), fingers.end(), active_fingers.begin(),
+                                       [](const auto& finger) { return finger.pressed; });
+    const auto active_fingers_count =
+        static_cast<std::size_t>(std::distance(active_fingers.begin(), end_iter));
+
+    const u64 tick = core_timing.GetCPUTicks();
+    cur_entry.entry_count = static_cast<s32_le>(active_fingers_count);
+    for (std::size_t id = 0; id < MAX_FINGERS; ++id) {
+        auto& touch_entry = cur_entry.states[id];
+        if (id < active_fingers_count) {
+            touch_entry.x = static_cast<u16>(active_fingers[id].x * Layout::ScreenUndocked::Width);
+            touch_entry.y = static_cast<u16>(active_fingers[id].y * Layout::ScreenUndocked::Height);
+            touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
+            touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
+            touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
+            touch_entry.delta_time = tick - active_fingers[id].last_touch;
+            fingers[active_fingers[id].id].last_touch = tick;
+            touch_entry.finger = active_fingers[id].id;
+            touch_entry.attribute.raw = active_fingers[id].attribute.raw;
+        } else {
+            // Clear touch entry
+            touch_entry.attribute.raw = 0;
+            touch_entry.x = 0;
+            touch_entry.y = 0;
+            touch_entry.diameter_x = 0;
+            touch_entry.diameter_y = 0;
+            touch_entry.rotation_angle = 0;
+            touch_entry.delta_time = 0;
+            touch_entry.finger = 0;
+        }
+    }
     std::memcpy(data + SHARED_MEMORY_OFFSET, &shared_memory, sizeof(TouchScreenSharedMemory));
 }
 
 void Controller_Touchscreen::OnLoadInputDevices() {
-    touch_device = Input::CreateDevice<Input::TouchDevice>(Settings::values.touchscreen.device);
-    if (Settings::values.use_touch_from_button) {
-        touch_btn_device = Input::CreateDevice<Input::TouchDevice>("engine:touch_from_button");
-    } else {
-        touch_btn_device.reset();
+    touch_mouse_device = Input::CreateDevice<Input::TouchDevice>("engine:emu_window");
+    touch_udp_device = Input::CreateDevice<Input::TouchDevice>("engine:cemuhookudp");
+    touch_btn_device = Input::CreateDevice<Input::TouchDevice>("engine:touch_from_button");
+}
+
+std::optional<std::size_t> Controller_Touchscreen::GetUnusedFingerID() const {
+    std::size_t first_free_id = 0;
+    while (first_free_id < MAX_FINGERS) {
+        if (!fingers[first_free_id].pressed) {
+            return first_free_id;
+        } else {
+            first_free_id++;
+        }
+    }
+    return std::nullopt;
+}
+
+std::size_t Controller_Touchscreen::UpdateTouchInputEvent(
+    const std::tuple<float, float, bool>& touch_input, std::size_t finger_id) {
+    const auto& [x, y, pressed] = touch_input;
+    if (pressed) {
+        Attributes attribute{};
+        if (finger_id == MAX_FINGERS) {
+            const auto first_free_id = GetUnusedFingerID();
+            if (!first_free_id) {
+                // Invalid finger id do nothing
+                return MAX_FINGERS;
+            }
+            finger_id = first_free_id.value();
+            fingers[finger_id].pressed = true;
+            fingers[finger_id].id = static_cast<u32_le>(finger_id);
+            attribute.start_touch.Assign(1);
+        }
+        fingers[finger_id].x = x;
+        fingers[finger_id].y = y;
+        fingers[finger_id].attribute = attribute;
+        return finger_id;
     }
+
+    if (finger_id != MAX_FINGERS) {
+        if (!fingers[finger_id].attribute.end_touch) {
+            fingers[finger_id].attribute.end_touch.Assign(1);
+            fingers[finger_id].attribute.start_touch.Assign(0);
+            return finger_id;
+        }
+        fingers[finger_id].pressed = false;
+    }
+
+    return MAX_FINGERS;
 }
+
 } // namespace Service::HID
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index 4d9042adc..784124e25 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -30,6 +30,18 @@ public:
     void OnLoadInputDevices() override;
 
 private:
+    static constexpr std::size_t MAX_FINGERS = 16;
+
+    // Returns an unused finger id; if no finger is available, std::nullopt is returned
+    std::optional<std::size_t> GetUnusedFingerID() const;
+
+    // If the touch is new, it tries to assign a new finger id; if no finger is available, no
+    // changes are made. Updates the coordinates if the finger id is already set. When the touch
+    // ends, the release is delayed by one frame to set the end_touch flag before finally freeing
+    // the finger id
+    std::size_t UpdateTouchInputEvent(const std::tuple<float, float, bool>& touch_input,
+                                      std::size_t finger_id);
+
     struct Attributes {
         union {
             u32 raw{};
@@ -55,7 +67,7 @@ private:
         s64_le sampling_number;
         s64_le sampling_number2;
         s32_le entry_count;
-        std::array<TouchState, 16> states;
+        std::array<TouchState, MAX_FINGERS> states;
     };
     static_assert(sizeof(TouchScreenEntry) == 0x298, "TouchScreenEntry is an invalid size");
 
@@ -66,9 +78,23 @@ private:
     };
     static_assert(sizeof(TouchScreenSharedMemory) == 0x3000,
                   "TouchScreenSharedMemory is an invalid size");
+
+    struct Finger {
+        u64_le last_touch{}; // NOTE(review): purpose not evident from the visible code - confirm
+        float x{}; // X coordinate copied from the latest pressed touch input
+        float y{}; // Y coordinate copied from the latest pressed touch input
+        u32_le id{}; // Slot index, stored when the slot is claimed for a new touch
+        bool pressed{}; // True while the slot is in use; GetUnusedFingerID skips such slots
+        Attributes attribute; // start_touch / end_touch flags of this finger
+    };
+
     TouchScreenSharedMemory shared_memory{};
-    std::unique_ptr<Input::TouchDevice> touch_device;
+    std::unique_ptr<Input::TouchDevice> touch_mouse_device;
+    std::unique_ptr<Input::TouchDevice> touch_udp_device;
     std::unique_ptr<Input::TouchDevice> touch_btn_device;
-    s64_le last_touch{};
+    std::array<std::size_t, MAX_FINGERS> mouse_finger_id;
+    std::array<std::size_t, MAX_FINGERS> keyboard_finger_id;
+    std::array<std::size_t, MAX_FINGERS> udp_finger_id;
+    std::array<Finger, MAX_FINGERS> fingers;
 };
 } // namespace Service::HID
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 8d95f74e6..2b13d6fe6 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -401,9 +401,9 @@ void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) {
 void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 basic_xpad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 basic_xpad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -431,9 +431,9 @@ void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
 void Hid::ActivateSixAxisSensor(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -452,9 +452,9 @@ void Hid::ActivateSixAxisSensor(Kernel::HLERequestContext& ctx) {
 void Hid::DeactivateSixAxisSensor(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -473,9 +473,9 @@ void Hid::DeactivateSixAxisSensor(Kernel::HLERequestContext& ctx) {
 void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -494,9 +494,9 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {
 void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -515,10 +515,10 @@ void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
 void Hid::EnableSixAxisSensorFusion(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        bool enable_sixaxis_sensor_fusion{};
-        INSERT_PADDING_BYTES(3);
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        u64 applet_resource_user_id{};
+        bool enable_sixaxis_sensor_fusion;
+        INSERT_PADDING_BYTES_NOINIT(3);
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -556,9 +556,9 @@ void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
 void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -577,9 +577,9 @@ void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
 void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -599,9 +599,9 @@ void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
 void Hid::IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -620,9 +620,9 @@ void Hid::IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx) {
 void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 unknown{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 unknown;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -702,10 +702,10 @@ void Hid::DeactivateNpad(Kernel::HLERequestContext& ctx) {
 void Hid::AcquireNpadStyleSetUpdateEventHandle(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 npad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
-        u64 unknown{};
+        u32 npad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
+        u64 unknown;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -722,9 +722,9 @@ void Hid::AcquireNpadStyleSetUpdateEventHandle(Kernel::HLERequestContext& ctx) {
 void Hid::DisconnectNpad(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 npad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 npad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -756,9 +756,9 @@ void Hid::ActivateNpadWithRevision(Kernel::HLERequestContext& ctx) {
     // Should have no effect with how our npad sets up the data
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 unknown{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 unknown;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -800,9 +800,9 @@ void Hid::GetNpadJoyHoldType(Kernel::HLERequestContext& ctx) {
 void Hid::SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 npad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 npad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -821,10 +821,10 @@ void Hid::SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx) {
     // TODO: Check the differences between this and SetNpadJoyAssignmentModeSingleByDefault
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 npad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
-        u64 npad_joy_device_type{};
+        u32 npad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
+        u64 npad_joy_device_type;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -844,9 +844,9 @@ void Hid::SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx) {
 void Hid::SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 npad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 npad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -952,9 +952,9 @@ void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
 void Hid::IsUnintendedHomeButtonInputProtectionEnabled(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        u32 npad_id{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        u32 npad_id;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -971,10 +971,10 @@ void Hid::IsUnintendedHomeButtonInputProtectionEnabled(Kernel::HLERequestContext
 void Hid::EnableUnintendedHomeButtonInputProtection(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        bool unintended_home_button_input_protection{};
-        INSERT_PADDING_BYTES(3);
-        u32 npad_id{};
-        u64 applet_resource_user_id{};
+        bool unintended_home_button_input_protection;
+        INSERT_PADDING_BYTES_NOINIT(3);
+        u32 npad_id;
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -1026,10 +1026,10 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) {
 void Hid::SendVibrationValue(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle vibration_device_handle{};
-        Controller_NPad::VibrationValue vibration_value{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle vibration_device_handle;
+        Controller_NPad::VibrationValue vibration_value;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -1050,9 +1050,9 @@ void Hid::SendVibrationValue(Kernel::HLERequestContext& ctx) {
 void Hid::GetActualVibrationValue(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle vibration_device_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle vibration_device_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -1147,9 +1147,9 @@ void Hid::EndPermitVibrationSession(Kernel::HLERequestContext& ctx) {
 void Hid::IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle vibration_device_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle vibration_device_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -1180,9 +1180,9 @@ void Hid::ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
 void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
@@ -1200,9 +1200,9 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
 void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     struct Parameters {
-        Controller_NPad::DeviceHandle sixaxis_handle{};
-        INSERT_PADDING_WORDS(1);
-        u64 applet_resource_user_id{};
+        Controller_NPad::DeviceHandle sixaxis_handle;
+        INSERT_PADDING_WORDS_NOINIT(1);
+        u64 applet_resource_user_id;
     };
 
     const auto parameters{rp.PopRaw<Parameters>()};
diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp
index d73b90015..567a4e345 100644
--- a/src/core/hle/service/mii/manager.cpp
+++ b/src/core/hle/service/mii/manager.cpp
@@ -100,6 +100,7 @@ MiiInfo ConvertStoreDataToInfo(const MiiStoreData& data) {
         .mole_scale = static_cast<u8>(bf.mole_scale.Value()),
         .mole_x = static_cast<u8>(bf.mole_x.Value()),
         .mole_y = static_cast<u8>(bf.mole_y.Value()),
+        .padding = 0,
     };
 }
 
diff --git a/src/core/hle/service/mii/manager.h b/src/core/hle/service/mii/manager.h
index 927451dea..32c27ee65 100644
--- a/src/core/hle/service/mii/manager.h
+++ b/src/core/hle/service/mii/manager.h
@@ -27,58 +27,58 @@ enum class SourceFlag : u32 {
 DECLARE_ENUM_FLAG_OPERATORS(SourceFlag);
 
 struct MiiInfo {
-    Common::UUID uuid{Common::INVALID_UUID};
-    std::array<char16_t, 11> name{};
-    u8 font_region{};
-    u8 favorite_color{};
-    u8 gender{};
-    u8 height{};
-    u8 build{};
-    u8 type{};
-    u8 region_move{};
-    u8 faceline_type{};
-    u8 faceline_color{};
-    u8 faceline_wrinkle{};
-    u8 faceline_make{};
-    u8 hair_type{};
-    u8 hair_color{};
-    u8 hair_flip{};
-    u8 eye_type{};
-    u8 eye_color{};
-    u8 eye_scale{};
-    u8 eye_aspect{};
-    u8 eye_rotate{};
-    u8 eye_x{};
-    u8 eye_y{};
-    u8 eyebrow_type{};
-    u8 eyebrow_color{};
-    u8 eyebrow_scale{};
-    u8 eyebrow_aspect{};
-    u8 eyebrow_rotate{};
-    u8 eyebrow_x{};
-    u8 eyebrow_y{};
-    u8 nose_type{};
-    u8 nose_scale{};
-    u8 nose_y{};
-    u8 mouth_type{};
-    u8 mouth_color{};
-    u8 mouth_scale{};
-    u8 mouth_aspect{};
-    u8 mouth_y{};
-    u8 beard_color{};
-    u8 beard_type{};
-    u8 mustache_type{};
-    u8 mustache_scale{};
-    u8 mustache_y{};
-    u8 glasses_type{};
-    u8 glasses_color{};
-    u8 glasses_scale{};
-    u8 glasses_y{};
-    u8 mole_type{};
-    u8 mole_scale{};
-    u8 mole_x{};
-    u8 mole_y{};
-    INSERT_PADDING_BYTES(1);
+    Common::UUID uuid;
+    std::array<char16_t, 11> name;
+    u8 font_region;
+    u8 favorite_color;
+    u8 gender;
+    u8 height;
+    u8 build;
+    u8 type;
+    u8 region_move;
+    u8 faceline_type;
+    u8 faceline_color;
+    u8 faceline_wrinkle;
+    u8 faceline_make;
+    u8 hair_type;
+    u8 hair_color;
+    u8 hair_flip;
+    u8 eye_type;
+    u8 eye_color;
+    u8 eye_scale;
+    u8 eye_aspect;
+    u8 eye_rotate;
+    u8 eye_x;
+    u8 eye_y;
+    u8 eyebrow_type;
+    u8 eyebrow_color;
+    u8 eyebrow_scale;
+    u8 eyebrow_aspect;
+    u8 eyebrow_rotate;
+    u8 eyebrow_x;
+    u8 eyebrow_y;
+    u8 nose_type;
+    u8 nose_scale;
+    u8 nose_y;
+    u8 mouth_type;
+    u8 mouth_color;
+    u8 mouth_scale;
+    u8 mouth_aspect;
+    u8 mouth_y;
+    u8 beard_color;
+    u8 beard_type;
+    u8 mustache_type;
+    u8 mustache_scale;
+    u8 mustache_y;
+    u8 glasses_type;
+    u8 glasses_color;
+    u8 glasses_scale;
+    u8 glasses_y;
+    u8 mole_type;
+    u8 mole_scale;
+    u8 mole_x;
+    u8 mole_y;
+    u8 padding;
 
     std::u16string Name() const;
 };
@@ -324,7 +324,7 @@ public:
     ResultCode GetIndex(const MiiInfo& info, u32& index);
 
 private:
-    const Common::UUID user_id;
+    const Common::UUID user_id{Common::INVALID_UUID};
     u64 update_counter{};
 };
 
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 5557da72e..641bcadea 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -190,12 +190,6 @@ private:
     void GetDeviceState(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_NFP, "called");
 
-        auto nfc_event = nfp_interface.GetNFCEvent();
-        if (!nfc_event->ShouldWait(&ctx.GetThread()) && !has_attached_handle) {
-            device_state = DeviceState::TagFound;
-            nfc_event->Clear();
-        }
-
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u32>(static_cast<u32>(device_state));
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 4b3581949..ceaa93d28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -38,6 +38,10 @@ void NVFlinger::SplitVSync() {
     system.RegisterHostThread();
     std::string name = "yuzu:VSyncThread";
     MicroProfileOnThreadCreate(name.c_str());
+
+    // Cleanup
+    SCOPE_EXIT({ MicroProfileOnThreadExit(); });
+
     Common::SetCurrentThreadName(name.c_str());
     Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     s64 delay = 0;
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 4da69f503..2b91a89d1 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -139,9 +139,6 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
         server_port->AppendPendingSession(server);
     }
 
-    // Wake the threads waiting on the ServerPort
-    server_port->Signal();
-
     LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
     rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h
index 72e1921ec..b78892223 100644
--- a/src/core/hle/service/time/clock_types.h
+++ b/src/core/hle/service/time/clock_types.h
@@ -73,19 +73,19 @@ struct TimeSpanType {
 static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size");
 
 struct ClockSnapshot {
-    SystemClockContext user_context{};
-    SystemClockContext network_context{};
-    s64 user_time{};
-    s64 network_time{};
-    TimeZone::CalendarTime user_calendar_time{};
-    TimeZone::CalendarTime network_calendar_time{};
-    TimeZone::CalendarAdditionalInfo user_calendar_additional_time{};
-    TimeZone::CalendarAdditionalInfo network_calendar_additional_time{};
-    SteadyClockTimePoint steady_clock_time_point{};
-    TimeZone::LocationName location_name{};
-    u8 is_automatic_correction_enabled{};
-    u8 type{};
-    INSERT_PADDING_BYTES(0x2);
+    SystemClockContext user_context;
+    SystemClockContext network_context;
+    s64 user_time;
+    s64 network_time;
+    TimeZone::CalendarTime user_calendar_time;
+    TimeZone::CalendarTime network_calendar_time;
+    TimeZone::CalendarAdditionalInfo user_calendar_additional_time;
+    TimeZone::CalendarAdditionalInfo network_calendar_additional_time;
+    SteadyClockTimePoint steady_clock_time_point;
+    TimeZone::LocationName location_name;
+    u8 is_automatic_correction_enabled;
+    u8 type;
+    INSERT_PADDING_BYTES_NOINIT(0x2);
 
     static ResultCode GetCurrentTime(s64& current_time,
                                      const SteadyClockTimePoint& steady_clock_time_point,
diff --git a/src/core/hle/service/time/time_zone_types.h b/src/core/hle/service/time/time_zone_types.h
index 9be15b53e..4a57e036d 100644
--- a/src/core/hle/service/time/time_zone_types.h
+++ b/src/core/hle/service/time/time_zone_types.h
@@ -45,23 +45,23 @@ static_assert(sizeof(TimeZoneRule) == 0x4000, "TimeZoneRule is incorrect size");
 
 /// https://switchbrew.org/wiki/Glue_services#CalendarAdditionalInfo
 struct CalendarAdditionalInfo {
-    u32 day_of_week{};
-    u32 day_of_year{};
+    u32 day_of_week;
+    u32 day_of_year;
     std::array<char, 8> timezone_name;
-    u32 is_dst{};
-    s32 gmt_offset{};
+    u32 is_dst;
+    s32 gmt_offset;
 };
 static_assert(sizeof(CalendarAdditionalInfo) == 0x18, "CalendarAdditionalInfo is incorrect size");
 
 /// https://switchbrew.org/wiki/Glue_services#CalendarTime
 struct CalendarTime {
-    s16 year{};
-    s8 month{};
-    s8 day{};
-    s8 hour{};
-    s8 minute{};
-    s8 second{};
-    INSERT_PADDING_BYTES(1);
+    s16 year;
+    s8 month;
+    s8 day;
+    s8 hour;
+    s8 minute;
+    s8 second;
+    INSERT_PADDING_BYTES_NOINIT(1);
 };
 static_assert(sizeof(CalendarTime) == 0x8, "CalendarTime is incorrect size");
 
diff --git a/src/input_common/touch_from_button.cpp b/src/input_common/touch_from_button.cpp
index a07124a86..ffbe4f2ed 100644
--- a/src/input_common/touch_from_button.cpp
+++ b/src/input_common/touch_from_button.cpp
@@ -25,18 +25,19 @@ public:
         }
     }
 
-    std::tuple<float, float, bool> GetStatus() const override {
-        for (const auto& m : map) {
-            const bool state = std::get<0>(m)->GetStatus();
+    Input::TouchStatus GetStatus() const override {
+        Input::TouchStatus touch_status{};
+        for (std::size_t id = 0; id < map.size() && id < touch_status.size(); ++id) {
+            const bool state = std::get<0>(map[id])->GetStatus();
             if (state) {
-                const float x = static_cast<float>(std::get<1>(m)) /
+                const float x = static_cast<float>(std::get<1>(map[id])) /
                                 static_cast<int>(Layout::ScreenUndocked::Width);
-                const float y = static_cast<float>(std::get<2>(m)) /
+                const float y = static_cast<float>(std::get<2>(map[id])) /
                                 static_cast<int>(Layout::ScreenUndocked::Height);
-                return {x, y, true};
+                touch_status[id] = {x, y, true};
             }
         }
-        return {};
+        return touch_status;
     }
 
 private:
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index 412d57896..e7e50d789 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -136,6 +136,7 @@ static void SocketLoop(Socket* socket) {
 
 Client::Client() {
     LOG_INFO(Input, "Udp Initialization started");
+    finger_id.fill(MAX_TOUCH_FINGERS);
     ReloadSockets();
 }
 
@@ -176,7 +177,7 @@ void Client::ReloadSockets() {
     std::string server_token;
     std::size_t client = 0;
     while (std::getline(servers_ss, server_token, ',')) {
-        if (client == max_udp_clients) {
+        if (client == MAX_UDP_CLIENTS) {
             break;
         }
         std::stringstream server_ss(server_token);
@@ -194,7 +195,7 @@ void Client::ReloadSockets() {
         for (std::size_t pad = 0; pad < 4; ++pad) {
             const std::size_t client_number =
                 GetClientNumber(udp_input_address, udp_input_port, pad);
-            if (client_number != max_udp_clients) {
+            if (client_number != MAX_UDP_CLIENTS) {
                 LOG_ERROR(Input, "Duplicated UDP servers found");
                 continue;
             }
@@ -213,7 +214,7 @@ std::size_t Client::GetClientNumber(std::string_view host, u16 port, std::size_t
             return client;
         }
     }
-    return max_udp_clients;
+    return MAX_UDP_CLIENTS;
 }
 
 void Client::OnVersion([[maybe_unused]] Response::Version data) {
@@ -259,33 +260,14 @@ void Client::OnPadData(Response::PadData data, std::size_t client) {
         std::lock_guard guard(clients[client].status.update_mutex);
         clients[client].status.motion_status = clients[client].motion.GetMotion();
 
-        // TODO: add a setting for "click" touch. Click touch refers to a device that differentiates
-        // between a simple "tap" and a hard press that causes the touch screen to click.
-        const bool is_active = data.touch_1.is_active != 0;
-
-        float x = 0;
-        float y = 0;
-
-        if (is_active && clients[client].status.touch_calibration) {
-            const u16 min_x = clients[client].status.touch_calibration->min_x;
-            const u16 max_x = clients[client].status.touch_calibration->max_x;
-            const u16 min_y = clients[client].status.touch_calibration->min_y;
-            const u16 max_y = clients[client].status.touch_calibration->max_y;
-
-            x = static_cast<float>(std::clamp(static_cast<u16>(data.touch_1.x), min_x, max_x) -
-                                   min_x) /
-                static_cast<float>(max_x - min_x);
-            y = static_cast<float>(std::clamp(static_cast<u16>(data.touch_1.y), min_y, max_y) -
-                                   min_y) /
-                static_cast<float>(max_y - min_y);
+        for (std::size_t id = 0; id < data.touch.size(); ++id) {
+            UpdateTouchInput(data.touch[id], client, id);
         }
 
-        clients[client].status.touch_status = {x, y, is_active};
-
         if (configuring) {
             const Common::Vec3f gyroscope = clients[client].motion.GetGyroscope();
             const Common::Vec3f accelerometer = clients[client].motion.GetAcceleration();
-            UpdateYuzuSettings(client, accelerometer, gyroscope, is_active);
+            UpdateYuzuSettings(client, accelerometer, gyroscope);
         }
     }
 }
@@ -320,21 +302,17 @@ void Client::Reset() {
 }
 
 void Client::UpdateYuzuSettings(std::size_t client, const Common::Vec3<float>& acc,
-                                const Common::Vec3<float>& gyro, bool touch) {
+                                const Common::Vec3<float>& gyro) {
     if (gyro.Length() > 0.2f) {
-        LOG_DEBUG(Input, "UDP Controller {}: gyro=({}, {}, {}), accel=({}, {}, {}), touch={}",
-                  client, gyro[0], gyro[1], gyro[2], acc[0], acc[1], acc[2], touch);
+        LOG_DEBUG(Input, "UDP Controller {}: gyro=({}, {}, {}), accel=({}, {}, {})", client,
+                  gyro[0], gyro[1], gyro[2], acc[0], acc[1], acc[2]);
     }
     UDPPadStatus pad{
         .host = clients[client].host,
         .port = clients[client].port,
         .pad_index = clients[client].pad_index,
     };
-    if (touch) {
-        pad.touch = PadTouch::Click;
-        pad_queue.Push(pad);
-    }
-    for (size_t i = 0; i < 3; ++i) {
+    for (std::size_t i = 0; i < 3; ++i) {
         if (gyro[i] > 5.0f || gyro[i] < -5.0f) {
             pad.motion = static_cast<PadMotion>(i);
             pad.motion_value = gyro[i];
@@ -348,6 +326,50 @@ void Client::UpdateYuzuSettings(std::size_t client, const Common::Vec3<float>& a
     }
 }
 
+// Returns the lowest touch_status slot whose pressed flag is clear, or std::nullopt when all
+// MAX_TOUCH_FINGERS slots are taken.
+std::optional<std::size_t> Client::GetUnusedFingerID() const {
+    for (std::size_t slot = 0; slot < MAX_TOUCH_FINGERS; ++slot) {
+        const bool in_use = std::get<2>(touch_status[slot]);
+        if (!in_use) {
+            return slot;
+        }
+    }
+    return std::nullopt;
+}
+
+// Tracks one UDP touch point: claims a finger slot while active and releases it afterwards
+void Client::UpdateTouchInput(Response::TouchPad& touch_pad, std::size_t client, std::size_t id) {
+    // TODO: Use custom calibration per device
+    const Common::ParamPackage touch_param(Settings::values.touch_device);
+    const u16 min_x = static_cast<u16>(touch_param.Get("min_x", 100));
+    const u16 min_y = static_cast<u16>(touch_param.Get("min_y", 50));
+    const u16 max_x = static_cast<u16>(touch_param.Get("max_x", 1800));
+    const u16 max_y = static_cast<u16>(touch_param.Get("max_y", 850));
+    const std::size_t touch_id = client * 2 + id;
+    // An empty or inverted range would break std::clamp and divide by zero: treat as released
+    if (!touch_pad.is_active || max_x <= min_x || max_y <= min_y) {
+        if (finger_id[touch_id] != MAX_TOUCH_FINGERS) {
+            touch_status[finger_id[touch_id]] = {};
+            finger_id[touch_id] = MAX_TOUCH_FINGERS;
+        }
+        return;
+    }
+    if (finger_id[touch_id] == MAX_TOUCH_FINGERS) {
+        const auto first_free_id = GetUnusedFingerID();
+        if (!first_free_id) {
+            return; // No free finger slot, skip this input
+        }
+        finger_id[touch_id] = *first_free_id;
+    }
+    auto& [x, y, pressed] = touch_status[finger_id[touch_id]];
+    x = static_cast<float>(std::clamp(static_cast<u16>(touch_pad.x), min_x, max_x) - min_x) /
+        static_cast<float>(max_x - min_x);
+    y = static_cast<float>(std::clamp(static_cast<u16>(touch_pad.y), min_y, max_y) - min_y) /
+        static_cast<float>(max_y - min_y);
+    pressed = true;
+}
+
 void Client::BeginConfiguration() {
     pad_queue.Clear();
     configuring = true;
@@ -360,7 +382,7 @@ void Client::EndConfiguration() {
 
 DeviceStatus& Client::GetPadState(const std::string& host, u16 port, std::size_t pad) {
     const std::size_t client_number = GetClientNumber(host, port, pad);
-    if (client_number == max_udp_clients) {
+    if (client_number == MAX_UDP_CLIENTS) {
         return clients[0].status;
     }
     return clients[client_number].status;
@@ -368,12 +390,20 @@ DeviceStatus& Client::GetPadState(const std::string& host, u16 port, std::size_t
 
 const DeviceStatus& Client::GetPadState(const std::string& host, u16 port, std::size_t pad) const {
     const std::size_t client_number = GetClientNumber(host, port, pad);
-    if (client_number == max_udp_clients) {
+    if (client_number == MAX_UDP_CLIENTS) {
         return clients[0].status;
     }
     return clients[client_number].status;
 }
 
+Input::TouchStatus& Client::GetTouchState() {
+    return touch_status; // Mutable access to the finger table that UpdateTouchInput fills
+}
+
+const Input::TouchStatus& Client::GetTouchState() const {
+    return touch_status; // Read-only access to the same finger table
+}
+
 Common::SPSCQueue<UDPPadStatus>& Client::GetPadQueue() {
     return pad_queue;
 }
@@ -426,24 +456,24 @@ CalibrationConfigurationJob::CalibrationConfigurationJob(
                                         current_status = Status::Ready;
                                         status_callback(current_status);
                                     }
-                                    if (data.touch_1.is_active == 0) {
+                                    if (data.touch[0].is_active == 0) {
                                         return;
                                     }
-                                    LOG_DEBUG(Input, "Current touch: {} {}", data.touch_1.x,
-                                              data.touch_1.y);
-                                    min_x = std::min(min_x, static_cast<u16>(data.touch_1.x));
-                                    min_y = std::min(min_y, static_cast<u16>(data.touch_1.y));
+                                    LOG_DEBUG(Input, "Current touch: {} {}", data.touch[0].x,
+                                              data.touch[0].y);
+                                    min_x = std::min(min_x, static_cast<u16>(data.touch[0].x));
+                                    min_y = std::min(min_y, static_cast<u16>(data.touch[0].y));
                                     if (current_status == Status::Ready) {
                                         // First touch - min data (min_x/min_y)
                                         current_status = Status::Stage1Completed;
                                         status_callback(current_status);
                                     }
-                                    if (data.touch_1.x - min_x > CALIBRATION_THRESHOLD &&
-                                        data.touch_1.y - min_y > CALIBRATION_THRESHOLD) {
+                                    if (data.touch[0].x - min_x > CALIBRATION_THRESHOLD &&
+                                        data.touch[0].y - min_y > CALIBRATION_THRESHOLD) {
                                         // Set the current position as max value and finishes
                                         // configuration
-                                        max_x = data.touch_1.x;
-                                        max_y = data.touch_1.y;
+                                        max_x = data.touch[0].x;
+                                        max_y = data.touch[0].y;
                                         current_status = Status::Completed;
                                         data_callback(min_x, min_y, max_x, max_y);
                                         status_callback(current_status);
diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h
index 00c8b09f5..822f9c550 100644
--- a/src/input_common/udp/client.h
+++ b/src/input_common/udp/client.h
@@ -28,6 +28,7 @@ class Socket;
 namespace Response {
 struct PadData;
 struct PortInfo;
+struct TouchPad;
 struct Version;
 } // namespace Response
 
@@ -50,7 +51,6 @@ struct UDPPadStatus {
     std::string host{"127.0.0.1"};
     u16 port{26760};
     std::size_t pad_index{};
-    PadTouch touch{PadTouch::Undefined};
     PadMotion motion{PadMotion::Undefined};
     f32 motion_value{0.0f};
 };
@@ -93,6 +93,9 @@ public:
     DeviceStatus& GetPadState(const std::string& host, u16 port, std::size_t pad);
     const DeviceStatus& GetPadState(const std::string& host, u16 port, std::size_t pad) const;
 
+    Input::TouchStatus& GetTouchState();
+    const Input::TouchStatus& GetTouchState() const;
+
 private:
     struct ClientData {
         std::string host{"127.0.0.1"};
@@ -122,14 +125,25 @@ private:
     void StartCommunication(std::size_t client, const std::string& host, u16 port,
                             std::size_t pad_index, u32 client_id);
     void UpdateYuzuSettings(std::size_t client, const Common::Vec3<float>& acc,
-                            const Common::Vec3<float>& gyro, bool touch);
+                            const Common::Vec3<float>& gyro);
+
+    // Returns an unused finger id. If no finger ids are available, std::nullopt is
+    // returned instead.
+    std::optional<std::size_t> GetUnusedFingerID() const;
+
+    // Merges and updates all touch inputs into the touch_status array
+    void UpdateTouchInput(Response::TouchPad& touch_pad, std::size_t client, std::size_t id);
 
     bool configuring = false;
 
     // Allocate clients for 8 udp servers
-    const std::size_t max_udp_clients = 32;
-    std::array<ClientData, 4 * 8> clients;
-    Common::SPSCQueue<UDPPadStatus> pad_queue;
+    static constexpr std::size_t MAX_UDP_CLIENTS = 4 * 8;
+    // Each client can have up to 2 touch inputs
+    static constexpr std::size_t MAX_TOUCH_FINGERS = MAX_UDP_CLIENTS * 2;
+    std::array<ClientData, MAX_UDP_CLIENTS> clients{};
+    Common::SPSCQueue<UDPPadStatus> pad_queue{};
+    Input::TouchStatus touch_status{};
+    std::array<std::size_t, MAX_TOUCH_FINGERS> finger_id{};
 };
 
 /// An async job allowing configuration of the touchpad calibration.
diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h
index fc1aea4b9..a3d276697 100644
--- a/src/input_common/udp/protocol.h
+++ b/src/input_common/udp/protocol.h
@@ -140,6 +140,14 @@ static_assert(sizeof(PortInfo) == 12, "UDP Response PortInfo struct has wrong si
 static_assert(std::is_trivially_copyable_v<PortInfo>,
               "UDP Response PortInfo is not trivially copyable");
 
+struct TouchPad {
+    u8 is_active{};
+    u8 id{};
+    u16_le x{};
+    u16_le y{};
+};
+static_assert(sizeof(TouchPad) == 6, "UDP Response TouchPad struct has wrong size ");
+
 #pragma pack(push, 1)
 struct PadData {
     PortInfo info{};
@@ -190,12 +198,7 @@ struct PadData {
         u8 button_13{};
     } analog_button;
 
-    struct TouchPad {
-        u8 is_active{};
-        u8 id{};
-        u16_le x{};
-        u16_le y{};
-    } touch_1, touch_2;
+    std::array<TouchPad, 2> touch;
 
     u64_le motion_timestamp;
 
@@ -222,7 +225,6 @@ static_assert(sizeof(Message<PadData>) == MAX_PACKET_SIZE,
 
 static_assert(sizeof(PadData::AnalogButton) == 12,
               "UDP Response AnalogButton struct has wrong size ");
-static_assert(sizeof(PadData::TouchPad) == 6, "UDP Response TouchPad struct has wrong size ");
 static_assert(sizeof(PadData::Accelerometer) == 12,
               "UDP Response Accelerometer struct has wrong size ");
 static_assert(sizeof(PadData::Gyroscope) == 12, "UDP Response Gyroscope struct has wrong size ");
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index c5da27a38..b630281a0 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -78,8 +78,8 @@ public:
     explicit UDPTouch(std::string ip_, u16 port_, u16 pad_, CemuhookUDP::Client* client_)
         : ip(std::move(ip_)), port(port_), pad(pad_), client(client_) {}
 
-    std::tuple<float, float, bool> GetStatus() const override {
-        return client->GetPadState(ip, port, pad).touch_status;
+    Input::TouchStatus GetStatus() const override {
+        return client->GetTouchState();
     }
 
 private:
@@ -107,32 +107,4 @@ std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamP
     return std::make_unique<UDPTouch>(std::move(ip), port, pad, client.get());
 }
 
-void UDPTouchFactory::BeginConfiguration() {
-    polling = true;
-    client->BeginConfiguration();
-}
-
-void UDPTouchFactory::EndConfiguration() {
-    polling = false;
-    client->EndConfiguration();
-}
-
-Common::ParamPackage UDPTouchFactory::GetNextInput() {
-    Common::ParamPackage params;
-    CemuhookUDP::UDPPadStatus pad;
-    auto& queue = client->GetPadQueue();
-    while (queue.Pop(pad)) {
-        if (pad.touch == CemuhookUDP::PadTouch::Undefined) {
-            continue;
-        }
-        params.Set("engine", "cemuhookudp");
-        params.Set("ip", pad.host);
-        params.Set("port", static_cast<u16>(pad.port));
-        params.Set("pad_index", static_cast<u16>(pad.pad_index));
-        params.Set("touch", static_cast<u16>(pad.touch));
-        return params;
-    }
-    return params;
-}
-
 } // namespace InputCommon
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 8a606b448..6a5c18945 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,11 +1,11 @@
 add_executable(tests
     common/bit_field.cpp
-    common/bit_utils.cpp
     common/fibers.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
     core/core_timing.cpp
     tests.cpp
+    video_core/buffer_base.cpp
 )
 
 create_target_directory_groups(tests)
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp
deleted file mode 100644
index 479b5995a..000000000
--- a/src/tests/common/bit_utils.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2017 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <catch2/catch.hpp>
-#include <math.h>
-#include "common/bit_util.h"
-
-namespace Common {
-
-TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
-    REQUIRE(Common::CountTrailingZeroes32(0) == 32);
-    REQUIRE(Common::CountTrailingZeroes64(0) == 64);
-    REQUIRE(Common::CountTrailingZeroes32(9) == 0);
-    REQUIRE(Common::CountTrailingZeroes32(8) == 3);
-    REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12);
-    REQUIRE(Common::CountTrailingZeroes64(9) == 0);
-    REQUIRE(Common::CountTrailingZeroes64(8) == 3);
-    REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12);
-    REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36);
-}
-
-} // namespace Common
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
new file mode 100644
index 000000000..651633e9e
--- /dev/null
+++ b/src/tests/video_core/buffer_base.cpp
@@ -0,0 +1,473 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <stdexcept>
+#include <unordered_map>
+
+#include <catch2/catch.hpp>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "video_core/buffer_cache/buffer_base.h"
+
+namespace {
+using VideoCommon::BufferBase;
+using Range = std::pair<u64, u64>;
+
+constexpr u64 PAGE = 4096;
+constexpr u64 WORD = 4096 * 64;
+
+constexpr VAddr c = 0x1328914000;
+
+class RasterizerInterface { // Test stand-in for the rasterizer: keeps a counter per page
+public:
+    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+        const u64 page_start{addr >> Core::Memory::PAGE_BITS};
+        const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS};
+        for (u64 page = page_start; page < page_end; ++page) {
+            int& value = page_table[page];
+            value += delta;
+            if (value < 0) {
+                throw std::logic_error{"negative page"};
+            }
+            if (value == 0) {
+                page_table.erase(page); // keep only pages with a non-zero count
+            }
+        }
+    }
+
+    [[nodiscard]] int Count(VAddr addr) const noexcept { // count of the page holding addr
+        const auto it = page_table.find(addr >> Core::Memory::PAGE_BITS);
+        return it == page_table.end() ? 0 : it->second;
+    }
+
+    [[nodiscard]] unsigned Count() const noexcept { // sum of the counts of all tracked pages
+        unsigned count = 0;
+        for (const auto& [index, value] : page_table) {
+            count += static_cast<unsigned>(value); // stored counts are never negative
+        }
+        return count;
+    }
+
+private:
+    std::unordered_map<u64, int> page_table;
+};
+} // Anonymous namespace
+
+TEST_CASE("BufferBase: Small buffer", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == WORD / PAGE);
+    REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0});
+
+    buffer.MarkRegionAsCpuModified(c + PAGE, 1);
+    REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2});
+}
+
+TEST_CASE("BufferBase: Large buffer", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 32);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
+    buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4);
+    REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2});
+    REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8});
+    REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE});
+    REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE});
+    REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
+            Range{WORD * 3 + PAGE * 63, WORD * 4});
+
+    buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
+    buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
+    REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
+            Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9});
+
+    buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
+    REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
+            Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7});
+
+    buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
+    REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32});
+
+    buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
+    buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
+
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
+    REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
+}
+
+TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, PAGE * 2);
+    REQUIRE(rasterizer.Count() == 0);
+    buffer.UnmarkRegionAsCpuModified(c, PAGE);
+    REQUIRE(rasterizer.Count() == 1);
+    buffer.MarkRegionAsCpuModified(c, PAGE * 2);
+    REQUIRE(rasterizer.Count() == 0);
+    buffer.UnmarkRegionAsCpuModified(c, PAGE);
+    buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
+    REQUIRE(rasterizer.Count() == 2);
+    buffer.MarkRegionAsCpuModified(c, PAGE * 2);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Basic range", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.MarkRegionAsCpuModified(c, PAGE);
+    int num = 0; // counts invocations of the upload callback
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
+        REQUIRE(offset == 0U);
+        REQUIRE(size == PAGE);
+        ++num;
+    });
+    REQUIRE(num == 1); // num is a signed int, so compare against a signed literal
+}
+
+TEST_CASE("BufferBase: Border upload", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 2);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
+    buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
+    buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD - PAGE);
+        REQUIRE(size == PAGE * 2);
+    });
+}
+
+TEST_CASE("BufferBase: Border upload range", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 2);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
+    buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
+    buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD - PAGE);
+        REQUIRE(size == PAGE * 2);
+    });
+    buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
+    buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD - PAGE);
+        REQUIRE(size == PAGE);
+    });
+    buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD);
+        REQUIRE(size == PAGE);
+    });
+}
+
+TEST_CASE("BufferBase: Border upload partial range", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 2);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
+    buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
+    buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD - PAGE);
+        REQUIRE(size == PAGE * 2);
+    });
+    buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
+    buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD - PAGE);
+        REQUIRE(size == PAGE);
+    });
+    buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD);
+        REQUIRE(size == PAGE);
+    });
+}
+
+TEST_CASE("BufferBase: Partial word uploads", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, 0x9d000);
+    int num = 0;
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
+        REQUIRE(offset == 0U);
+        REQUIRE(size == WORD);
+        ++num;
+    });
+    REQUIRE(num == 1);
+    buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
+        REQUIRE(offset == WORD);
+        REQUIRE(size == WORD);
+        ++num;
+    });
+    REQUIRE(num == 2);
+    buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
+        REQUIRE(offset == WORD * 2);
+        REQUIRE(size == PAGE * 0x1d);
+        ++num;
+    });
+    REQUIRE(num == 3);
+}
+
+TEST_CASE("BufferBase: Partial page upload", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    int num = 0;
+    buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
+    buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 2);
+        REQUIRE(size == PAGE);
+        ++num;
+    });
+    REQUIRE(num == 1);
+    buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 9);
+        REQUIRE(size == PAGE);
+        ++num;
+    });
+    REQUIRE(num == 2);
+}
+
+TEST_CASE("BufferBase: Partial page upload with multiple words on the right", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 8);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
+    int num = 0; // counts invocations of the upload callback
+    buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 13);
+        REQUIRE(size == WORD * 7 - PAGE * 3);
+        ++num;
+    });
+    REQUIRE(num == 1);
+    buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
+        REQUIRE(offset == WORD * 7 + PAGE * 10);
+        REQUIRE(size == PAGE * 3);
+        ++num;
+    });
+    REQUIRE(num == 2);
+}
+
+TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 8);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
+    int num = 0;
+    buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 16);
+        REQUIRE(size == WORD * 7 - PAGE * 3);
+        ++num;
+    });
+    REQUIRE(num == 1);
+    buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 13);
+        REQUIRE(size == PAGE * 3);
+        ++num;
+    });
+    REQUIRE(num == 2);
+}
+
+TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 8);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
+    int num = 0;
+    buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 16);
+        REQUIRE(size == WORD);
+        ++num;
+    });
+    REQUIRE(num == 1);
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
+        REQUIRE(offset == PAGE * 13);
+        REQUIRE(size == PAGE * 3);
+        ++num;
+    });
+    REQUIRE(num == 2);
+    buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
+        REQUIRE(offset == WORD + PAGE * 16);
+        REQUIRE(size == PAGE * 73);
+        ++num;
+    });
+    REQUIRE(num == 3);
+}
+
+TEST_CASE("BufferBase: Empty right bits", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 2048);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 2048);
+    buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
+    buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
+        REQUIRE(offset == WORD - PAGE);
+        REQUIRE(size == PAGE * 2);
+    });
+}
+
+TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.MarkRegionAsCpuModified(c, PAGE);
+    int num = 0;
+    buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
+    buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
+    buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
+    REQUIRE(num == 0);
+    buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
+    REQUIRE(num == 1);
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, 0x22000);
+    REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
+    REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
+    REQUIRE(rasterizer.Count() == 0);
+    REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
+    REQUIRE(rasterizer.Count() == 1);
+    REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2));
+    buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2);
+    buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2);
+    REQUIRE(rasterizer.Count() == 2);
+}
+
+TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, 0x310720);
+    buffer.UnmarkRegionAsCpuModified(c, 0x310720);
+    REQUIRE(rasterizer.Count(c) == 1);
+    REQUIRE(rasterizer.Count(c + PAGE) == 1);
+    REQUIRE(rasterizer.Count(c + WORD) == 1);
+    REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
+}
+
+TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
+    buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
+        static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3};
+        static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
+        REQUIRE(offset == offsets.at(i));
+        REQUIRE(size == sizes.at(i));
+        ++i;
+    });
+}
+
+TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, 0x22000);
+    buffer.UnmarkRegionAsCpuModified(c, 0x22000);
+    REQUIRE(rasterizer.Count() == 0x22);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
+    buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
+        static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21};
+        static constexpr std::array<u64, 2> sizes{PAGE, PAGE};
+        REQUIRE(offset == offsets.at(i));
+        REQUIRE(size == sizes.at(i));
+        ++i;
+    });
+}
+
+TEST_CASE("BufferBase: Single page modified range", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, PAGE);
+    REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
+    buffer.UnmarkRegionAsCpuModified(c, PAGE);
+    REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
+}
+
+TEST_CASE("BufferBase: Two page modified range", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, PAGE * 2);
+    REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2));
+    buffer.UnmarkRegionAsCpuModified(c, PAGE);
+    REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
+}
+
+TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") {
+    for (int offset = 0; offset < 4; ++offset) {
+        const VAddr address = c + WORD * offset;
+        RasterizerInterface rasterizer;
+        BufferBase buffer(rasterizer, address, WORD * 4);
+        REQUIRE(buffer.IsRegionCpuModified(address, PAGE));
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE));
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE));
+
+        buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD));
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE));
+        REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE));
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE));
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
+        REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
+
+        buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
+        REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
+    }
+}
+
+TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 16);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 16);
+    REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16));
+
+    buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
+    REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
+    REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
+}
+
+TEST_CASE("BufferBase: Out of bounds region query", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 16);
+    REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE)); // entirely before the buffer
+    REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE));
+    REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE)); // starts at the buffer end
+    REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64)); // overlaps last page
+    REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64));
+}
+
+TEST_CASE("BufferBase: Wrap word regions", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD * 2);
+    buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
+    buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); // pages 63-64 straddle two words
+    REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2));
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
+
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
+    buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE); // last page of the two-word buffer
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE));
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16));
+}
+
+TEST_CASE("BufferBase: Unaligned page region query", "[video_core]") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.MarkRegionAsCpuModified(c + 4000, 1000); // byte range crosses from page 0 into page 1
+    REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
+    REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
+}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f7b9d7f86..e01ea55ab 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_subdirectory(host_shaders)
 
 add_library(video_core STATIC
+    buffer_cache/buffer_base.h
     buffer_cache/buffer_block.h
     buffer_cache/buffer_cache.h
     buffer_cache/map_interval.cpp
@@ -135,8 +136,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
-    renderer_vulkan/vk_memory_manager.cpp
-    renderer_vulkan/vk_memory_manager.h
     renderer_vulkan/vk_pipeline_cache.cpp
     renderer_vulkan/vk_pipeline_cache.h
     renderer_vulkan/vk_query_cache.cpp
@@ -259,6 +258,8 @@ add_library(video_core STATIC
     vulkan_common/vulkan_instance.h
     vulkan_common/vulkan_library.cpp
     vulkan_common/vulkan_library.h
+    vulkan_common/vulkan_memory_allocator.cpp
+    vulkan_common/vulkan_memory_allocator.h
     vulkan_common/vulkan_surface.cpp
     vulkan_common/vulkan_surface.h
     vulkan_common/vulkan_wrapper.cpp
@@ -312,9 +313,7 @@ else()
         -Werror=pessimizing-move
         -Werror=redundant-move
         -Werror=shadow
-        -Werror=switch
         -Werror=type-limits
-        -Werror=unused-variable
 
         $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
new file mode 100644
index 000000000..ee8602ce9
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -0,0 +1,495 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <bit>
+#include <limits>
+#include <utility>
+
+#include "common/alignment.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "core/memory.h"
+
+namespace VideoCommon {
+
+enum class BufferFlagBits {
+    Picked = 1 << 0, ///< Set by BufferBase::Pick, cleared by BufferBase::Unpick
+};
+DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
+
+/// Tag selecting the BufferBase constructor that creates a null buffer with no storage or size
+struct NullBufferParams {};
+
+/**
+ * Range tracking buffer container.
+ *
+ * It keeps track of the modified CPU and GPU ranges on a CPU page granularity, notifying the given
+ * rasterizer about state changes in the tracking behavior of the buffer.
+ *
+ * The buffer size and address are forcefully aligned to CPU page boundaries.
+ */
+template <class RasterizerInterface>
+class BufferBase {
+    static constexpr u64 PAGES_PER_WORD = 64; ///< One tracking bit per page in each u64 word
+    static constexpr u64 BYTES_PER_PAGE = Core::Memory::PAGE_SIZE;
+    static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
+
+    /// Vector tracking modified pages tightly packed with small vector optimization
+    union WrittenWords {
+        /// Returns the words pointer (read-only): &stack when is_short, heap otherwise
+        [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
+            return is_short ? &stack : heap;
+        }
+
+        /// Returns the words pointer (mutable): &stack when is_short, heap otherwise
+        [[nodiscard]] u64* Pointer(bool is_short) noexcept {
+            return is_short ? &stack : heap;
+        }
+
+        u64 stack = 0; ///< Small buffers storage
+        u64* heap;     ///< Not-small buffers pointer to the storage
+    };
+
+    struct GpuCpuWords {
+        explicit GpuCpuWords() = default;
+        explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} {
+            if (IsShort()) {
+                cpu.stack = ~u64{0}; // every page starts as CPU modified
+                gpu.stack = 0;
+            } else {
+                // Share allocation between CPU and GPU pages and set their default values
+                const size_t num_words = NumWords();
+                u64* const alloc = new u64[num_words * 2];
+                cpu.heap = alloc;
+                gpu.heap = alloc + num_words;
+                std::fill_n(cpu.heap, num_words, ~u64{0});
+                std::fill_n(gpu.heap, num_words, 0);
+            }
+            // Clear CPU bits past the last page; clamp the index so zero words cannot underflow
+            const u64 last_local_page =
+                Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE);
+            const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
+            u64& last_word = cpu.Pointer(IsShort())[std::max<size_t>(NumWords(), 1) - 1];
+            last_word = (last_word << shift) >> shift;
+        }
+
+        ~GpuCpuWords() {
+            Release();
+        }
+
+        GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept {
+            Release();
+            size_bytes = rhs.size_bytes;
+            cpu = rhs.cpu;
+            gpu = rhs.gpu;
+            rhs.cpu.heap = nullptr; // rhs.Release() now deletes a null pointer
+            return *this;
+        }
+
+        GpuCpuWords(GpuCpuWords&& rhs) noexcept
+            : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} {
+            rhs.cpu.heap = nullptr; // rhs.Release() now deletes a null pointer
+        }
+
+        GpuCpuWords& operator=(const GpuCpuWords&) = delete;
+        GpuCpuWords(const GpuCpuWords&) = delete;
+
+        /// Returns true when the buffer fits in the small vector optimization
+        [[nodiscard]] bool IsShort() const noexcept {
+            return size_bytes <= BYTES_PER_WORD;
+        }
+
+        /// Returns the number of words of the buffer
+        [[nodiscard]] size_t NumWords() const noexcept {
+            return Common::DivCeil(size_bytes, BYTES_PER_WORD);
+        }
+
+        /// Release buffer resources
+        void Release() {
+            if (!IsShort()) {
+                // CPU written words is the base for the heap allocation
+                delete[] cpu.heap;
+            }
+        }
+
+        u64 size_bytes = 0; ///< Tracked size in bytes
+        WrittenWords cpu;   ///< Pages modified by the CPU; base of the heap allocation
+        WrittenWords gpu;   ///< Pages modified by the host GPU
+    };
+
+public:
+    explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes)
+        : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)},
+          words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {}
+
+    explicit BufferBase(NullBufferParams) {} // Null buffer: no rasterizer, address 0, size 0
+
+    BufferBase& operator=(const BufferBase&) = delete;
+    BufferBase(const BufferBase&) = delete;
+
+    /// Returns {begin, end} byte offsets from the buffer base bounding the CPU modified pages
+    [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
+                                                        u64 query_size) const noexcept {
+        const u64 offset = query_cpu_addr - cpu_addr; // wraps if query_cpu_addr < cpu_addr
+        return ModifiedRegion<false>(offset, query_size);
+    }
+
+    /// Returns {begin, end} byte offsets from the buffer base bounding the GPU modified pages
+    [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
+                                                        u64 query_size) const noexcept {
+        const u64 offset = query_cpu_addr - cpu_addr; // wraps if query_cpu_addr < cpu_addr
+        return ModifiedRegion<true>(offset, query_size);
+    }
+
+    /// Returns true if any page overlapping the queried range is marked as CPU modified
+    [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
+        const u64 offset = query_cpu_addr - cpu_addr;
+        return IsRegionModified<false>(offset, query_size);
+    }
+
+    /// Returns true if any queried page is GPU modified and not also CPU modified
+    [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
+        const u64 offset = query_cpu_addr - cpu_addr;
+        return IsRegionModified<true>(offset, query_size);
+    }
+
+    /// Mark region as CPU modified; newly set pages reach UpdatePagesCachedCount with -1
+    void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
+        ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
+    }
+
+    /// Unmark region as CPU modified; newly cleared pages reach UpdatePagesCachedCount with +1
+    void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
+        ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
+    }
+
+    /// Mark region as modified from the host GPU (the rasterizer is not notified)
+    void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
+        ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
+    }
+
+    /// Unmark region as modified from the host GPU (the rasterizer is not notified)
+    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
+        ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
+    }
+
+    /// Call func(byte_offset, byte_size) for each CPU modified range and unmark those pages
+    template <typename Func>
+    void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
+        ForEachModifiedRange<false, true>(query_cpu_range, size, func);
+    }
+
+    /// Call func(byte_offset, byte_size) for each GPU modified range and unmark those pages
+    template <typename Func>
+    void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
+        ForEachModifiedRange<true, false>(query_cpu_range, size, func);
+    }
+
+    /// Call func(byte_offset, byte_size) for each GPU modified range of the whole buffer
+    template <typename Func>
+    void ForEachDownloadRange(Func&& func) {
+        ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
+    }
+
+    /// Set the Picked flag of the buffer
+    void Pick() noexcept {
+        flags |= BufferFlagBits::Picked;
+    }
+
+    /// Clear the Picked flag of the buffer
+    void Unpick() noexcept {
+        flags &= ~BufferFlagBits::Picked;
+    }
+
+    /// Returns true when [addr, addr + size) is fully contained in the buffer
+    [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
+        return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
+    }
+
+    /// Returns true if the Picked flag of the buffer is set
+    [[nodiscard]] bool IsPicked() const noexcept {
+        return True(flags & BufferFlagBits::Picked);
+    }
+
+    /// Returns the base CPU address of the buffer, aligned down to a CPU page boundary
+    [[nodiscard]] VAddr CpuAddr() const noexcept {
+        return cpu_addr;
+    }
+
+    /// Returns the offset of the given CPU address from the buffer base, truncated to u32
+    /// @pre IsInBounds returns true
+    [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
+        return static_cast<u32>(other_cpu_addr - cpu_addr);
+    }
+
+    /// Returns the tracked size in bytes: the requested range grown to CPU page boundaries
+    [[nodiscard]] u64 SizeBytes() const noexcept {
+        return words.size_bytes;
+    }
+
+private:
+    /**
+     * Change the state of a range of pages
+     *
+     * @param written_words Pages to be marked or unmarked as modified
+     * @param dirty_addr    Base CPU address to mark or unmark; may start before the buffer
+     * @param size          Size in bytes to mark or unmark; the range is clamped to the buffer
+     *
+     * @tparam enable            True when the bits will be set to one, false for zero
+     * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
+     */
+    template <bool enable, bool notify_rasterizer>
+    void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr,
+                           s64 size) noexcept(!notify_rasterizer) {
+        const s64 difference = dirty_addr - cpu_addr;
+        const u64 offset = std::max<s64>(difference, 0); // start clamped to the buffer base
+        size += std::min<s64>(difference, 0);            // drop bytes located before the buffer
+        if (offset >= SizeBytes() || size < 0) {
+            return;
+        }
+        u64* const state_words = written_words.Pointer(IsShort());
+        const u64 offset_end = std::min(offset + size, SizeBytes());
+        const u64 begin_page_index = offset / BYTES_PER_PAGE;
+        const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
+        const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE);
+        const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD);
+        u64 page_index = begin_page_index % PAGES_PER_WORD;
+        u64 word_index = begin_word_index;
+        while (word_index < end_word_index) {
+            const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD;
+            const u64 left_offset =
+                std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD;
+            const u64 right_offset = page_index;
+            u64 bits = ~u64{0};
+            bits = (bits >> right_offset) << right_offset; // clear pages before the range start
+            bits = (bits << left_offset) >> left_offset;   // clear pages past the range end
+            if constexpr (notify_rasterizer) {
+                NotifyRasterizer<!enable>(word_index, state_words[word_index], bits);
+            }
+            if constexpr (enable) {
+                state_words[word_index] |= bits;
+            } else {
+                state_words[word_index] &= ~bits;
+            }
+            page_index = 0;
+            ++word_index;
+        }
+    }
+
+    /**
+     * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
+     *
+     * @param word_index   Index to the word to notify to the rasterizer
+     * @param current_bits Current state of the word
+     * @param new_bits     Mask of the pages being changed in the word
+     *
+     * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
+     */
+    template <bool add_to_rasterizer>
+    void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) {
+        u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
+        VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
+        while (changed_bits != 0) {
+            const int empty_bits = std::countr_zero(changed_bits);
+            addr += empty_bits * BYTES_PER_PAGE;
+            changed_bits >>= empty_bits; // skip the pages whose state does not change
+
+            const u32 continuous_bits = std::countr_one(changed_bits);
+            const u64 size = continuous_bits * BYTES_PER_PAGE;
+            const VAddr begin_addr = addr;
+            addr += size;
+            changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0;
+            rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1);
+        }
+    }
+
+    /**
+     * Loop over each page in the given range (clamped to the buffer), turn off those bits and
+     * notify the rasterizer if needed. Call the given function on each turned off range.
+     *
+     * @param query_cpu_range Base CPU address to loop over
+     * @param size            Size in bytes of the CPU range to loop over
+     * @param func            Called as func(byte_offset, byte_size) per turned off region
+     *
+     * @tparam gpu               True for host GPU pages, false for CPU pages
+     * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
+     */
+    template <bool gpu, bool notify_rasterizer, typename Func>
+    void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+        const s64 difference = query_cpu_range - cpu_addr;
+        const u64 query_begin = std::max<s64>(difference, 0);
+        size += std::min<s64>(difference, 0);
+        if (query_begin >= SizeBytes() || size < 0) {
+            return;
+        }
+        const u64* const cpu_words = words.cpu.Pointer(IsShort());
+        const u64 query_end = std::min(query_begin + static_cast<u64>(size), SizeBytes());
+        u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
+        u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
+
+        const auto modified = [](u64 word) { return word != 0; };
+        const auto first_modified_word = std::find_if(words_begin, words_end, modified);
+        if (first_modified_word == words_end) {
+            // Exit early when the buffer is not modified
+            return;
+        }
+        const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified);
+
+        const u64 word_index_begin = std::distance(state_words, first_modified_word);
+        const u64 word_index_end = std::distance(state_words, last_modified_word);
+
+        const unsigned local_page_begin = std::countr_zero(*first_modified_word);
+        const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]);
+        const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
+        const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
+        const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
+        const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE);
+        const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin);
+        const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end);
+        const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD;
+        const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1;
+
+        u64 page_begin = first_word_page_begin;
+        u64 current_base = 0;
+        u64 current_size = 0;
+        bool on_going = false;
+        for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) {
+            const bool is_last_word = word_index + 1 == word_index_end;
+            const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD;
+            const u64 right_offset = page_begin;
+            const u64 left_offset = PAGES_PER_WORD - page_end;
+            u64 bits = ~u64{0};
+            bits = (bits >> right_offset) << right_offset;
+            bits = (bits << left_offset) >> left_offset;
+
+            const u64 current_word = state_words[word_index] & bits;
+            state_words[word_index] &= ~bits;
+
+            // Exclude CPU modified pages when visiting GPU pages
+            const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0);
+            if constexpr (notify_rasterizer) {
+                NotifyRasterizer<true>(word_index, word, ~u64{0});
+            }
+            u64 page = page_begin;
+            page_begin = 0;
+
+            while (page < page_end) {
+                const int empty_bits = std::countr_zero(word >> page);
+                if (on_going && empty_bits != 0) {
+                    InvokeModifiedRange(func, current_size, current_base);
+                    current_size = 0;
+                    on_going = false;
+                }
+                page += empty_bits;
+                const u64 rest = page < PAGES_PER_WORD ? word >> page : 0; // avoid UB shift >= 64
+                const int continuous_bits = std::countr_one(rest);
+                if (!on_going && continuous_bits != 0) {
+                    current_base = word_index * PAGES_PER_WORD + page;
+                    on_going = true;
+                }
+                current_size += continuous_bits;
+                page += continuous_bits;
+            }
+        }
+        if (on_going && current_size > 0) {
+            InvokeModifiedRange(func, current_size, current_base);
+        }
+    }
+
+    template <typename Func>
+    void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) {
+        const u64 current_size_bytes = current_size * BYTES_PER_PAGE; // pages to bytes
+        const u64 offset_begin = current_base * BYTES_PER_PAGE;
+        const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes());
+        func(offset_begin, offset_end - offset_begin); // byte offset and size, end clamped
+    }
+
+    /**
+     * Returns true when any page of the region is modified; GPU queries skip CPU modified pages
+     * @param offset Offset in bytes from the start of the buffer
+     * @param size   Size in bytes of the region to query for modifications
+     */
+    template <bool gpu>
+    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
+        const u64* const cpu_words = words.cpu.Pointer(IsShort());
+        const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        const u64 end_word = (offset + std::max<u64>(size, 1) - 1) / BYTES_PER_WORD; // inclusive
+        const u64 word_begin = offset / BYTES_PER_WORD;
+        const u64 num_query_words = std::max(size / BYTES_PER_WORD, end_word - word_begin) + 1;
+        const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
+        const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
+        u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
+        for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
+            const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+            if (word == 0) {
+                continue;
+            }
+            const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit);
+            const u64 local_page_end = page_end % PAGES_PER_WORD;
+            const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD;
+            if (((word >> page_index) << page_index) << page_end_shift != 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns a {begin, end} pair of byte offsets bounding the modified pages, {0, 0} when none
+     * @param offset Offset in bytes from the start of the buffer
+     * @param size   Size in bytes of the region to query for modifications
+     *
+     * @tparam gpu True to query GPU modified pages, false for CPU pages
+     */
+    template <bool gpu>
+    [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
+        const u64* const cpu_words = words.cpu.Pointer(IsShort());
+        const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        const u64 end_word = (offset + std::max<u64>(size, 1) - 1) / BYTES_PER_WORD; // inclusive
+        const u64 word_begin = offset / BYTES_PER_WORD;
+        const u64 num_query_words = std::max(size / BYTES_PER_WORD, end_word - word_begin) + 1;
+        const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
+        const u64 page_base = offset / BYTES_PER_PAGE;
+        const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
+        u64 begin = std::numeric_limits<u64>::max();
+        u64 end = 0;
+        for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
+            const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+            if (word == 0) {
+                continue;
+            }
+            const u64 local_page_begin = std::countr_zero(word);
+            const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
+            const u64 page_index = word_index * PAGES_PER_WORD;
+            const u64 page_begin = std::max(page_index + local_page_begin, page_base);
+            const u64 page_end = std::min(page_index + local_page_end, page_limit);
+            begin = std::min(begin, page_begin);
+            end = std::max(end, page_end);
+        }
+        static constexpr std::pair<u64, u64> EMPTY{0, 0};
+        return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
+    }
+
+    /// Returns the number of u64 tracking words covering the buffer (BYTES_PER_WORD bytes each)
+    [[nodiscard]] size_t NumWords() const noexcept {
+        return words.NumWords();
+    }
+
+    /// Returns true when the buffer fits in one word, stored inline without a heap allocation
+    [[nodiscard]] bool IsShort() const noexcept {
+        return words.IsShort();
+    }
+
+    RasterizerInterface* rasterizer = nullptr; ///< Receives UpdatePagesCachedCount calls
+    VAddr cpu_addr = 0;                        ///< Base CPU address, aligned down to a page
+    GpuCpuWords words;                         ///< CPU and GPU modified page bits and size
+    BufferFlagBits flags{};                    ///< Flags toggled by Pick and Unpick
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 94679d5d1..33b3c060b 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -18,10 +18,10 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //
 
+#include <bit>
 #include "command_classes/host1x.h"
 #include "command_classes/nvdec.h"
 #include "command_classes/vic.h"
-#include "common/bit_util.h"
 #include "video_core/cdma_pusher.h"
 #include "video_core/command_classes/nvdec_common.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -56,7 +56,7 @@ void CDmaPusher::Step() {
 
     for (const u32 value : values) {
         if (mask != 0) {
-            const u32 lbs = Common::CountTrailingZeroes32(mask);
+            const auto lbs = static_cast<u32>(std::countr_zero(mask));
             mask &= ~(1U << lbs);
             ExecuteCommand(static_cast<u32>(offset + lbs), value);
             continue;
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 8ca70b6dd..e5f212c1a 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -126,7 +126,7 @@ private:
 
     s32 count{};
     s32 offset{};
-    s32 mask{};
+    u32 mask{};
     bool incrementing{};
 
     // Queue of command lists to be processed
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 65bbeac78..fea6aed98 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -19,7 +19,7 @@
 //
 
 #include <array>
-#include "common/bit_util.h"
+#include <bit>
 #include "video_core/command_classes/codecs/h264.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
@@ -266,7 +266,7 @@ void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
 }
 
 void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
-    const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
+    const s32 size = 32 - std::countl_zero(value + 1);
     WriteBits(1, size);
 
     value -= (1U << (size - 1)) - 1;
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 81522988e..0de3280a2 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -171,30 +171,30 @@ public:
         static constexpr std::size_t NUM_REGS = 0x258;
         struct {
             u32 object;
-            INSERT_UNION_PADDING_WORDS(0x3F);
+            INSERT_PADDING_WORDS_NOINIT(0x3F);
             u32 no_operation;
             NotifyType notify;
-            INSERT_UNION_PADDING_WORDS(0x2);
+            INSERT_PADDING_WORDS_NOINIT(0x2);
             u32 wait_for_idle;
-            INSERT_UNION_PADDING_WORDS(0xB);
+            INSERT_PADDING_WORDS_NOINIT(0xB);
             u32 pm_trigger;
-            INSERT_UNION_PADDING_WORDS(0xF);
+            INSERT_PADDING_WORDS_NOINIT(0xF);
             u32 context_dma_notify;
             u32 dst_context_dma;
             u32 src_context_dma;
             u32 semaphore_context_dma;
-            INSERT_UNION_PADDING_WORDS(0x1C);
+            INSERT_PADDING_WORDS_NOINIT(0x1C);
             Surface dst;
             CpuIndexWrap pixels_from_cpu_index_wrap;
             u32 kind2d_check_enable;
             Surface src;
             SectorPromotion pixels_from_memory_sector_promotion;
-            INSERT_UNION_PADDING_WORDS(0x1);
+            INSERT_PADDING_WORDS_NOINIT(0x1);
             NumTpcs num_tpcs;
             u32 render_enable_addr_upper;
             u32 render_enable_addr_lower;
             RenderEnableMode render_enable_mode;
-            INSERT_UNION_PADDING_WORDS(0x4);
+            INSERT_PADDING_WORDS_NOINIT(0x4);
             u32 clip_x0;
             u32 clip_y0;
             u32 clip_width;
@@ -212,7 +212,7 @@ public:
                 BitField<8, 6, u32> y;
             } pattern_offset;
             BitField<0, 2, PatternSelect> pattern_select;
-            INSERT_UNION_PADDING_WORDS(0xC);
+            INSERT_PADDING_WORDS_NOINIT(0xC);
             struct {
                 BitField<0, 3, MonochromePatternColorFormat> color_format;
                 BitField<0, 1, MonochromePatternFormat> format;
@@ -227,15 +227,15 @@ public:
                 std::array<u32, 0x20> X1R5G5B5;
                 std::array<u32, 0x10> Y8;
             } color_pattern;
-            INSERT_UNION_PADDING_WORDS(0x10);
+            INSERT_PADDING_WORDS_NOINIT(0x10);
             struct {
                 u32 prim_mode;
                 u32 prim_color_format;
                 u32 prim_color;
                 u32 line_tie_break_bits;
-                INSERT_UNION_PADDING_WORDS(0x14);
+                INSERT_PADDING_WORDS_NOINIT(0x14);
                 u32 prim_point_xy;
-                INSERT_UNION_PADDING_WORDS(0x7);
+                INSERT_PADDING_WORDS_NOINIT(0x7);
                 std::array<Point, 0x40> prim_point;
             } render_solid;
             struct {
@@ -247,7 +247,7 @@ public:
                 u32 color0;
                 u32 color1;
                 u32 mono_opacity;
-                INSERT_UNION_PADDING_WORDS(0x6);
+                INSERT_PADDING_WORDS_NOINIT(0x6);
                 u32 src_width;
                 u32 src_height;
                 u32 dx_du_frac;
@@ -260,9 +260,9 @@ public:
                 u32 dst_y0_int;
                 u32 data;
             } pixels_from_cpu;
-            INSERT_UNION_PADDING_WORDS(0x3);
+            INSERT_PADDING_WORDS_NOINIT(0x3);
             u32 big_endian_control;
-            INSERT_UNION_PADDING_WORDS(0x3);
+            INSERT_PADDING_WORDS_NOINIT(0x3);
             struct {
                 BitField<0, 3, u32> block_shape;
                 BitField<0, 5, u32> corral_size;
@@ -271,7 +271,7 @@ public:
                     BitField<0, 1, Origin> origin;
                     BitField<4, 1, Filter> filter;
                 } sample_mode;
-                INSERT_UNION_PADDING_WORDS(0x8);
+                INSERT_PADDING_WORDS_NOINIT(0x8);
                 s32 dst_x0;
                 s32 dst_y0;
                 s32 dst_width;
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 51a041202..9f0a7b76d 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -55,7 +55,7 @@ public:
 
         union {
             struct {
-                INSERT_UNION_PADDING_WORDS(0x60);
+                INSERT_PADDING_WORDS_NOINIT(0x60);
 
                 Upload::Registers upload;
 
@@ -67,7 +67,7 @@ public:
 
                 u32 data_upload;
 
-                INSERT_UNION_PADDING_WORDS(0x3F);
+                INSERT_PADDING_WORDS_NOINIT(0x3F);
 
                 struct {
                     u32 address;
@@ -76,11 +76,11 @@ public:
                     }
                 } launch_desc_loc;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 u32 launch;
 
-                INSERT_UNION_PADDING_WORDS(0x4A7);
+                INSERT_PADDING_WORDS_NOINIT(0x4A7);
 
                 struct {
                     u32 address_high;
@@ -92,7 +92,7 @@ public:
                     }
                 } tsc;
 
-                INSERT_UNION_PADDING_WORDS(0x3);
+                INSERT_PADDING_WORDS_NOINIT(0x3);
 
                 struct {
                     u32 address_high;
@@ -104,7 +104,7 @@ public:
                     }
                 } tic;
 
-                INSERT_UNION_PADDING_WORDS(0x22);
+                INSERT_PADDING_WORDS_NOINIT(0x22);
 
                 struct {
                     u32 address_high;
@@ -115,11 +115,11 @@ public:
                     }
                 } code_loc;
 
-                INSERT_UNION_PADDING_WORDS(0x3FE);
+                INSERT_PADDING_WORDS_NOINIT(0x3FE);
 
                 u32 tex_cb_index;
 
-                INSERT_UNION_PADDING_WORDS(0x374);
+                INSERT_PADDING_WORDS_NOINIT(0x374);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 62483589e..19808a5c6 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -50,7 +50,7 @@ public:
 
         union {
             struct {
-                INSERT_UNION_PADDING_WORDS(0x60);
+                INSERT_PADDING_WORDS_NOINIT(0x60);
 
                 Upload::Registers upload;
 
@@ -62,7 +62,7 @@ public:
 
                 u32 data;
 
-                INSERT_UNION_PADDING_WORDS(0x11);
+                INSERT_PADDING_WORDS_NOINIT(0x11);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index bf9e07c9b..326b32228 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -536,7 +536,7 @@ public:
             Equation equation_a;
             Factor factor_source_a;
             Factor factor_dest_a;
-            INSERT_UNION_PADDING_WORDS(1);
+            INSERT_PADDING_WORDS_NOINIT(1);
         };
 
         enum class TessellationPrimitive : u32 {
@@ -608,7 +608,7 @@ public:
             };
             u32 layer_stride;
             u32 base_layer;
-            INSERT_UNION_PADDING_WORDS(7);
+            INSERT_PADDING_WORDS_NOINIT(7);
 
             GPUVAddr Address() const {
                 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -640,7 +640,7 @@ public:
                 BitField<8, 3, ViewportSwizzle> z;
                 BitField<12, 3, ViewportSwizzle> w;
             } swizzle;
-            INSERT_UNION_PADDING_WORDS(1);
+            INSERT_PADDING_WORDS_NOINIT(1);
 
             Common::Rectangle<f32> GetRect() const {
                 return {
@@ -700,7 +700,7 @@ public:
             u32 address_low;
             s32 buffer_size;
             s32 buffer_offset;
-            INSERT_UNION_PADDING_WORDS(3);
+            INSERT_PADDING_WORDS_NOINIT(3);
 
             GPUVAddr Address() const {
                 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -713,7 +713,7 @@ public:
             u32 stream;
             u32 varying_count;
             u32 stride;
-            INSERT_UNION_PADDING_WORDS(1);
+            INSERT_PADDING_WORDS_NOINIT(1);
         };
         static_assert(sizeof(TransformFeedbackLayout) == 16);
 
@@ -731,7 +731,7 @@ public:
 
         union {
             struct {
-                INSERT_UNION_PADDING_WORDS(0x44);
+                INSERT_PADDING_WORDS_NOINIT(0x44);
 
                 u32 wait_for_idle;
 
@@ -744,7 +744,7 @@ public:
 
                 ShadowRamControl shadow_ram_control;
 
-                INSERT_UNION_PADDING_WORDS(0x16);
+                INSERT_PADDING_WORDS_NOINIT(0x16);
 
                 Upload::Registers upload;
                 struct {
@@ -755,11 +755,11 @@ public:
 
                 u32 data_upload;
 
-                INSERT_UNION_PADDING_WORDS(0x16);
+                INSERT_PADDING_WORDS_NOINIT(0x16);
 
                 u32 force_early_fragment_tests;
 
-                INSERT_UNION_PADDING_WORDS(0x2D);
+                INSERT_PADDING_WORDS_NOINIT(0x2D);
 
                 struct {
                     union {
@@ -769,7 +769,7 @@ public:
                     };
                 } sync_info;
 
-                INSERT_UNION_PADDING_WORDS(0x15);
+                INSERT_PADDING_WORDS_NOINIT(0x15);
 
                 union {
                     BitField<0, 2, TessellationPrimitive> prim;
@@ -781,21 +781,21 @@ public:
                 std::array<f32, 4> tess_level_outer;
                 std::array<f32, 2> tess_level_inner;
 
-                INSERT_UNION_PADDING_WORDS(0x10);
+                INSERT_PADDING_WORDS_NOINIT(0x10);
 
                 u32 rasterize_enable;
 
                 std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
 
-                INSERT_UNION_PADDING_WORDS(0xC0);
+                INSERT_PADDING_WORDS_NOINIT(0xC0);
 
                 std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 u32 tfb_enabled;
 
-                INSERT_UNION_PADDING_WORDS(0x2E);
+                INSERT_PADDING_WORDS_NOINIT(0x2E);
 
                 std::array<RenderTargetConfig, NumRenderTargets> rt;
 
@@ -803,7 +803,7 @@ public:
 
                 std::array<ViewPort, NumViewports> viewports;
 
-                INSERT_UNION_PADDING_WORDS(0x1D);
+                INSERT_PADDING_WORDS_NOINIT(0x1D);
 
                 struct {
                     u32 first;
@@ -815,16 +815,16 @@ public:
                 float clear_color[4];
                 float clear_depth;
 
-                INSERT_UNION_PADDING_WORDS(0x3);
+                INSERT_PADDING_WORDS_NOINIT(0x3);
 
                 s32 clear_stencil;
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 PolygonMode polygon_mode_front;
                 PolygonMode polygon_mode_back;
 
-                INSERT_UNION_PADDING_WORDS(0x3);
+                INSERT_PADDING_WORDS_NOINIT(0x3);
 
                 u32 polygon_offset_point_enable;
                 u32 polygon_offset_line_enable;
@@ -832,47 +832,47 @@ public:
 
                 u32 patch_vertices;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 u32 fragment_barrier;
 
-                INSERT_UNION_PADDING_WORDS(0x7);
+                INSERT_PADDING_WORDS_NOINIT(0x7);
 
                 std::array<ScissorTest, NumViewports> scissor_test;
 
-                INSERT_UNION_PADDING_WORDS(0x15);
+                INSERT_PADDING_WORDS_NOINIT(0x15);
 
                 s32 stencil_back_func_ref;
                 u32 stencil_back_mask;
                 u32 stencil_back_func_mask;
 
-                INSERT_UNION_PADDING_WORDS(0x5);
+                INSERT_PADDING_WORDS_NOINIT(0x5);
 
                 u32 invalidate_texture_data_cache;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 u32 tiled_cache_barrier;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 u32 color_mask_common;
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 f32 depth_bounds[2];
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 u32 rt_separate_frag_data;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 u32 multisample_raster_enable;
                 u32 multisample_raster_samples;
                 std::array<u32, 4> multisample_sample_mask;
 
-                INSERT_UNION_PADDING_WORDS(0x5);
+                INSERT_PADDING_WORDS_NOINIT(0x5);
 
                 struct {
                     u32 address_high;
@@ -898,7 +898,7 @@ public:
                     };
                 } render_area;
 
-                INSERT_UNION_PADDING_WORDS(0x3F);
+                INSERT_PADDING_WORDS_NOINIT(0x3F);
 
                 union {
                     BitField<0, 4, u32> stencil;
@@ -907,24 +907,24 @@ public:
                     BitField<12, 4, u32> viewport;
                 } clear_flags;
 
-                INSERT_UNION_PADDING_WORDS(0x10);
+                INSERT_PADDING_WORDS_NOINIT(0x10);
 
                 u32 fill_rectangle;
 
-                INSERT_UNION_PADDING_WORDS(0x8);
+                INSERT_PADDING_WORDS_NOINIT(0x8);
 
                 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
 
                 std::array<MsaaSampleLocation, 4> multisample_sample_locations;
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 union {
                     BitField<0, 1, u32> enable;
                     BitField<4, 3, u32> target;
                 } multisample_coverage_to_color;
 
-                INSERT_UNION_PADDING_WORDS(0x8);
+                INSERT_PADDING_WORDS_NOINIT(0x8);
 
                 struct {
                     union {
@@ -947,7 +947,7 @@ public:
                     }
                 } rt_control;
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 u32 zeta_width;
                 u32 zeta_height;
@@ -958,11 +958,11 @@ public:
 
                 SamplerIndex sampler_index;
 
-                INSERT_UNION_PADDING_WORDS(0x25);
+                INSERT_PADDING_WORDS_NOINIT(0x25);
 
                 u32 depth_test_enable;
 
-                INSERT_UNION_PADDING_WORDS(0x5);
+                INSERT_PADDING_WORDS_NOINIT(0x5);
 
                 u32 independent_blend_enable;
 
@@ -970,7 +970,7 @@ public:
 
                 u32 alpha_test_enabled;
 
-                INSERT_UNION_PADDING_WORDS(0x6);
+                INSERT_PADDING_WORDS_NOINIT(0x6);
 
                 u32 d3d_cull_mode;
 
@@ -985,7 +985,7 @@ public:
                     float a;
                 } blend_color;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 struct {
                     u32 separate_alpha;
@@ -994,7 +994,7 @@ public:
                     Blend::Factor factor_dest_rgb;
                     Blend::Equation equation_a;
                     Blend::Factor factor_source_a;
-                    INSERT_UNION_PADDING_WORDS(1);
+                    INSERT_PADDING_WORDS_NOINIT(1);
                     Blend::Factor factor_dest_a;
 
                     u32 enable_common;
@@ -1010,7 +1010,7 @@ public:
                 u32 stencil_front_func_mask;
                 u32 stencil_front_mask;
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 u32 frag_color_clamp;
 
@@ -1022,17 +1022,17 @@ public:
                 float line_width_smooth;
                 float line_width_aliased;
 
-                INSERT_UNION_PADDING_WORDS(0x1B);
+                INSERT_PADDING_WORDS_NOINIT(0x1B);
 
                 u32 invalidate_sampler_cache_no_wfi;
                 u32 invalidate_texture_header_cache_no_wfi;
 
-                INSERT_UNION_PADDING_WORDS(0x2);
+                INSERT_PADDING_WORDS_NOINIT(0x2);
 
                 u32 vb_element_base;
                 u32 vb_base_instance;
 
-                INSERT_UNION_PADDING_WORDS(0x35);
+                INSERT_PADDING_WORDS_NOINIT(0x35);
 
                 u32 clip_distance_enabled;
 
@@ -1040,11 +1040,11 @@ public:
 
                 float point_size;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 u32 point_sprite_enable;
 
-                INSERT_UNION_PADDING_WORDS(0x3);
+                INSERT_PADDING_WORDS_NOINIT(0x3);
 
                 CounterReset counter_reset;
 
@@ -1057,7 +1057,7 @@ public:
                     BitField<4, 1, u32> alpha_to_one;
                 } multisample_control;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 struct {
                     u32 address_high;
@@ -1081,7 +1081,7 @@ public:
                     }
                 } tsc;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 float polygon_offset_factor;
 
@@ -1098,7 +1098,7 @@ public:
                     }
                 } tic;
 
-                INSERT_UNION_PADDING_WORDS(0x5);
+                INSERT_PADDING_WORDS_NOINIT(0x5);
 
                 u32 stencil_two_side_enable;
                 StencilOp stencil_back_op_fail;
@@ -1106,17 +1106,17 @@ public:
                 StencilOp stencil_back_op_zpass;
                 ComparisonOp stencil_back_func_func;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 u32 framebuffer_srgb;
 
                 float polygon_offset_units;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 Tegra::Texture::MsaaMode multisample_mode;
 
-                INSERT_UNION_PADDING_WORDS(0xC);
+                INSERT_PADDING_WORDS_NOINIT(0xC);
 
                 union {
                     BitField<2, 1, u32> coord_origin;
@@ -1132,7 +1132,7 @@ public:
                             (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
                     }
                 } code_address;
-                INSERT_UNION_PADDING_WORDS(1);
+                INSERT_PADDING_WORDS_NOINIT(1);
 
                 struct {
                     u32 vertex_end_gl;
@@ -1144,14 +1144,14 @@ public:
                     };
                 } draw;
 
-                INSERT_UNION_PADDING_WORDS(0xA);
+                INSERT_PADDING_WORDS_NOINIT(0xA);
 
                 struct {
                     u32 enabled;
                     u32 index;
                 } primitive_restart;
 
-                INSERT_UNION_PADDING_WORDS(0x5F);
+                INSERT_PADDING_WORDS_NOINIT(0x5F);
 
                 struct {
                     u32 start_addr_high;
@@ -1192,9 +1192,9 @@ public:
                     }
                 } index_array;
 
-                INSERT_UNION_PADDING_WORDS(0x7);
+                INSERT_PADDING_WORDS_NOINIT(0x7);
 
-                INSERT_UNION_PADDING_WORDS(0x1F);
+                INSERT_PADDING_WORDS_NOINIT(0x1F);
 
                 float polygon_offset_clamp;
 
@@ -1208,14 +1208,14 @@ public:
                     }
                 } instanced_arrays;
 
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
 
                 union {
                     BitField<0, 1, u32> enable;
                     BitField<4, 8, u32> unk4;
                 } vp_point_size;
 
-                INSERT_UNION_PADDING_WORDS(1);
+                INSERT_PADDING_WORDS_NOINIT(1);
 
                 u32 cull_test_enabled;
                 FrontFace front_face;
@@ -1223,11 +1223,11 @@ public:
 
                 u32 pixel_center_integer;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 u32 viewport_transform_enabled;
 
-                INSERT_UNION_PADDING_WORDS(0x3);
+                INSERT_PADDING_WORDS_NOINIT(0x3);
 
                 union {
                     BitField<0, 1, u32> depth_range_0_1;
@@ -1236,18 +1236,18 @@ public:
                     BitField<11, 1, u32> depth_clamp_disabled;
                 } view_volume_clip_control;
 
-                INSERT_UNION_PADDING_WORDS(0x1F);
+                INSERT_PADDING_WORDS_NOINIT(0x1F);
 
                 u32 depth_bounds_enable;
 
-                INSERT_UNION_PADDING_WORDS(1);
+                INSERT_PADDING_WORDS_NOINIT(1);
 
                 struct {
                     u32 enable;
                     LogicOperation operation;
                 } logic_op;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                INSERT_PADDING_WORDS_NOINIT(0x1);
 
                 union {
                     u32 raw;
@@ -1260,9 +1260,9 @@ public:
                     BitField<6, 4, u32> RT;
                     BitField<10, 11, u32> layer;
                 } clear_buffers;
-                INSERT_UNION_PADDING_WORDS(0xB);
+                INSERT_PADDING_WORDS_NOINIT(0xB);
                 std::array<ColorMask, NumRenderTargets> color_mask;
-                INSERT_UNION_PADDING_WORDS(0x38);
+                INSERT_PADDING_WORDS_NOINIT(0x38);
 
                 struct {
                     u32 query_address_high;
@@ -1284,7 +1284,7 @@ public:
                     }
                 } query;
 
-                INSERT_UNION_PADDING_WORDS(0x3C);
+                INSERT_PADDING_WORDS_NOINIT(0x3C);
 
                 struct {
                     union {
@@ -1325,10 +1325,10 @@ public:
                         BitField<4, 4, ShaderProgram> program;
                     };
                     u32 offset;
-                    INSERT_UNION_PADDING_WORDS(14);
+                    INSERT_PADDING_WORDS_NOINIT(14);
                 } shader_config[MaxShaderProgram];
 
-                INSERT_UNION_PADDING_WORDS(0x60);
+                INSERT_PADDING_WORDS_NOINIT(0x60);
 
                 u32 firmware[0x20];
 
@@ -1345,7 +1345,7 @@ public:
                     }
                 } const_buffer;
 
-                INSERT_UNION_PADDING_WORDS(0x10);
+                INSERT_PADDING_WORDS_NOINIT(0x10);
 
                 struct {
                     union {
@@ -1353,18 +1353,18 @@ public:
                         BitField<0, 1, u32> valid;
                         BitField<4, 5, u32> index;
                     };
-                    INSERT_UNION_PADDING_WORDS(7);
+                    INSERT_PADDING_WORDS_NOINIT(7);
                 } cb_bind[MaxShaderStage];
 
-                INSERT_UNION_PADDING_WORDS(0x56);
+                INSERT_PADDING_WORDS_NOINIT(0x56);
 
                 u32 tex_cb_index;
 
-                INSERT_UNION_PADDING_WORDS(0x7D);
+                INSERT_PADDING_WORDS_NOINIT(0x7D);
 
                 std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
 
-                INSERT_UNION_PADDING_WORDS(0x298);
+                INSERT_PADDING_WORDS_NOINIT(0x298);
 
                 struct {
                     /// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1376,14 +1376,14 @@ public:
                     }
                 } ssbo_info;
 
-                INSERT_UNION_PADDING_WORDS(0x11);
+                INSERT_PADDING_WORDS_NOINIT(0x11);
 
                 struct {
                     u32 address[MaxShaderStage];
                     u32 size[MaxShaderStage];
                 } tex_info_buffers;
 
-                INSERT_UNION_PADDING_WORDS(0xCC);
+                INSERT_PADDING_WORDS_NOINIT(0xCC);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index ceec05459..e0d7b89c5 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -68,10 +68,10 @@ struct Header {
 
     union {
         struct {
-            INSERT_UNION_PADDING_BYTES(3);  // ImapSystemValuesA
-            INSERT_UNION_PADDING_BYTES(1);  // ImapSystemValuesB
-            INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
-            INSERT_UNION_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES_NOINIT(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES_NOINIT(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
             union {
                 BitField<0, 8, u16> clip_distances;
                 BitField<8, 1, u16> point_sprite_s;
@@ -82,20 +82,20 @@ struct Header {
                 BitField<14, 1, u16> instance_id;
                 BitField<15, 1, u16> vertex_id;
             };
-            INSERT_UNION_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
-            INSERT_UNION_PADDING_BYTES(1);  // ImapReserved
-            INSERT_UNION_PADDING_BYTES(3);  // OmapSystemValuesA
-            INSERT_UNION_PADDING_BYTES(1);  // OmapSystemValuesB
-            INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
-            INSERT_UNION_PADDING_BYTES(2);  // OmapColor
-            INSERT_UNION_PADDING_BYTES(2);  // OmapSystemValuesC
-            INSERT_UNION_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
-            INSERT_UNION_PADDING_BYTES(1);  // OmapReserved
+            INSERT_PADDING_BYTES_NOINIT(5);  // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES_NOINIT(1);  // ImapReserved
+            INSERT_PADDING_BYTES_NOINIT(3);  // OmapSystemValuesA
+            INSERT_PADDING_BYTES_NOINIT(1);  // OmapSystemValuesB
+            INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
+            INSERT_PADDING_BYTES_NOINIT(2);  // OmapColor
+            INSERT_PADDING_BYTES_NOINIT(2);  // OmapSystemValuesC
+            INSERT_PADDING_BYTES_NOINIT(5);  // OmapFixedFncTexture[10]
+            INSERT_PADDING_BYTES_NOINIT(1);  // OmapReserved
         } vtg;
 
         struct {
-            INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
-            INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
+            INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+            INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
 
             union {
                 BitField<0, 2, PixelImap> x;
@@ -105,10 +105,10 @@ struct Header {
                 u8 raw;
             } imap_generic_vector[32];
 
-            INSERT_UNION_PADDING_BYTES(2);  // ImapColor
-            INSERT_UNION_PADDING_BYTES(2);  // ImapSystemValuesC
-            INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
-            INSERT_UNION_PADDING_BYTES(2);  // ImapReserved
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapReserved
 
             struct {
                 u32 target;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d81e38680..b4ce6b154 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -270,7 +270,7 @@ public:
 
         union {
             struct {
-                INSERT_UNION_PADDING_WORDS(0x4);
+                INSERT_PADDING_WORDS_NOINIT(0x4);
                 struct {
                     u32 address_high;
                     u32 address_low;
@@ -283,18 +283,18 @@ public:
 
                 u32 semaphore_sequence;
                 u32 semaphore_trigger;
-                INSERT_UNION_PADDING_WORDS(0xC);
+                INSERT_PADDING_WORDS_NOINIT(0xC);
 
                 // The pusher and the puller share the reference counter, the pusher only has read
                 // access
                 u32 reference_count;
-                INSERT_UNION_PADDING_WORDS(0x5);
+                INSERT_PADDING_WORDS_NOINIT(0x5);
 
                 u32 semaphore_acquire;
                 u32 semaphore_release;
                 u32 fence_value;
                 FenceAction fence_action;
-                INSERT_UNION_PADDING_WORDS(0xE2);
+                INSERT_PADDING_WORDS_NOINIT(0xE2);
 
                 // Puller state
                 u32 acquire_mode;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index d7437e185..61796e33a 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -23,7 +23,6 @@
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -32,6 +31,7 @@
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_instance.h"
 #include "video_core/vulkan_common/vulkan_library.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_surface.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -137,7 +137,7 @@ bool RendererVulkan::Init() try {
     InitializeDevice();
     Report();
 
-    memory_manager = std::make_unique<VKMemoryManager>(*device);
+    memory_allocator = std::make_unique<MemoryAllocator>(*device);
 
     state_tracker = std::make_unique<StateTracker>(gpu);
 
@@ -149,11 +149,11 @@ bool RendererVulkan::Init() try {
 
     rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
                                                     cpu_memory, screen_info, *device,
-                                                    *memory_manager, *state_tracker, *scheduler);
+                                                    *memory_allocator, *state_tracker, *scheduler);
 
     blit_screen =
         std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
-                                       *memory_manager, *swapchain, *scheduler, screen_info);
+                                       *memory_allocator, *swapchain, *scheduler, screen_info);
     return true;
 
 } catch (const vk::Exception& exception) {
@@ -172,7 +172,7 @@ void RendererVulkan::ShutDown() {
     blit_screen.reset();
     scheduler.reset();
     swapchain.reset();
-    memory_manager.reset();
+    memory_allocator.reset();
     device.reset();
 }
 
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 5575ffc54..daf55b9b4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -29,8 +29,8 @@ namespace Vulkan {
 
 class Device;
 class StateTracker;
+class MemoryAllocator;
 class VKBlitScreen;
-class VKMemoryManager;
 class VKSwapchain;
 class VKScheduler;
 
@@ -75,7 +75,7 @@ private:
 
     vk::DebugUtilsMessenger debug_callback;
     std::unique_ptr<Device> device;
-    std::unique_ptr<VKMemoryManager> memory_manager;
+    std::unique_ptr<MemoryAllocator> memory_allocator;
     std::unique_ptr<StateTracker> state_tracker;
     std::unique_ptr<VKScheduler> scheduler;
     std::unique_ptr<VKSwapchain> swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 5e184eb42..3e3b895e0 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -22,13 +22,13 @@
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
@@ -115,10 +115,10 @@ struct VKBlitScreen::BufferData {
 VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
                            Core::Frontend::EmuWindow& render_window_,
                            VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
-                           VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
+                           MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
                            VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
     : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
-      device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_},
+      device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
       scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
     resource_ticks.resize(image_count);
 
@@ -150,8 +150,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
     SetUniformData(data, framebuffer);
     SetVertexData(data, framebuffer);
 
-    auto map = buffer_commit->Map();
-    std::memcpy(map.Address(), &data, sizeof(data));
+    const std::span<u8> map = buffer_commit.Map();
+    std::memcpy(map.data(), &data, sizeof(data));
 
     if (!use_accelerated) {
         const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -165,8 +165,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
         constexpr u32 block_height_log2 = 4;
         const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
         Tegra::Texture::UnswizzleTexture(
-            std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
-            bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+            map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
+            framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
 
         const VkBufferImageCopy copy{
             .bufferOffset = image_offset,
@@ -224,8 +224,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
                                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
             });
     }
-    map.Release();
-
     scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
                       descriptor_set = descriptor_sets[image_index], buffer = *buffer,
                       size = swapchain.GetSize(), pipeline = *pipeline,
@@ -642,7 +640,7 @@ void VKBlitScreen::ReleaseRawImages() {
     raw_images.clear();
     raw_buffer_commits.clear();
     buffer.reset();
-    buffer_commit.reset();
+    buffer_commit = MemoryCommit{};
 }
 
 void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@@ -659,7 +657,7 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
     };
 
     buffer = device.GetLogical().CreateBuffer(ci);
-    buffer_commit = memory_manager.Commit(buffer, true);
+    buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
 }
 
 void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
@@ -690,7 +688,7 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
             .pQueueFamilyIndices = nullptr,
             .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
         });
-        raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
+        raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
         raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 69ed61770..b52576957 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -6,7 +6,7 @@
 
 #include <memory>
 
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
@@ -43,7 +43,7 @@ public:
     explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
                           Core::Frontend::EmuWindow& render_window,
                           VideoCore::RasterizerInterface& rasterizer, const Device& device,
-                          VKMemoryManager& memory_manager, VKSwapchain& swapchain,
+                          MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
                           VKScheduler& scheduler, const VKScreenInfo& screen_info);
     ~VKBlitScreen();
 
@@ -86,7 +86,7 @@ private:
     Core::Frontend::EmuWindow& render_window;
     VideoCore::RasterizerInterface& rasterizer;
     const Device& device;
-    VKMemoryManager& memory_manager;
+    MemoryAllocator& memory_allocator;
     VKSwapchain& swapchain;
     VKScheduler& scheduler;
     const std::size_t image_count;
@@ -104,14 +104,14 @@ private:
     vk::Sampler sampler;
 
     vk::Buffer buffer;
-    VKMemoryCommit buffer_commit;
+    MemoryCommit buffer_commit;
 
     std::vector<u64> resource_ticks;
 
     std::vector<vk::Semaphore> semaphores;
     std::vector<vk::Image> raw_images;
     std::vector<vk::ImageView> raw_image_views;
-    std::vector<VKMemoryCommit> raw_buffer_commits;
+    std::vector<MemoryCommit> raw_buffer_commits;
     u32 raw_width = 0;
     u32 raw_height = 0;
 };
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 4d517c547..d8ad40a0f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -34,17 +34,13 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
 constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
     VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
 
-std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const Device& device, VKScheduler& scheduler) {
-    return std::make_unique<VKStreamBuffer>(device, scheduler);
-}
-
 } // Anonymous namespace
 
-Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
-               VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
+Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
+               StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
     : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
                                                                                  staging_pool_} {
-    const VkBufferCreateInfo ci{
+    buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
@@ -53,22 +49,20 @@ Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKSchedul
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
-    };
-
-    buffer.handle = device.GetLogical().CreateBuffer(ci);
-    buffer.commit = memory_manager.Commit(buffer.handle, false);
+    });
+    commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
 }
 
 Buffer::~Buffer() = default;
 
 void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
-    const auto& staging = staging_pool.GetUnusedBuffer(data_size, true);
-    std::memcpy(staging.commit->Map(data_size), data, data_size);
+    const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
+    std::memcpy(staging.mapped_span.data(), data, data_size);
 
     scheduler.RequestOutsideRenderPassOperationContext();
 
     const VkBuffer handle = Handle();
-    scheduler.Record([staging = *staging.handle, handle, offset, data_size,
+    scheduler.Record([staging = staging.buffer, handle, offset, data_size,
                       &device = device](vk::CommandBuffer cmdbuf) {
         const VkBufferMemoryBarrier read_barrier{
             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
@@ -104,12 +98,12 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
 }
 
 void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
-    const auto& staging = staging_pool.GetUnusedBuffer(data_size, true);
+    auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
     scheduler.RequestOutsideRenderPassOperationContext();
 
     const VkBuffer handle = Handle();
     scheduler.Record(
-        [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
+        [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
             const VkBufferMemoryBarrier barrier{
                 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
                 .pNext = nullptr,
@@ -130,7 +124,7 @@ void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
         });
     scheduler.Finish();
 
-    std::memcpy(data, staging.commit->Map(data_size), data_size);
+    std::memcpy(data, staging.mapped_span.data(), data_size);
 }
 
 void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
@@ -168,29 +162,29 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
 
 VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
                              Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                             const Device& device_, VKMemoryManager& memory_manager_,
+                             const Device& device_, MemoryAllocator& memory_allocator_,
                              VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
-                             VKStagingBufferPool& staging_pool_)
+                             StagingBufferPool& staging_pool_)
     : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
                                                                  cpu_memory_, stream_buffer_},
-      device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
-                                                                                   staging_pool_} {}
+      device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+      staging_pool{staging_pool_} {}
 
 VKBufferCache::~VKBufferCache() = default;
 
 std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
+    return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
                                     size);
 }
 
 VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
     size = std::max(size, std::size_t(4));
-    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
         cmdbuf.FillBuffer(buffer, 0, size, 0);
     });
-    return {*empty.handle, 0, 0};
+    return {empty.buffer, 0, 0};
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 1c39aed34..41d577510 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,21 +8,20 @@
 
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 class Device;
-class VKMemoryManager;
 class VKScheduler;
 
 class Buffer final : public VideoCommon::BufferBlock {
 public:
-    explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
-                    VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
+    explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
+                    StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
     ~Buffer();
 
     void Upload(std::size_t offset, std::size_t data_size, const u8* data);
@@ -33,7 +32,7 @@ public:
                   std::size_t copy_size);
 
     VkBuffer Handle() const {
-        return *buffer.handle;
+        return *buffer;
     }
 
     u64 Address() const {
@@ -43,18 +42,19 @@ public:
 private:
     const Device& device;
     VKScheduler& scheduler;
-    VKStagingBufferPool& staging_pool;
+    StagingBufferPool& staging_pool;
 
-    VKBuffer buffer;
+    vk::Buffer buffer;
+    MemoryCommit commit;
 };
 
 class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
 public:
     explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
-                           const Device& device, VKMemoryManager& memory_manager,
+                           const Device& device, MemoryAllocator& memory_allocator,
                            VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
-                           VKStagingBufferPool& staging_pool);
+                           StagingBufferPool& staging_pool);
     ~VKBufferCache();
 
     BufferInfo GetEmptyBuffer(std::size_t size) override;
@@ -64,9 +64,9 @@ protected:
 
 private:
     const Device& device;
-    VKMemoryManager& memory_manager;
+    MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;
-    VKStagingBufferPool& staging_pool;
+    StagingBufferPool& staging_pool;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 02a6d54b7..5eb6a54be 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -164,7 +164,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
 
 QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
                              VKDescriptorPool& descriptor_pool_,
-                             VKStagingBufferPool& staging_buffer_pool_,
+                             StagingBufferPool& staging_buffer_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_)
     : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
                     BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
@@ -177,18 +177,18 @@ QuadArrayPass::~QuadArrayPass() = default;
 std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
     const u32 num_triangle_vertices = (num_vertices / 4) * 6;
     const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
-    auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
-    update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
 
     ASSERT(num_vertices % 4 == 0);
     const u32 num_quads = num_vertices / 4;
-    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
-                      first, set](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
+                      num_quads, first, set](vk::CommandBuffer cmdbuf) {
         constexpr u32 dispatch_size = 1024;
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
@@ -208,11 +208,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
     });
-    return {*buffer.handle, 0};
+    return {staging_ref.buffer, 0};
 }
 
 Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
-                     VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
+                     VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
                      VKUpdateDescriptorQueue& update_descriptor_queue_)
     : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
                     BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
@@ -224,15 +224,15 @@ Uint8Pass::~Uint8Pass() = default;
 std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
                                              u64 src_offset) {
     const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
-    auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
-    update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
+    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
                       num_vertices](vk::CommandBuffer cmdbuf) {
         constexpr u32 dispatch_size = 1024;
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
@@ -252,12 +252,12 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
     });
-    return {*buffer.handle, 0};
+    return {staging_ref.buffer, 0};
 }
 
 QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
                                  VKDescriptorPool& descriptor_pool_,
-                                 VKStagingBufferPool& staging_buffer_pool_,
+                                 StagingBufferPool& staging_buffer_pool_,
                                  VKUpdateDescriptorQueue& update_descriptor_queue_)
     : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
                     BuildInputOutputDescriptorUpdateTemplate(),
@@ -286,15 +286,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
     const u32 num_tri_vertices = (num_vertices / 4) * 6;
 
     const std::size_t staging_size = num_tri_vertices * sizeof(u32);
-    auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
-    update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
+    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
                       num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
         static constexpr u32 dispatch_size = 1024;
         const std::array push_constants = {base_vertex, index_shift};
@@ -317,7 +317,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
     });
-    return {*buffer.handle, 0};
+    return {staging_ref.buffer, 0};
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 7ddb09afb..f5c6f5f17 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -16,8 +16,8 @@
 namespace Vulkan {
 
 class Device;
+class StagingBufferPool;
 class VKScheduler;
-class VKStagingBufferPool;
 class VKUpdateDescriptorQueue;
 
 class VKComputePass {
@@ -45,7 +45,7 @@ class QuadArrayPass final : public VKComputePass {
 public:
     explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
                            VKDescriptorPool& descriptor_pool_,
-                           VKStagingBufferPool& staging_buffer_pool_,
+                           StagingBufferPool& staging_buffer_pool_,
                            VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~QuadArrayPass();
 
@@ -53,15 +53,14 @@ public:
 
 private:
     VKScheduler& scheduler;
-    VKStagingBufferPool& staging_buffer_pool;
+    StagingBufferPool& staging_buffer_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 };
 
 class Uint8Pass final : public VKComputePass {
 public:
     explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
-                       VKDescriptorPool& descriptor_pool_,
-                       VKStagingBufferPool& staging_buffer_pool_,
+                       VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
                        VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~Uint8Pass();
 
@@ -69,7 +68,7 @@ public:
 
 private:
     VKScheduler& scheduler;
-    VKStagingBufferPool& staging_buffer_pool;
+    StagingBufferPool& staging_buffer_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 };
 
@@ -77,7 +76,7 @@ class QuadIndexedPass final : public VKComputePass {
 public:
     explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
                              VKDescriptorPool& descriptor_pool_,
-                             VKStagingBufferPool& staging_buffer_pool_,
+                             StagingBufferPool& staging_buffer_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~QuadIndexedPass();
 
@@ -87,7 +86,7 @@ public:
 
 private:
     VKScheduler& scheduler;
-    VKStagingBufferPool& staging_buffer_pool;
+    StagingBufferPool& staging_buffer_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 4c5bc0aa1..6cd00884d 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <memory>
-#include <thread>
 
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
@@ -14,13 +13,11 @@
 
 namespace Vulkan {
 
-InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_,
-                       bool is_stubbed_)
-    : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
+InnerFence::InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_)
+    : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
 
-InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_,
-                       u32 payload_, bool is_stubbed_)
-    : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
+InnerFence::InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
+    : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
 
 InnerFence::~InnerFence() = default;
 
@@ -28,63 +25,38 @@ void InnerFence::Queue() {
     if (is_stubbed) {
         return;
     }
-    ASSERT(!event);
-
-    event = device.GetLogical().CreateEvent();
-    ticks = scheduler.CurrentTick();
-
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
-        cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
-    });
+    // Get the current tick so we can wait for it
+    wait_tick = scheduler.CurrentTick();
+    scheduler.Flush();
 }
 
 bool InnerFence::IsSignaled() const {
     if (is_stubbed) {
         return true;
     }
-    ASSERT(event);
-    return IsEventSignalled();
+    return scheduler.IsFree(wait_tick);
 }
 
 void InnerFence::Wait() {
     if (is_stubbed) {
         return;
     }
-    ASSERT(event);
-
-    if (ticks >= scheduler.CurrentTick()) {
-        scheduler.Flush();
-    }
-    while (!IsEventSignalled()) {
-        std::this_thread::yield();
-    }
-}
-
-bool InnerFence::IsEventSignalled() const {
-    switch (const VkResult result = event.GetStatus()) {
-    case VK_EVENT_SET:
-        return true;
-    case VK_EVENT_RESET:
-        return false;
-    default:
-        throw vk::Exception(result);
-    }
+    scheduler.Wait(wait_tick);
 }
 
 VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                                Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
                                VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
-                               const Device& device_, VKScheduler& scheduler_)
+                               VKScheduler& scheduler_)
     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
-      device{device_}, scheduler{scheduler_} {}
+      scheduler{scheduler_} {}
 
 Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
-    return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
+    return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
 }
 
 Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
-    return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
+    return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
 }
 
 void VKFenceManager::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 6b51e4587..9c5e5aa8f 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -28,10 +28,8 @@ class VKScheduler;
 
 class InnerFence : public VideoCommon::FenceBase {
 public:
-    explicit InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_,
-                        bool is_stubbed_);
-    explicit InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_,
-                        u32 payload_, bool is_stubbed_);
+    explicit InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_);
+    explicit InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
     ~InnerFence();
 
     void Queue();
@@ -41,12 +39,8 @@ public:
     void Wait();
 
 private:
-    bool IsEventSignalled() const;
-
-    const Device& device;
     VKScheduler& scheduler;
-    vk::Event event;
-    u64 ticks = 0;
+    u64 wait_tick = 0;
 };
 using Fence = std::shared_ptr<InnerFence>;
 
@@ -58,7 +52,7 @@ public:
     explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                             Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
                             VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
-                            const Device& device_, VKScheduler& scheduler_);
+                            VKScheduler& scheduler_);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
@@ -68,7 +62,6 @@ protected:
     void WaitFence(Fence& fence) override;
 
 private:
-    const Device& device;
     VKScheduler& scheduler;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
deleted file mode 100644
index a6abd0eee..000000000
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-u64 GetAllocationChunkSize(u64 required_size) {
-    static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
-    auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
-    return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
-}
-
-} // Anonymous namespace
-
-class VKMemoryAllocation final {
-public:
-    explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
-                                VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_)
-        : device{device_}, memory{std::move(memory_)}, properties{properties_},
-          allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {}
-
-    VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) {
-        auto found = TryFindFreeSection(free_iterator, allocation_size,
-                                        static_cast<u64>(commit_size), static_cast<u64>(alignment));
-        if (!found) {
-            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
-                                       static_cast<u64>(alignment));
-            if (!found) {
-                // Signal out of memory, it'll try to do more allocations.
-                return nullptr;
-            }
-        }
-        auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
-                                                           *found + commit_size);
-        commits.push_back(commit.get());
-
-        // Last commit's address is highly probable to be free.
-        free_iterator = *found + commit_size;
-
-        return commit;
-    }
-
-    void Free(const VKMemoryCommitImpl* commit) {
-        ASSERT(commit);
-
-        const auto it = std::find(std::begin(commits), std::end(commits), commit);
-        if (it == commits.end()) {
-            UNREACHABLE_MSG("Freeing unallocated commit!");
-            return;
-        }
-        commits.erase(it);
-    }
-
-    /// Returns whether this allocation is compatible with the arguments.
-    bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const {
-        return (wanted_properties & properties) && (type_mask & shifted_type) != 0;
-    }
-
-private:
-    static constexpr u32 ShiftType(u32 type) {
-        return 1U << type;
-    }
-
-    /// A memory allocator, it may return a free region between "start" and "end" with the solicited
-    /// requirements.
-    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
-        u64 iterator = Common::AlignUp(start, alignment);
-        while (iterator + size <= end) {
-            const u64 try_left = iterator;
-            const u64 try_right = try_left + size;
-
-            bool overlap = false;
-            for (const auto& commit : commits) {
-                const auto [commit_left, commit_right] = commit->interval;
-                if (try_left < commit_right && commit_left < try_right) {
-                    // There's an overlap, continue the search where the overlapping commit ends.
-                    iterator = Common::AlignUp(commit_right, alignment);
-                    overlap = true;
-                    break;
-                }
-            }
-            if (!overlap) {
-                // A free address has been found.
-                return try_left;
-            }
-        }
-
-        // No free regions where found, return an empty optional.
-        return std::nullopt;
-    }
-
-    const Device& device;                   ///< Vulkan device.
-    const vk::DeviceMemory memory;          ///< Vulkan memory allocation handler.
-    const VkMemoryPropertyFlags properties; ///< Vulkan properties.
-    const u64 allocation_size;              ///< Size of this allocation.
-    const u32 shifted_type;                 ///< Stored Vulkan type of this allocation, shifted.
-
-    /// Hints where the next free region is likely going to be.
-    u64 free_iterator{};
-
-    /// Stores all commits done from this allocation.
-    std::vector<const VKMemoryCommitImpl*> commits;
-};
-
-VKMemoryManager::VKMemoryManager(const Device& device_)
-    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
-
-VKMemoryManager::~VKMemoryManager() = default;
-
-VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements,
-                                       bool host_visible) {
-    const u64 chunk_size = GetAllocationChunkSize(requirements.size);
-
-    // When a host visible commit is asked, search for host visible and coherent, otherwise search
-    // for a fast device local type.
-    const VkMemoryPropertyFlags wanted_properties =
-        host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
-                     : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-
-    if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
-        return commit;
-    }
-
-    // Commit has failed, allocate more memory.
-    if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
-        // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
-        // Allocation has failed, panic.
-        UNREACHABLE_MSG("Ran out of VRAM!");
-        return {};
-    }
-
-    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
-    // there's a bug.
-    auto commit = TryAllocCommit(requirements, wanted_properties);
-    ASSERT(commit);
-    return commit;
-}
-
-VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) {
-    auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible);
-    buffer.BindMemory(commit->GetMemory(), commit->GetOffset());
-    return commit;
-}
-
-VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) {
-    auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible);
-    image.BindMemory(commit->GetMemory(), commit->GetOffset());
-    return commit;
-}
-
-bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask,
-                                  u64 size) {
-    const u32 type = [&] {
-        for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
-            const auto flags = properties.memoryTypes[type_index].propertyFlags;
-            if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
-                // The type matches in type and in the wanted properties.
-                return type_index;
-            }
-        }
-        UNREACHABLE_MSG("Couldn't find a compatible memory type!");
-        return 0U;
-    }();
-
-    // Try to allocate found type.
-    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
-        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .pNext = nullptr,
-        .allocationSize = size,
-        .memoryTypeIndex = type,
-    });
-    if (!memory) {
-        LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
-        return false;
-    }
-
-    allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory),
-                                                               wanted_properties, size, type));
-    return true;
-}
-
-VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements,
-                                               VkMemoryPropertyFlags wanted_properties) {
-    for (auto& allocation : allocations) {
-        if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
-            continue;
-        }
-        if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
-            return commit;
-        }
-    }
-    return {};
-}
-
-VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
-                                       const vk::DeviceMemory& memory_, u64 begin_, u64 end_)
-    : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {}
-
-VKMemoryCommitImpl::~VKMemoryCommitImpl() {
-    allocation->Free(this);
-}
-
-MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
-    return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
-}
-
-void VKMemoryCommitImpl::Unmap() const {
-    memory.Unmap();
-}
-
-MemoryMap VKMemoryCommitImpl::Map() const {
-    return Map(interval.second - interval.first);
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
deleted file mode 100644
index 2452bca4e..000000000
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <span>
-#include <utility>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-
-class Device;
-class MemoryMap;
-class VKMemoryAllocation;
-class VKMemoryCommitImpl;
-
-using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
-
-class VKMemoryManager final {
-public:
-    explicit VKMemoryManager(const Device& device_);
-    VKMemoryManager(const VKMemoryManager&) = delete;
-    ~VKMemoryManager();
-
-    /**
-     * Commits a memory with the specified requeriments.
-     * @param requirements Requirements returned from a Vulkan call.
-     * @param host_visible Signals the allocator that it *must* use host visible and coherent
-     *                     memory. When passing false, it will try to allocate device local memory.
-     * @returns A memory commit.
-     */
-    VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible);
-
-    /// Commits memory required by the buffer and binds it.
-    VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
-
-    /// Commits memory required by the image and binds it.
-    VKMemoryCommit Commit(const vk::Image& image, bool host_visible);
-
-private:
-    /// Allocates a chunk of memory.
-    bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
-
-    /// Tries to allocate a memory commit.
-    VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
-                                  VkMemoryPropertyFlags wanted_properties);
-
-    const Device& device;                                         ///< Device handler.
-    const VkPhysicalDeviceMemoryProperties properties;            ///< Physical device properties.
-    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
-};
-
-class VKMemoryCommitImpl final {
-    friend VKMemoryAllocation;
-    friend MemoryMap;
-
-public:
-    explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
-                                const vk::DeviceMemory& memory_, u64 begin_, u64 end_);
-    ~VKMemoryCommitImpl();
-
-    /// Maps a memory region and returns a pointer to it.
-    /// It's illegal to have more than one memory map at the same time.
-    MemoryMap Map(u64 size, u64 offset = 0) const;
-
-    /// Maps the whole commit and returns a pointer to it.
-    /// It's illegal to have more than one memory map at the same time.
-    MemoryMap Map() const;
-
-    /// Returns the Vulkan memory handler.
-    VkDeviceMemory GetMemory() const {
-        return *memory;
-    }
-
-    /// Returns the start position of the commit relative to the allocation.
-    VkDeviceSize GetOffset() const {
-        return static_cast<VkDeviceSize>(interval.first);
-    }
-
-private:
-    /// Unmaps memory.
-    void Unmap() const;
-
-    const Device& device;             ///< Vulkan device.
-    const vk::DeviceMemory& memory;   ///< Vulkan device memory handler.
-    std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
-    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
-};
-
-/// Holds ownership of a memory map.
-class MemoryMap final {
-public:
-    explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
-        : commit{commit_}, span{span_} {}
-
-    ~MemoryMap() {
-        if (commit) {
-            commit->Unmap();
-        }
-    }
-
-    /// Prematurely releases the memory map.
-    void Release() {
-        commit->Unmap();
-        commit = nullptr;
-    }
-
-    /// Returns a span to the memory map.
-    [[nodiscard]] std::span<u8> Span() const noexcept {
-        return span;
-    }
-
-    /// Returns the address of the memory map.
-    [[nodiscard]] u8* Address() const noexcept {
-        return span.data();
-    }
-
-    /// Returns the address of the memory map;
-    [[nodiscard]] operator u8*() const noexcept {
-        return span.data();
-    }
-
-private:
-    const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
-    std::span<u8> span;                 ///< Address to the mapped memory.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 93fbea510..f0a111829 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -409,27 +409,26 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
 RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                    Tegra::MemoryManager& gpu_memory_,
                                    Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
-                                   const Device& device_, VKMemoryManager& memory_manager_,
+                                   const Device& device_, MemoryAllocator& memory_allocator_,
                                    StateTracker& state_tracker_, VKScheduler& scheduler_)
     : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
       gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
-      screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_},
+      screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
       state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
-      staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler),
+      staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
       update_descriptor_queue(device, scheduler),
       blit_image(device, scheduler, state_tracker, descriptor_pool),
       quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
       quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
-      texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image},
+      texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
                      descriptor_pool, update_descriptor_queue),
-      buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer,
-                   staging_pool),
+      buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
+                   stream_buffer, staging_pool),
       query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
-      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
-                    scheduler),
+      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
@@ -1446,7 +1445,7 @@ VkBuffer RasterizerVulkan::DefaultBuffer() {
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     });
-    default_buffer_commit = memory_manager.Commit(default_buffer, false);
+    default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
 
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 4695718e9..8e261b9bd 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,7 +21,6 @@
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -30,6 +29,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader/async_shaders.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
@@ -56,7 +56,7 @@ public:
     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
                               VKScreenInfo& screen_info_, const Device& device_,
-                              VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
+                              MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
                               VKScheduler& scheduler_);
     ~RasterizerVulkan() override;
 
@@ -213,12 +213,12 @@ private:
 
     VKScreenInfo& screen_info;
     const Device& device;
-    VKMemoryManager& memory_manager;
+    MemoryAllocator& memory_allocator;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
 
     VKStreamBuffer stream_buffer;
-    VKStagingBufferPool staging_pool;
+    StagingBufferPool staging_pool;
     VKDescriptorPool descriptor_pool;
     VKUpdateDescriptorQueue update_descriptor_queue;
     BlitImageHelper blit_image;
@@ -234,7 +234,7 @@ private:
     VKFenceManager fence_manager;
 
     vk::Buffer default_buffer;
-    VKMemoryCommit default_buffer_commit;
+    MemoryCommit default_buffer_commit;
     vk::Event wfi_event;
     VideoCommon::Shader::AsyncShaders async_shaders;
 
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 89cbe01ad..61d52b961 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1334,7 +1334,10 @@ private:
         }
 
         if (const auto comment = std::get_if<CommentNode>(&*node)) {
-            Name(OpUndef(t_void), comment->GetText());
+            if (device.HasDebuggingToolAttached()) {
+                // TODO: Emit comments with OpString instead of naming an OpUndef placeholder
+                Name(OpUndef(t_int), comment->GetText());
+            }
             return {};
         }
 
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 1e0b8b922..97fd41cc1 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -3,10 +3,12 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
-#include <unordered_map>
 #include <utility>
 #include <vector>
 
+#include <fmt/format.h>
+
+#include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -16,45 +18,51 @@
 
 namespace Vulkan {
 
-VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
-    : buffer{std::move(buffer_)} {}
-
-VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_,
-                                         VKScheduler& scheduler_)
-    : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
+StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
+                                     VKScheduler& scheduler_)
+    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
 
-VKStagingBufferPool::~VKStagingBufferPool() = default;
+StagingBufferPool::~StagingBufferPool() = default;
 
-VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
-    if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
-        return *buffer;
+StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
+    if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
+        return *ref;
     }
-    return CreateStagingBuffer(size, host_visible);
+    return CreateStagingBuffer(size, usage);
 }
 
-void VKStagingBufferPool::TickFrame() {
-    current_delete_level = (current_delete_level + 1) % NumLevels;
+void StagingBufferPool::TickFrame() {
+    current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
 
-    ReleaseCache(true);
-    ReleaseCache(false);
+    ReleaseCache(MemoryUsage::DeviceLocal);
+    ReleaseCache(MemoryUsage::Upload);
+    ReleaseCache(MemoryUsage::Download);
 }
 
-VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
-    for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
-        if (!scheduler.IsFree(entry.tick)) {
-            continue;
+std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
+                                                                        MemoryUsage usage) {
+    StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
+
+    const auto is_free = [this](const StagingBuffer& entry) {
+        return scheduler.IsFree(entry.tick);
+    };
+    auto& entries = cache_level.entries;
+    const auto hint_it = entries.begin() + cache_level.iterate_index;
+    auto it = std::find_if(entries.begin() + cache_level.iterate_index, entries.end(), is_free);
+    if (it == entries.end()) {
+        it = std::find_if(entries.begin(), hint_it, is_free);
+        if (it == hint_it) {
+            return std::nullopt;
         }
-        entry.tick = scheduler.CurrentTick();
-        return &*entry.buffer;
     }
-    return nullptr;
+    cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
+    it->tick = scheduler.CurrentTick();
+    return it->Ref();
 }
 
-VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
+StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
     const u32 log2 = Common::Log2Ceil64(size);
-
-    auto buffer = std::make_unique<VKBuffer>();
-    buffer->handle = device.GetLogical().CreateBuffer({
+    vk::Buffer buffer = device.GetLogical().CreateBuffer({
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
@@ -66,49 +74,63 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     });
-    buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
-
-    std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries;
-    StagingBuffer& entry = entries.emplace_back(std::move(buffer));
-    entry.tick = scheduler.CurrentTick();
-    return *entry.buffer;
-}
-
-VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
-    return host_visible ? host_staging_buffers : device_staging_buffers;
+    if (device.HasDebuggingToolAttached()) {
+        ++buffer_index;
+        buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
+    }
+    MemoryCommit commit = memory_allocator.Commit(buffer, usage);
+    const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
+
+    StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
+        .buffer = std::move(buffer),
+        .commit = std::move(commit),
+        .mapped_span = mapped_span,
+        .tick = scheduler.CurrentTick(),
+    });
+    return entry.Ref();
 }
 
-void VKStagingBufferPool::ReleaseCache(bool host_visible) {
-    auto& cache = GetCache(host_visible);
-    const u64 size = ReleaseLevel(cache, current_delete_level);
-    if (size == 0) {
-        return;
+StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
+    switch (usage) {
+    case MemoryUsage::DeviceLocal:
+        return device_local_cache;
+    case MemoryUsage::Upload:
+        return upload_cache;
+    case MemoryUsage::Download:
+        return download_cache;
+    default:
+        UNREACHABLE_MSG("Invalid memory usage={}", usage);
+        return upload_cache;
     }
 }
 
-u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
-    static constexpr std::size_t deletions_per_tick = 16;
+void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
+    ReleaseLevel(GetCache(usage), current_delete_level);
+}
 
+void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
+    constexpr size_t deletions_per_tick = 16;
     auto& staging = cache[log2];
     auto& entries = staging.entries;
-    const std::size_t old_size = entries.size();
+    const size_t old_size = entries.size();
 
     const auto is_deleteable = [this](const StagingBuffer& entry) {
         return scheduler.IsFree(entry.tick);
     };
-    const std::size_t begin_offset = staging.delete_index;
-    const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
-    const auto begin = std::begin(entries) + begin_offset;
-    const auto end = std::begin(entries) + end_offset;
+    const size_t begin_offset = staging.delete_index;
+    const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
+    const auto begin = entries.begin() + begin_offset;
+    const auto end = entries.begin() + end_offset;
     entries.erase(std::remove_if(begin, end, is_deleteable), end);
 
-    const std::size_t new_size = entries.size();
+    const size_t new_size = entries.size();
     staging.delete_index += deletions_per_tick;
     if (staging.delete_index >= new_size) {
         staging.delete_index = 0;
     }
-
-    return (1ULL << log2) * (old_size - new_size);
+    if (staging.iterate_index > new_size) {
+        staging.iterate_index = 0;
+    }
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 90dadcbbe..d42918a47 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -9,7 +9,7 @@
 
 #include "common/common_types.h"
 
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
@@ -17,55 +17,65 @@ namespace Vulkan {
 class Device;
 class VKScheduler;
 
-struct VKBuffer final {
-    vk::Buffer handle;
-    VKMemoryCommit commit;
+struct StagingBufferRef {
+    VkBuffer buffer;
+    std::span<u8> mapped_span;
 };
 
-class VKStagingBufferPool final {
+class StagingBufferPool {
 public:
-    explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager,
-                                 VKScheduler& scheduler);
-    ~VKStagingBufferPool();
+    explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
+                               VKScheduler& scheduler);
+    ~StagingBufferPool();
 
-    VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);
+    StagingBufferRef Request(size_t size, MemoryUsage usage);
 
     void TickFrame();
 
 private:
-    struct StagingBuffer final {
-        explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer);
-
-        std::unique_ptr<VKBuffer> buffer;
+    struct StagingBuffer {
+        vk::Buffer buffer;
+        MemoryCommit commit;
+        std::span<u8> mapped_span;
         u64 tick = 0;
+
+        StagingBufferRef Ref() const noexcept {
+            return {
+                .buffer = *buffer,
+                .mapped_span = mapped_span,
+            };
+        }
     };
 
-    struct StagingBuffers final {
+    struct StagingBuffers {
         std::vector<StagingBuffer> entries;
-        std::size_t delete_index = 0;
+        size_t delete_index = 0;
+        size_t iterate_index = 0;
     };
 
-    static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
-    using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;
+    static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
+    using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
 
-    VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);
+    std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
 
-    VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);
+    StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
 
-    StagingBuffersCache& GetCache(bool host_visible);
+    StagingBuffersCache& GetCache(MemoryUsage usage);
 
-    void ReleaseCache(bool host_visible);
+    void ReleaseCache(MemoryUsage usage);
 
-    u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
+    void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
 
     const Device& device;
-    VKMemoryManager& memory_manager;
+    MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;
 
-    StagingBuffersCache host_staging_buffers;
-    StagingBuffersCache device_staging_buffers;
+    StagingBuffersCache device_local_cache;
+    StagingBuffersCache upload_cache;
+    StagingBuffersCache download_cache;
 
-    std::size_t current_delete_level = 0;
+    size_t current_delete_level = 0;
+    u64 buffer_index = 0;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index bd11de012..ab14922d7 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -10,12 +10,12 @@
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
@@ -554,10 +554,18 @@ void TextureCacheRuntime::Finish() {
 }
 
 ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
-    const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true);
-    return ImageBufferMap{
-        .handle = *buffer.handle,
-        .map = buffer.commit->Map(size),
+    const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
+    return {
+        .handle = staging_ref.buffer,
+        .span = staging_ref.mapped_span,
+    };
+}
+
+ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+    const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
+    return {
+        .handle = staging_ref.buffer,
+        .span = staging_ref.mapped_span,
     };
 }
 
@@ -788,9 +796,9 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
       image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
       aspect_mask(ImageAspectMask(info.format)) {
     if (image) {
-        commit = runtime.memory_manager.Commit(image, false);
+        commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
     } else {
-        commit = runtime.memory_manager.Commit(buffer, false);
+        commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
     }
     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
         flags |= VideoCommon::ImageFlagBits::Converted;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 92a7aad8b..a55d405d1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,8 +7,8 @@
 #include <compare>
 #include <span>
 
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/texture_cache/texture_cache.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
@@ -19,14 +19,13 @@ using VideoCommon::Offset2D;
 using VideoCommon::RenderTargets;
 using VideoCore::Surface::PixelFormat;
 
-class VKScheduler;
-class VKStagingBufferPool;
-
 class BlitImageHelper;
 class Device;
 class Image;
 class ImageView;
 class Framebuffer;
+class StagingBufferPool;
+class VKScheduler;
 
 struct RenderPassKey {
     constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
@@ -60,18 +59,18 @@ struct ImageBufferMap {
     }
 
     [[nodiscard]] std::span<u8> Span() const noexcept {
-        return map.Span();
+        return span;
     }
 
     VkBuffer handle;
-    MemoryMap map;
+    std::span<u8> span;
 };
 
 struct TextureCacheRuntime {
     const Device& device;
     VKScheduler& scheduler;
-    VKMemoryManager& memory_manager;
-    VKStagingBufferPool& staging_buffer_pool;
+    MemoryAllocator& memory_allocator;
+    StagingBufferPool& staging_buffer_pool;
     BlitImageHelper& blit_image_helper;
     std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache;
 
@@ -79,10 +78,7 @@ struct TextureCacheRuntime {
 
     [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
 
-    [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) {
-        // TODO: Have a special function for this
-        return MapUploadBuffer(size);
-    }
+    [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
 
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                    const std::array<Offset2D, 2>& dst_region,
@@ -141,7 +137,7 @@ private:
     VKScheduler* scheduler;
     vk::Image image;
     vk::Buffer buffer;
-    VKMemoryCommit commit;
+    MemoryCommit commit;
     VkImageAspectFlags aspect_mask = 0;
     bool initialized = false;
 };
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
index a4fc1184b..15585caeb 100644
--- a/src/video_core/texture_cache/accelerated_swizzle.cpp
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -27,7 +27,7 @@ BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameter
     const Extent3D num_tiles = swizzle.num_tiles;
     const u32 bytes_per_block = BytesPerBlock(info.format);
     const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
-    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
+    const u32 stride = Common::AlignUpLog2(num_tiles.width, stride_alignment) * bytes_per_block;
     const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
     return BlockLinearSwizzle2DParams{
         .origin{0, 0, 0},
@@ -47,7 +47,7 @@ BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameter
     const Extent3D num_tiles = swizzle.num_tiles;
     const u32 bytes_per_block = BytesPerBlock(info.format);
     const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
-    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
+    const u32 stride = Common::AlignUpLog2(num_tiles.width, stride_alignment) * bytes_per_block;
 
     const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 279932778..b23424523 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -279,7 +279,7 @@ template <u32 GOB_EXTENT>
     const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
     const u32 alignment = is_small ? 0 : info.tile_width_spacing;
     return Extent2D{
-        .width = Common::AlignBits(gobs.width, alignment),
+        .width = Common::AlignUpLog2(gobs.width, alignment),
         .height = gobs.height,
     };
 }
@@ -352,7 +352,7 @@ template <u32 GOB_EXTENT>
     // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
     if (tile_width_spacing > 0) {
         const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
-        return Common::AlignBits(size_bytes, alignment_log2);
+        return Common::AlignUpLog2(size_bytes, alignment_log2);
     }
     const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
     while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
@@ -528,9 +528,9 @@ template <u32 GOB_EXTENT>
     const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
     const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
     return Extent3D{
-        .width = Common::AlignBits(num_tiles.width, alignment),
-        .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
-        .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
+        .width = Common::AlignUpLog2(num_tiles.width, alignment),
+        .height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
+        .depth = Common::AlignUpLog2(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
     };
 }
 
@@ -679,7 +679,7 @@ u32 CalculateLayerSize(const ImageInfo& info) noexcept {
 }
 
 std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
-    ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
+    ASSERT(info.resources.levels <= static_cast<s32>(MAX_MIP_LEVELS));
     const LevelInfo level_info = MakeLevelInfo(info);
     std::array<u32, MAX_MIP_LEVELS> offsets{};
     u32 offset = 0;
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index acd5bdd78..3625b666c 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -42,21 +42,24 @@ constexpr u32 Popcnt(u32 n) {
 
 class InputBitStream {
 public:
-    constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
-        : cur_byte{ptr}, next_bit{start_offset % 8} {}
+    constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0)
+        : cur_byte{data.data()}, total_bits{data.size() * 8}, next_bit{start_offset % 8} {}
 
-    constexpr std::size_t GetBitsRead() const {
+    constexpr size_t GetBitsRead() const {
         return bits_read;
     }
 
     constexpr bool ReadBit() {
-        const bool bit = (*cur_byte >> next_bit++) & 1;
+        if (bits_read >= total_bits) { // Stream exhausted: yield zero bits, never overread
+            return false;
+        }
+        const bool bit = ((*cur_byte >> next_bit) & 1) != 0;
+        ++next_bit;
         while (next_bit >= 8) {
             next_bit -= 8;
-            cur_byte++;
+            ++cur_byte;
         }
-
-        bits_read++;
+        ++bits_read;
         return bit;
     }
 
@@ -79,8 +82,9 @@ public:
 
 private:
     const u8* cur_byte;
-    std::size_t next_bit = 0;
-    std::size_t bits_read = 0;
+    size_t total_bits = 0;
+    size_t next_bit = 0;
+    size_t bits_read = 0;
 };
 
 class OutputBitStream {
@@ -193,15 +197,15 @@ struct IntegerEncodedValue {
     };
 };
 using IntegerEncodedVector = boost::container::static_vector<
-    IntegerEncodedValue, 64,
+    IntegerEncodedValue, 256,
     boost::container::static_vector_options<
         boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
         boost::container::throw_on_overflow<false>>::type>;
 
 static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
     // Implement the algorithm in section C.2.12
-    u32 m[5];
-    u32 t[5];
+    std::array<u32, 5> m;
+    std::array<u32, 5> t;
     u32 T;
 
     // Read the trit encoded block according to
@@ -866,7 +870,7 @@ public:
     }
 };
 
-static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,
+static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions,
                               const u32 nBitsForColorData) {
     // First figure out how many color values we have
     u32 nValues = 0;
@@ -898,7 +902,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
     // We now have enough to decode our integer sequence.
     IntegerEncodedVector decodedColorValues;
 
-    InputBitStream colorStream(data);
+    InputBitStream colorStream(data, 0);
     DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
 
     // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1441,7 +1445,7 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
 
 static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
                             const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
-    InputBitStream strm(inBuf.data());
+    InputBitStream strm(inBuf);
     TexelWeightParams weightParams = DecodeBlockInfo(strm);
 
     // Was there an error?
@@ -1619,15 +1623,16 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
 
     // Make sure that higher non-texel bits are set to zero
     const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
-    if (clearByteStart > 0) {
+    if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) {
         texelWeightData[clearByteStart - 1] &=
             static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+        std::memset(texelWeightData.data() + clearByteStart, 0,
+                    std::min(16U - clearByteStart, 16U));
     }
-    std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
 
     IntegerEncodedVector texelWeightValues;
 
-    InputBitStream weightStream(texelWeightData.data());
+    InputBitStream weightStream(texelWeightData);
 
     DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
                           weightParams.GetNumWeightValues());
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 9f5181318..62685a183 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -49,7 +49,7 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
     // We can configure here a custom pitch
     // As it's not exposed 'width * bpp' will be the expected pitch.
     const u32 pitch = width * bytes_per_pixel;
-    const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
+    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
 
     const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -217,9 +217,9 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                           u32 block_height, u32 block_depth) {
     if (tiled) {
-        const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
-        const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height);
-        const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth);
+        const u32 aligned_width = Common::AlignUpLog2(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
+        const u32 aligned_height = Common::AlignUpLog2(height, GOB_SIZE_Y_SHIFT + block_height);
+        const u32 aligned_depth = Common::AlignUpLog2(depth, GOB_SIZE_Z_SHIFT + block_depth);
         return aligned_width * aligned_height * aligned_depth;
     } else {
         return width * height * depth * bytes_per_pixel;
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
index 2efcd244c..b0519f132 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.h
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#pragma once
+
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 75173324e..37d7b45a3 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -99,8 +99,7 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
     });
 }
 
-std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
-    vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
+std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::PhysicalDevice physical) {
     static constexpr std::array formats{
         VK_FORMAT_A8B8G8R8_UNORM_PACK32,
         VK_FORMAT_A8B8G8R8_UINT_PACK32,
@@ -210,7 +209,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
 Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
                const vk::InstanceDispatch& dld_)
     : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
-      format_properties{GetFormatProperties(physical, dld)} {
+      format_properties{GetFormatProperties(physical)} {
     CheckSuitability();
     SetupFamilies(surface);
     SetupFeatures();
@@ -221,6 +220,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     VkPhysicalDeviceFeatures2 features2{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
         .pNext = nullptr,
+        .features{},
     };
     const void* first_next = &features2;
     void** next = &features2.pNext;
@@ -256,7 +256,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         .shaderTessellationAndGeometryPointSize = false,
         .shaderImageGatherExtended = true,
         .shaderStorageImageExtendedFormats = false,
-        .shaderStorageImageMultisample = true,
+        .shaderStorageImageMultisample = is_shader_storage_image_multisample,
         .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
         .shaderStorageImageWriteWithoutFormat = true,
         .shaderUniformBufferArrayDynamicIndexing = false,
@@ -310,6 +310,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
 
     VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
+        .pNext = nullptr,
         .hostQueryReset = true,
     };
     SetNext(next, host_query_reset);
@@ -604,7 +605,6 @@ void Device::CheckSuitability() const {
         std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
         std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
-        std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
         std::make_pair(features.shaderStorageImageWriteWithoutFormat,
                        "shaderStorageImageWriteWithoutFormat"),
     };
@@ -804,6 +804,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
 void Device::SetupFeatures() {
     const auto supported_features{physical.GetFeatures()};
     is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
+    is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample;
     is_blit_depth_stencil_supported = TestDepthStencilBlits();
     is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
 }
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index a973c3ce4..4b66dba7a 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -272,23 +272,24 @@ private:
     bool is_optimal_astc_supported{};       ///< Support for native ASTC.
     bool is_float16_supported{};            ///< Support for float16 arithmetics.
     bool is_warp_potentially_bigger{};      ///< Host warp size can be bigger than guest.
-    bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
-    bool is_blit_depth_stencil_supported{};    ///< Support for blitting from and to depth stencil.
-    bool nv_viewport_swizzle{};                ///< Support for VK_NV_viewport_swizzle.
-    bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
-    bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
-    bool ext_sampler_filter_minmax{};          ///< Support for VK_EXT_sampler_filter_minmax.
-    bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
-    bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.
-    bool ext_tooling_info{};                   ///< Support for VK_EXT_tooling_info.
-    bool ext_transform_feedback{};             ///< Support for VK_EXT_transform_feedback.
-    bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.
-    bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
-    bool ext_robustness2{};                    ///< Support for VK_EXT_robustness2.
-    bool ext_shader_stencil_export{};          ///< Support for VK_EXT_shader_stencil_export.
-    bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
-    bool has_renderdoc{};                      ///< Has RenderDoc attached
-    bool has_nsight_graphics{};                ///< Has Nsight Graphics attached
+    bool is_formatless_image_load_supported{};  ///< Support for shader image read without format.
+    bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
+    bool is_blit_depth_stencil_supported{};     ///< Support for blitting from and to depth stencil.
+    bool nv_viewport_swizzle{};                 ///< Support for VK_NV_viewport_swizzle.
+    bool khr_uniform_buffer_standard_layout{};  ///< Support for std430 on UBOs.
+    bool ext_index_type_uint8{};                ///< Support for VK_EXT_index_type_uint8.
+    bool ext_sampler_filter_minmax{};           ///< Support for VK_EXT_sampler_filter_minmax.
+    bool ext_depth_range_unrestricted{};        ///< Support for VK_EXT_depth_range_unrestricted.
+    bool ext_shader_viewport_index_layer{};     ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool ext_tooling_info{};                    ///< Support for VK_EXT_tooling_info.
+    bool ext_transform_feedback{};              ///< Support for VK_EXT_transform_feedback.
+    bool ext_custom_border_color{};             ///< Support for VK_EXT_custom_border_color.
+    bool ext_extended_dynamic_state{};          ///< Support for VK_EXT_extended_dynamic_state.
+    bool ext_robustness2{};                     ///< Support for VK_EXT_robustness2.
+    bool ext_shader_stencil_export{};           ///< Support for VK_EXT_shader_stencil_export.
+    bool nv_device_diagnostics_config{};        ///< Support for VK_NV_device_diagnostics_config.
+    bool has_renderdoc{};                       ///< Has RenderDoc attached
+    bool has_nsight_graphics{};                 ///< Has Nsight Graphics attached
 
     // Asynchronous Graphics Pipeline setting
     bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
new file mode 100644
index 000000000..d6eb3af31
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -0,0 +1,268 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <bit>
+#include <optional>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+namespace {
+struct Range {
+    u64 begin;
+    u64 end;
+
+    [[nodiscard]] bool Contains(u64 iterator, u64 size) const noexcept {
+        return iterator < end && begin < iterator + size;
+    }
+};
+
+[[nodiscard]] u64 AllocationChunkSize(u64 required_size) {
+    static constexpr std::array sizes{
+        0x1000ULL << 10,  0x1400ULL << 10,  0x1800ULL << 10,  0x1c00ULL << 10, 0x2000ULL << 10,
+        0x3200ULL << 10,  0x4000ULL << 10,  0x6000ULL << 10,  0x8000ULL << 10, 0xA000ULL << 10,
+        0x10000ULL << 10, 0x18000ULL << 10, 0x20000ULL << 10,
+    };
+    static_assert(std::is_sorted(sizes.begin(), sizes.end()));
+
+    const auto it = std::ranges::lower_bound(sizes, required_size);
+    return it != sizes.end() ? *it : Common::AlignUp(required_size, 4ULL << 20);
+}
+
+[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
+    switch (usage) {
+    case MemoryUsage::DeviceLocal:
+        return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+    case MemoryUsage::Upload:
+        return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    case MemoryUsage::Download:
+        return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+               VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+    }
+    UNREACHABLE_MSG("Invalid memory usage={}", usage);
+    return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+}
+} // Anonymous namespace
+
+class MemoryAllocation {
+public:
+    explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
+                              VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
+        : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
+          property_flags{properties}, shifted_memory_type{ShiftType(type)} {}
+    /// Reserves an aligned range of `size` bytes, or returns nullopt when nothing fits.
+    [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
+        const std::optional<u64> alloc = FindFreeRegion(size, alignment);
+        if (!alloc) {
+            // Signal out of memory, it'll try to do more allocations.
+            return std::nullopt;
+        }
+        const Range range{
+            .begin = *alloc,
+            .end = *alloc + size,
+        };
+        commits.insert(std::ranges::upper_bound(commits, *alloc, {}, &Range::begin), range);
+        return std::make_optional<MemoryCommit>(this, *memory, *alloc, *alloc + size);
+    }
+    /// Drops the commit range that starts at `begin`; asserts when no such range exists.
+    void Free(u64 begin) {
+        const auto it = std::ranges::find(commits, begin, &Range::begin);
+        ASSERT_MSG(it != commits.end(), "Invalid commit");
+        commits.erase(it);
+    }
+    /// Maps the whole allocation on first use and returns the cached span afterwards.
+    [[nodiscard]] std::span<u8> Map() {
+        if (memory_mapped_span.empty()) {
+            u8* const raw_pointer = memory.Map(0, allocation_size);
+            memory_mapped_span = std::span<u8>(raw_pointer, allocation_size);
+        }
+        return memory_mapped_span;
+    }
+
+    /// Compatible when this allocation holds every bit of flags and its type is in type_mask.
+    [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
+        return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
+    }
+
+private:
+    [[nodiscard]] static constexpr u32 ShiftType(u32 type) {
+        return 1U << type;
+    }
+    /// Scans the sorted commits for the first aligned offset where `size` bytes are unused.
+    [[nodiscard]] std::optional<u64> FindFreeRegion(u64 size, u64 alignment) noexcept {
+        ASSERT(std::has_single_bit(alignment));
+        const u64 alignment_log2 = std::countr_zero(alignment);
+        std::optional<u64> candidate;
+        u64 iterator = 0;
+        auto commit = commits.begin();
+        while (iterator + size <= allocation_size) {
+            candidate = candidate.value_or(iterator);
+            if (commit == commits.end()) {
+                break;
+            }
+            if (commit->Contains(*candidate, size)) {
+                candidate = std::nullopt;
+            }
+            iterator = Common::AlignUpLog2(commit->end, alignment_log2);
+            ++commit;
+        }
+        return candidate;
+    }
+
+    const Device& device;                       ///< Vulkan device.
+    const vk::DeviceMemory memory;              ///< Vulkan memory allocation handle.
+    const u64 allocation_size;                  ///< Size of this allocation.
+    const VkMemoryPropertyFlags property_flags; ///< Property flags this allocation was made for.
+    const u32 shifted_memory_type;              ///< Memory type index as a bit (1 << type).
+    std::vector<Range> commits;                 ///< Commit ranges, kept sorted by begin offset.
+    std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
+};
+
+MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
+                           u64 end_) noexcept
+    : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
+
+MemoryCommit::~MemoryCommit() {
+    Release();
+}
+
+MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
+    Release();
+    allocation = std::exchange(rhs.allocation, nullptr);
+    memory = rhs.memory;
+    begin = rhs.begin;
+    end = rhs.end;
+    span = std::exchange(rhs.span, std::span<u8>{});
+    return *this;
+}
+
+MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
+    : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
+      end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
+
+std::span<u8> MemoryCommit::Map() {
+    if (span.empty()) {
+        span = allocation->Map().subspan(begin, end - begin);
+    }
+    return span;
+}
+
+void MemoryCommit::Release() {
+    if (allocation) {
+        allocation->Free(begin);
+    }
+}
+
+MemoryAllocator::MemoryAllocator(const Device& device_)
+    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
+
+MemoryAllocator::~MemoryAllocator() = default;
+
+MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
+    // Find the fastest memory flags we can afford with the current requirements
+    const VkMemoryPropertyFlags flags = MemoryPropertyFlags(requirements.memoryTypeBits, usage);
+    if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
+        return std::move(*commit);
+    }
+    // Commit has failed, allocate more memory.
+    // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
+    AllocMemory(flags, requirements.memoryTypeBits, AllocationChunkSize(requirements.size));
+
+    // Commit again, this time it won't fail since there's a fresh allocation above.
+    // If it does, there's a bug.
+    return TryCommit(requirements, flags).value();
+}
+
+MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
+    auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
+    buffer.BindMemory(commit.Memory(), commit.Offset());
+    return commit;
+}
+
+MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
+    auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage);
+    image.BindMemory(commit.Memory(), commit.Offset());
+    return commit;
+}
+
+void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
+    const u32 type = FindType(flags, type_mask).value();
+    vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .allocationSize = size,
+        .memoryTypeIndex = type,
+    });
+    allocations.push_back(
+        std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type));
+}
+
+std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
+                                                       VkMemoryPropertyFlags flags) {
+    for (auto& allocation : allocations) {
+        if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
+            continue;
+        }
+        if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
+            return commit;
+        }
+    }
+    return std::nullopt;
+}
+
+VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const {
+    return MemoryPropertyFlags(type_mask, MemoryUsagePropertyFlags(usage));
+}
+
+VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
+                                                           VkMemoryPropertyFlags flags) const {
+    if (FindType(flags, type_mask)) {
+        // Found a memory type with those requirements
+        return flags;
+    }
+    if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+        // Remove host cached bit in case it's not supported
+        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+    }
+    if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+        // Remove device local, if it's not supported by the requested resource
+        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    }
+    UNREACHABLE_MSG("No compatible memory types found");
+    return 0;
+}
+
+std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
+    for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
+        const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
+        if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
+            // The type is allowed by the mask and provides every wanted property.
+            return type_index;
+        }
+    }
+    // No memory type satisfies both the type mask and the wanted property flags.
+    return std::nullopt;
+}
+
+bool IsHostVisible(MemoryUsage usage) noexcept {
+    switch (usage) {
+    case MemoryUsage::DeviceLocal:
+        return false;
+    case MemoryUsage::Upload:
+    case MemoryUsage::Download:
+        return true;
+    }
+    UNREACHABLE_MSG("Invalid memory usage={}", usage);
+    return false;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
new file mode 100644
index 000000000..9e6cfabf9
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -0,0 +1,117 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <span>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class MemoryMap;
+class MemoryAllocation;
+
+/// Hints and requirements for the backing memory type of a commit
+enum class MemoryUsage {
+    DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU
+    Upload,      ///< Requires a host visible memory type optimized for CPU to GPU uploads
+    Download,    ///< Requires a host visible memory type optimized for GPU to CPU readbacks
+};
+
+/// Ownership handle of a memory commitment.
+/// Points to a subregion of a memory allocation and frees that subregion on destruction.
+class MemoryCommit {
+public:
+    explicit MemoryCommit() noexcept = default;
+    explicit MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
+                          u64 end_) noexcept;
+    ~MemoryCommit();
+
+    MemoryCommit& operator=(MemoryCommit&&) noexcept;
+    MemoryCommit(MemoryCommit&&) noexcept;
+
+    MemoryCommit& operator=(const MemoryCommit&) = delete;
+    MemoryCommit(const MemoryCommit&) = delete;
+
+    /// Returns a host visible memory map.
+    /// It will map the backing allocation if it hasn't been mapped before.
+    std::span<u8> Map();
+
+    /// Returns the Vulkan memory handle.
+    VkDeviceMemory Memory() const {
+        return memory;
+    }
+
+    /// Returns the start position of the commit relative to the allocation.
+    VkDeviceSize Offset() const {
+        return static_cast<VkDeviceSize>(begin);
+    }
+
+private:
+    void Release();
+
+    MemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
+    VkDeviceMemory memory{};        ///< Vulkan device memory handle.
+    u64 begin{};                    ///< Beginning offset in bytes to where the commit exists.
+    u64 end{};                      ///< Offset in bytes where the commit ends.
+    std::span<u8> span;             ///< Host visible memory span. Empty if not queried before.
+};
+
+/// Memory allocator container.
+/// Allocates and releases memory allocations on demand.
+class MemoryAllocator {
+public:
+    explicit MemoryAllocator(const Device& device_);
+    ~MemoryAllocator();
+
+    MemoryAllocator& operator=(const MemoryAllocator&) = delete;
+    MemoryAllocator(const MemoryAllocator&) = delete;
+
+    /**
+     * Commits memory with the specified requirements.
+     *
+     * @param requirements Requirements returned from a Vulkan call.
+     * @param usage        Indicates how the memory will be used.
+     *
+     * @returns A memory commit.
+     */
+    MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
+
+    /// Commits memory required by the buffer and binds it.
+    MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
+
+    /// Commits memory required by the image and binds it.
+    MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
+
+private:
+    /// Allocates a chunk of memory.
+    void AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
+
+    /// Tries to place a memory commit in one of the current allocations.
+    std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
+                                          VkMemoryPropertyFlags flags);
+
+    /// Returns the fastest compatible memory property flags from a wanted usage.
+    VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const;
+
+    /// Returns the fastest compatible memory property flags from the wanted flags.
+    VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const;
+
+    /// Returns the index of the first memory type matching the requirements, if any.
+    std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
+
+    const Device& device;                                       ///< Device handle.
+    const VkPhysicalDeviceMemoryProperties properties;          ///< Device memory properties.
+    std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
+};
+
+/// Returns true when a memory usage is guaranteed to be host visible.
+bool IsHostVisible(MemoryUsage usage) noexcept;
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 912cab46c..9689de0cb 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -144,152 +144,152 @@ inline VkResult Filter(VkResult result) {
 
 /// Table holding Vulkan instance function pointers.
 struct InstanceDispatch {
-    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
-
-    PFN_vkCreateInstance vkCreateInstance;
-    PFN_vkDestroyInstance vkDestroyInstance;
-    PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
-    PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
-
-    PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
-    PFN_vkCreateDevice vkCreateDevice;
-    PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT;
-    PFN_vkDestroyDevice vkDestroyDevice;
-    PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
-    PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties;
-    PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices;
-    PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
-    PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
-    PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties;
-    PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
-    PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
-    PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
-    PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties;
-    PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
-    PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
-    PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
-    PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
-    PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
-    PFN_vkQueuePresentKHR vkQueuePresentKHR;
+    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+
+    PFN_vkCreateInstance vkCreateInstance{};
+    PFN_vkDestroyInstance vkDestroyInstance{};
+    PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties{};
+    PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{};
+
+    PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{};
+    PFN_vkCreateDevice vkCreateDevice{};
+    PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{};
+    PFN_vkDestroyDevice vkDestroyDevice{};
+    PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{};
+    PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{};
+    PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices{};
+    PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr{};
+    PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{};
+    PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
+    PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
+    PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
+    PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{};
+    PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
+    PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR{};
+    PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR{};
+    PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR{};
+    PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR{};
+    PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR{};
+    PFN_vkQueuePresentKHR vkQueuePresentKHR{};
 };
 
 /// Table holding Vulkan device function pointers.
 struct DeviceDispatch : public InstanceDispatch {
-    PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
-    PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers;
-    PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets;
-    PFN_vkAllocateMemory vkAllocateMemory;
-    PFN_vkBeginCommandBuffer vkBeginCommandBuffer;
-    PFN_vkBindBufferMemory vkBindBufferMemory;
-    PFN_vkBindImageMemory vkBindImageMemory;
-    PFN_vkCmdBeginQuery vkCmdBeginQuery;
-    PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;
-    PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT;
-    PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT;
-    PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
-    PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
-    PFN_vkCmdBindPipeline vkCmdBindPipeline;
-    PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT;
-    PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers;
-    PFN_vkCmdBlitImage vkCmdBlitImage;
-    PFN_vkCmdClearAttachments vkCmdClearAttachments;
-    PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
-    PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage;
-    PFN_vkCmdCopyImage vkCmdCopyImage;
-    PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer;
-    PFN_vkCmdDispatch vkCmdDispatch;
-    PFN_vkCmdDraw vkCmdDraw;
-    PFN_vkCmdDrawIndexed vkCmdDrawIndexed;
-    PFN_vkCmdEndQuery vkCmdEndQuery;
-    PFN_vkCmdEndRenderPass vkCmdEndRenderPass;
-    PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT;
-    PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT;
-    PFN_vkCmdFillBuffer vkCmdFillBuffer;
-    PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
-    PFN_vkCmdPushConstants vkCmdPushConstants;
-    PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
-    PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
-    PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
-    PFN_vkCmdSetEvent vkCmdSetEvent;
-    PFN_vkCmdSetScissor vkCmdSetScissor;
-    PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
-    PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
-    PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
-    PFN_vkCmdSetViewport vkCmdSetViewport;
-    PFN_vkCmdWaitEvents vkCmdWaitEvents;
-    PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT;
-    PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT;
-    PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT;
-    PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT;
-    PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT;
-    PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT;
-    PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT;
-    PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
-    PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
-    PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
-    PFN_vkCmdResolveImage vkCmdResolveImage;
-    PFN_vkCreateBuffer vkCreateBuffer;
-    PFN_vkCreateBufferView vkCreateBufferView;
-    PFN_vkCreateCommandPool vkCreateCommandPool;
-    PFN_vkCreateComputePipelines vkCreateComputePipelines;
-    PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
-    PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
-    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
-    PFN_vkCreateEvent vkCreateEvent;
-    PFN_vkCreateFence vkCreateFence;
-    PFN_vkCreateFramebuffer vkCreateFramebuffer;
-    PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
-    PFN_vkCreateImage vkCreateImage;
-    PFN_vkCreateImageView vkCreateImageView;
-    PFN_vkCreatePipelineLayout vkCreatePipelineLayout;
-    PFN_vkCreateQueryPool vkCreateQueryPool;
-    PFN_vkCreateRenderPass vkCreateRenderPass;
-    PFN_vkCreateSampler vkCreateSampler;
-    PFN_vkCreateSemaphore vkCreateSemaphore;
-    PFN_vkCreateShaderModule vkCreateShaderModule;
-    PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
-    PFN_vkDestroyBuffer vkDestroyBuffer;
-    PFN_vkDestroyBufferView vkDestroyBufferView;
-    PFN_vkDestroyCommandPool vkDestroyCommandPool;
-    PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
-    PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
-    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
-    PFN_vkDestroyEvent vkDestroyEvent;
-    PFN_vkDestroyFence vkDestroyFence;
-    PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
-    PFN_vkDestroyImage vkDestroyImage;
-    PFN_vkDestroyImageView vkDestroyImageView;
-    PFN_vkDestroyPipeline vkDestroyPipeline;
-    PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout;
-    PFN_vkDestroyQueryPool vkDestroyQueryPool;
-    PFN_vkDestroyRenderPass vkDestroyRenderPass;
-    PFN_vkDestroySampler vkDestroySampler;
-    PFN_vkDestroySemaphore vkDestroySemaphore;
-    PFN_vkDestroyShaderModule vkDestroyShaderModule;
-    PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
-    PFN_vkDeviceWaitIdle vkDeviceWaitIdle;
-    PFN_vkEndCommandBuffer vkEndCommandBuffer;
-    PFN_vkFreeCommandBuffers vkFreeCommandBuffers;
-    PFN_vkFreeDescriptorSets vkFreeDescriptorSets;
-    PFN_vkFreeMemory vkFreeMemory;
-    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
-    PFN_vkGetDeviceQueue vkGetDeviceQueue;
-    PFN_vkGetEventStatus vkGetEventStatus;
-    PFN_vkGetFenceStatus vkGetFenceStatus;
-    PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
-    PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
-    PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR;
-    PFN_vkMapMemory vkMapMemory;
-    PFN_vkQueueSubmit vkQueueSubmit;
-    PFN_vkResetFences vkResetFences;
-    PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT;
-    PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT;
-    PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT;
-    PFN_vkUnmapMemory vkUnmapMemory;
-    PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
-    PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
-    PFN_vkWaitForFences vkWaitForFences;
-    PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR;
+    PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
+    PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
+    PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
+    PFN_vkAllocateMemory vkAllocateMemory{};
+    PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
+    PFN_vkBindBufferMemory vkBindBufferMemory{};
+    PFN_vkBindImageMemory vkBindImageMemory{};
+    PFN_vkCmdBeginQuery vkCmdBeginQuery{};
+    PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
+    PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
+    PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
+    PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
+    PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
+    PFN_vkCmdBindPipeline vkCmdBindPipeline{};
+    PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
+    PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
+    PFN_vkCmdBlitImage vkCmdBlitImage{};
+    PFN_vkCmdClearAttachments vkCmdClearAttachments{};
+    PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
+    PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{};
+    PFN_vkCmdCopyImage vkCmdCopyImage{};
+    PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{};
+    PFN_vkCmdDispatch vkCmdDispatch{};
+    PFN_vkCmdDraw vkCmdDraw{};
+    PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
+    PFN_vkCmdEndQuery vkCmdEndQuery{};
+    PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
+    PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
+    PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
+    PFN_vkCmdFillBuffer vkCmdFillBuffer{};
+    PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
+    PFN_vkCmdPushConstants vkCmdPushConstants{};
+    PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
+    PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
+    PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
+    PFN_vkCmdSetEvent vkCmdSetEvent{};
+    PFN_vkCmdSetScissor vkCmdSetScissor{};
+    PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
+    PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
+    PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
+    PFN_vkCmdSetViewport vkCmdSetViewport{};
+    PFN_vkCmdWaitEvents vkCmdWaitEvents{};
+    PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
+    PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
+    PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
+    PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
+    PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
+    PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
+    PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
+    PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
+    PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
+    PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
+    PFN_vkCmdResolveImage vkCmdResolveImage{};
+    PFN_vkCreateBuffer vkCreateBuffer{};
+    PFN_vkCreateBufferView vkCreateBufferView{};
+    PFN_vkCreateCommandPool vkCreateCommandPool{};
+    PFN_vkCreateComputePipelines vkCreateComputePipelines{};
+    PFN_vkCreateDescriptorPool vkCreateDescriptorPool{};
+    PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout{};
+    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR{};
+    PFN_vkCreateEvent vkCreateEvent{};
+    PFN_vkCreateFence vkCreateFence{};
+    PFN_vkCreateFramebuffer vkCreateFramebuffer{};
+    PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{};
+    PFN_vkCreateImage vkCreateImage{};
+    PFN_vkCreateImageView vkCreateImageView{};
+    PFN_vkCreatePipelineLayout vkCreatePipelineLayout{};
+    PFN_vkCreateQueryPool vkCreateQueryPool{};
+    PFN_vkCreateRenderPass vkCreateRenderPass{};
+    PFN_vkCreateSampler vkCreateSampler{};
+    PFN_vkCreateSemaphore vkCreateSemaphore{};
+    PFN_vkCreateShaderModule vkCreateShaderModule{};
+    PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR{};
+    PFN_vkDestroyBuffer vkDestroyBuffer{};
+    PFN_vkDestroyBufferView vkDestroyBufferView{};
+    PFN_vkDestroyCommandPool vkDestroyCommandPool{};
+    PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool{};
+    PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout{};
+    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR{};
+    PFN_vkDestroyEvent vkDestroyEvent{};
+    PFN_vkDestroyFence vkDestroyFence{};
+    PFN_vkDestroyFramebuffer vkDestroyFramebuffer{};
+    PFN_vkDestroyImage vkDestroyImage{};
+    PFN_vkDestroyImageView vkDestroyImageView{};
+    PFN_vkDestroyPipeline vkDestroyPipeline{};
+    PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{};
+    PFN_vkDestroyQueryPool vkDestroyQueryPool{};
+    PFN_vkDestroyRenderPass vkDestroyRenderPass{};
+    PFN_vkDestroySampler vkDestroySampler{};
+    PFN_vkDestroySemaphore vkDestroySemaphore{};
+    PFN_vkDestroyShaderModule vkDestroyShaderModule{};
+    PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR{};
+    PFN_vkDeviceWaitIdle vkDeviceWaitIdle{};
+    PFN_vkEndCommandBuffer vkEndCommandBuffer{};
+    PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
+    PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
+    PFN_vkFreeMemory vkFreeMemory{};
+    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{};
+    PFN_vkGetDeviceQueue vkGetDeviceQueue{};
+    PFN_vkGetEventStatus vkGetEventStatus{};
+    PFN_vkGetFenceStatus vkGetFenceStatus{};
+    PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
+    PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
+    PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
+    PFN_vkMapMemory vkMapMemory{};
+    PFN_vkQueueSubmit vkQueueSubmit{};
+    PFN_vkResetFences vkResetFences{};
+    PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT{};
+    PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT{};
+    PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT{};
+    PFN_vkUnmapMemory vkUnmapMemory{};
+    PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR{};
+    PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets{};
+    PFN_vkWaitForFences vkWaitForFences{};
+    PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR{};
 };
 
 /// Loads instance agnostic function pointers.
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 4bf2bfd40..0a4c48b3d 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -93,7 +93,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
 
     const auto& profiles = profile_manager->GetAllUsers();
     for (const auto& user : profiles) {
-        Service::Account::ProfileBase profile;
+        Service::Account::ProfileBase profile{};
         if (!profile_manager->GetProfileBase(user, profile))
             continue;
 
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 85ee2577d..4528eb196 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -290,8 +290,8 @@ GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread_,
                             QString::fromUtf8(Common::g_scm_branch),
                             QString::fromUtf8(Common::g_scm_desc)));
     setAttribute(Qt::WA_AcceptTouchEvents);
-    auto layout = new QHBoxLayout(this);
-    layout->setMargin(0);
+    auto* layout = new QHBoxLayout(this);
+    layout->setContentsMargins(0, 0, 0, 0);
     setLayout(layout);
     input_subsystem->Initialize();
 
@@ -394,7 +394,7 @@ void GRenderWindow::mousePressEvent(QMouseEvent* event) {
     input_subsystem->GetMouse()->PressButton(x, y, event->button());
 
     if (event->button() == Qt::LeftButton) {
-        this->TouchPressed(x, y);
+        this->TouchPressed(x, y, 0);
     }
 
     emit MouseActivity();
@@ -409,7 +409,7 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
     auto pos = event->pos();
     const auto [x, y] = ScaleTouch(pos);
     input_subsystem->GetMouse()->MouseMove(x, y);
-    this->TouchMoved(x, y);
+    this->TouchMoved(x, y, 0);
 
     emit MouseActivity();
 }
@@ -423,36 +423,72 @@ void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) {
     input_subsystem->GetMouse()->ReleaseButton(event->button());
 
     if (event->button() == Qt::LeftButton) {
-        this->TouchReleased();
+        this->TouchReleased(0);
     }
 }
 
 void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) {
-    // TouchBegin always has exactly one touch point, so take the .first()
-    const auto [x, y] = ScaleTouch(event->touchPoints().first().pos());
-    this->TouchPressed(x, y);
+    QList<QTouchEvent::TouchPoint> touch_points = event->touchPoints();
+    for (const auto& touch_point : touch_points) {
+        if (!TouchUpdate(touch_point)) {
+            TouchStart(touch_point);
+        }
+    }
 }
 
 void GRenderWindow::TouchUpdateEvent(const QTouchEvent* event) {
-    QPointF pos;
-    int active_points = 0;
-
-    // average all active touch points
-    for (const auto& tp : event->touchPoints()) {
-        if (tp.state() & (Qt::TouchPointPressed | Qt::TouchPointMoved | Qt::TouchPointStationary)) {
-            active_points++;
-            pos += tp.pos();
+    QList<QTouchEvent::TouchPoint> touch_points = event->touchPoints();
+    for (const auto& touch_point : touch_points) {
+        if (!TouchUpdate(touch_point)) {
+            TouchStart(touch_point);
         }
     }
+    // Release tracked slots whose touch point is gone; 0 marks a free slot, so skip those
+    for (std::size_t id = 0; id < touch_ids.size(); ++id) {
+        if (touch_ids[id] != 0 && !TouchExist(touch_ids[id], touch_points)) {
+            touch_ids[id] = 0;
+            this->TouchReleased(id + 1);
+        }
+    }
+}
 
-    pos /= active_points;
+void GRenderWindow::TouchEndEvent() {
+    for (std::size_t id = 0; id < touch_ids.size(); ++id) {
+        if (touch_ids[id] != 0) {
+            touch_ids[id] = 0;
+            this->TouchReleased(id + 1);
+        }
+    }
+}
 
-    const auto [x, y] = ScaleTouch(pos);
-    this->TouchMoved(x, y);
+bool GRenderWindow::TouchStart(const QTouchEvent::TouchPoint& touch_point) {
+    for (std::size_t id = 0; id < touch_ids.size(); ++id) {
+        if (touch_ids[id] == 0) { // Free slot; ids are stored as Qt id + 1 so 0 means empty
+            touch_ids[id] = static_cast<std::size_t>(touch_point.id() + 1);
+            const auto [x, y] = ScaleTouch(touch_point.pos());
+            this->TouchPressed(x, y, id + 1);
+            return true;
+        }
+    }
+    return false;
 }
 
-void GRenderWindow::TouchEndEvent() {
-    this->TouchReleased();
+bool GRenderWindow::TouchUpdate(const QTouchEvent::TouchPoint& touch_point) {
+    for (std::size_t id = 0; id < touch_ids.size(); ++id) {
+        if (touch_ids[id] == static_cast<std::size_t>(touch_point.id() + 1)) {
+            const auto [x, y] = ScaleTouch(touch_point.pos());
+            this->TouchMoved(x, y, id + 1);
+            return true;
+        }
+    }
+    return false;
+}
+
+bool GRenderWindow::TouchExist(std::size_t id,
+                               const QList<QTouchEvent::TouchPoint>& touch_points) const {
+    return std::any_of(touch_points.begin(), touch_points.end(), [id](const auto& point) {
+        return id == static_cast<std::size_t>(point.id() + 1);
+    });
 }
 
 bool GRenderWindow::event(QEvent* event) {
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 339095509..b5ec7de07 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -11,6 +11,7 @@
 
 #include <QImage>
 #include <QThread>
+#include <QTouchEvent>
 #include <QWidget>
 #include <QWindow>
 
@@ -21,7 +22,6 @@
 class GRenderWindow;
 class GMainWindow;
 class QKeyEvent;
-class QTouchEvent;
 class QStringList;
 
 namespace InputCommon {
@@ -191,6 +191,10 @@ private:
     void TouchUpdateEvent(const QTouchEvent* event);
     void TouchEndEvent();
 
+    bool TouchStart(const QTouchEvent::TouchPoint& touch_point);
+    bool TouchUpdate(const QTouchEvent::TouchPoint& touch_point);
+    bool TouchExist(std::size_t id, const QList<QTouchEvent::TouchPoint>& touch_points) const;
+
     void OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal_size) override;
 
     bool InitializeOpenGL();
@@ -215,6 +219,8 @@ private:
 
     bool first_frame = false;
 
+    std::array<std::size_t, 16> touch_ids{};
+
 protected:
     void showEvent(QShowEvent* event) override;
     bool eventFilter(QObject* object, QEvent* event) override;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index cda448718..8d85a1986 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -464,13 +464,7 @@ void Config::ReadMouseValues() {
 void Config::ReadTouchscreenValues() {
     Settings::values.touchscreen.enabled =
         ReadSetting(QStringLiteral("touchscreen_enabled"), true).toBool();
-    Settings::values.touchscreen.device =
-        ReadSetting(QStringLiteral("touchscreen_device"), QStringLiteral("engine:emu_window"))
-            .toString()
-            .toStdString();
 
-    Settings::values.touchscreen.finger =
-        ReadSetting(QStringLiteral("touchscreen_finger"), 0).toUInt();
     Settings::values.touchscreen.rotation_angle =
         ReadSetting(QStringLiteral("touchscreen_angle"), 0).toUInt();
     Settings::values.touchscreen.diameter_x =
@@ -563,7 +557,8 @@ void Config::ReadMotionTouchValues() {
             .toString()
             .toStdString();
     Settings::values.touch_device =
-        ReadSetting(QStringLiteral("touch_device"), QStringLiteral("engine:emu_window"))
+        ReadSetting(QStringLiteral("touch_device"),
+                    QStringLiteral("min_x:100,min_y:50,max_x:1800,max_y:850"))
             .toString()
             .toStdString();
     Settings::values.use_touch_from_button =
@@ -1005,7 +1000,8 @@ void Config::SavePlayerValue(std::size_t player_index) {
                  static_cast<u8>(Settings::ControllerType::ProController));
 
     if (!player_prefix.isEmpty()) {
-        WriteSetting(QStringLiteral("%1connected").arg(player_prefix), player.connected, false);
+        WriteSetting(QStringLiteral("%1connected").arg(player_prefix), player.connected,
+                     player_index == 0);
         WriteSetting(QStringLiteral("%1vibration_enabled").arg(player_prefix),
                      player.vibration_enabled, true);
         WriteSetting(QStringLiteral("%1vibration_strength").arg(player_prefix),
@@ -1087,10 +1083,7 @@ void Config::SaveTouchscreenValues() {
     const auto& touchscreen = Settings::values.touchscreen;
 
     WriteSetting(QStringLiteral("touchscreen_enabled"), touchscreen.enabled, true);
-    WriteSetting(QStringLiteral("touchscreen_device"), QString::fromStdString(touchscreen.device),
-                 QStringLiteral("engine:emu_window"));
 
-    WriteSetting(QStringLiteral("touchscreen_finger"), touchscreen.finger, 0);
     WriteSetting(QStringLiteral("touchscreen_angle"), touchscreen.rotation_angle, 0);
     WriteSetting(QStringLiteral("touchscreen_diameter_x"), touchscreen.diameter_x, 15);
     WriteSetting(QStringLiteral("touchscreen_diameter_y"), touchscreen.diameter_y, 15);
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index b33f8437a..d6b17a28d 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -117,31 +117,13 @@ void ConfigureDialog::UpdateVisibleTabs() {
         return;
     }
 
-    const std::map<QWidget*, QString> widgets = {
-        {ui->generalTab, tr("General")},
-        {ui->systemTab, tr("System")},
-        {ui->profileManagerTab, tr("Profiles")},
-        {ui->inputTab, tr("Controls")},
-        {ui->hotkeysTab, tr("Hotkeys")},
-        {ui->cpuTab, tr("CPU")},
-        {ui->cpuDebugTab, tr("Debug")},
-        {ui->graphicsTab, tr("Graphics")},
-        {ui->graphicsAdvancedTab, tr("Advanced")},
-        {ui->audioTab, tr("Audio")},
-        {ui->debugTab, tr("Debug")},
-        {ui->webTab, tr("Web")},
-        {ui->uiTab, tr("UI")},
-        {ui->filesystemTab, tr("Filesystem")},
-        {ui->serviceTab, tr("Services")},
-    };
-
     [[maybe_unused]] const QSignalBlocker blocker(ui->tabWidget);
 
     ui->tabWidget->clear();
 
-    const QList<QWidget*> tabs = qvariant_cast<QList<QWidget*>>(items[0]->data(Qt::UserRole));
+    const auto tabs = qvariant_cast<QList<QWidget*>>(items[0]->data(Qt::UserRole));
 
-    for (const auto tab : tabs) {
+    for (auto* const tab : tabs) {
         ui->tabWidget->addTab(tab, tab->accessibleName());
     }
 }
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 46ea026e4..13f0351d4 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -575,6 +575,16 @@ void ConfigureInputPlayer::ApplyConfiguration() {
 
     std::transform(motions_param.begin(), motions_param.end(), motions.begin(),
                    [](const Common::ParamPackage& param) { return param.Serialize(); });
+
+    // Apply configuration for handheld
+    if (player_index == 0) {
+        auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX];
+        if (player.controller_type == Settings::ControllerType::Handheld) {
+            handheld = player;
+        }
+        handheld.connected = ui->groupConnectedController->isChecked() &&
+                             player.controller_type == Settings::ControllerType::Handheld;
+    }
 }
 
 void ConfigureInputPlayer::TryConnectSelectedController() {
diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp
index eb8eacbf9..1f2b792e4 100644
--- a/src/yuzu/configuration/configure_motion_touch.cpp
+++ b/src/yuzu/configuration/configure_motion_touch.cpp
@@ -4,12 +4,15 @@
 
 #include <array>
 #include <sstream>
+
 #include <QCloseEvent>
 #include <QLabel>
 #include <QMessageBox>
 #include <QPushButton>
+#include <QRegularExpression>
 #include <QStringListModel>
 #include <QVBoxLayout>
+
 #include "common/logging/log.h"
 #include "core/settings.h"
 #include "input_common/main.h"
@@ -78,19 +81,11 @@ void CalibrationConfigurationDialog::UpdateButtonText(const QString& text) {
     cancel_button->setText(text);
 }
 
-constexpr std::array<std::pair<const char*, const char*>, 2> TouchProviders = {{
-    {"emu_window", QT_TRANSLATE_NOOP("ConfigureMotionTouch", "Emulator Window")},
-    {"cemuhookudp", QT_TRANSLATE_NOOP("ConfigureMotionTouch", "CemuhookUDP")},
-}};
-
 ConfigureMotionTouch::ConfigureMotionTouch(QWidget* parent,
                                            InputCommon::InputSubsystem* input_subsystem_)
     : QDialog(parent), input_subsystem{input_subsystem_},
       ui(std::make_unique<Ui::ConfigureMotionTouch>()) {
     ui->setupUi(this);
-    for (const auto& [provider, name] : TouchProviders) {
-        ui->touch_provider->addItem(tr(name), QString::fromUtf8(provider));
-    }
 
     ui->udp_learn_more->setOpenExternalLinks(true);
     ui->udp_learn_more->setText(
@@ -109,11 +104,7 @@ ConfigureMotionTouch::~ConfigureMotionTouch() = default;
 void ConfigureMotionTouch::SetConfiguration() {
     const Common::ParamPackage motion_param(Settings::values.motion_device);
     const Common::ParamPackage touch_param(Settings::values.touch_device);
-    const std::string motion_engine = motion_param.Get("engine", "motion_emu");
-    const std::string touch_engine = touch_param.Get("engine", "emu_window");
 
-    ui->touch_provider->setCurrentIndex(
-        ui->touch_provider->findData(QString::fromStdString(touch_engine)));
     ui->touch_from_button_checkbox->setChecked(Settings::values.use_touch_from_button);
     touch_from_button_maps = Settings::values.touch_from_button_maps;
     for (const auto& touch_map : touch_from_button_maps) {
@@ -146,30 +137,21 @@ void ConfigureMotionTouch::SetConfiguration() {
 }
 
 void ConfigureMotionTouch::UpdateUiDisplay() {
-    const QString touch_engine = ui->touch_provider->currentData().toString();
     const QString cemuhook_udp = QStringLiteral("cemuhookudp");
 
     ui->motion_sensitivity_label->setVisible(true);
     ui->motion_sensitivity->setVisible(true);
 
-    if (touch_engine == cemuhook_udp) {
-        ui->touch_calibration->setVisible(true);
-        ui->touch_calibration_config->setVisible(true);
-        ui->touch_calibration_label->setVisible(true);
-        ui->touch_calibration->setText(
-            QStringLiteral("(%1, %2) - (%3, %4)").arg(min_x).arg(min_y).arg(max_x).arg(max_y));
-    } else {
-        ui->touch_calibration->setVisible(false);
-        ui->touch_calibration_config->setVisible(false);
-        ui->touch_calibration_label->setVisible(false);
-    }
+    ui->touch_calibration->setVisible(true);
+    ui->touch_calibration_config->setVisible(true);
+    ui->touch_calibration_label->setVisible(true);
+    ui->touch_calibration->setText(
+        QStringLiteral("(%1, %2) - (%3, %4)").arg(min_x).arg(min_y).arg(max_x).arg(max_y));
 
     ui->udp_config_group_box->setVisible(true);
 }
 
 void ConfigureMotionTouch::ConnectEvents() {
-    connect(ui->touch_provider, qOverload<int>(&QComboBox::currentIndexChanged), this,
-            [this](int index) { UpdateUiDisplay(); });
     connect(ui->udp_test, &QPushButton::clicked, this, &ConfigureMotionTouch::OnCemuhookUDPTest);
     connect(ui->udp_add, &QPushButton::clicked, this, &ConfigureMotionTouch::OnUDPAddServer);
     connect(ui->udp_remove, &QPushButton::clicked, this, &ConfigureMotionTouch::OnUDPDeleteServer);
@@ -185,14 +167,15 @@ void ConfigureMotionTouch::ConnectEvents() {
 }
 
 void ConfigureMotionTouch::OnUDPAddServer() {
-    QRegExp re(tr(R"re(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4]"
-                  "[0-9]|[01]?[0-9][0-9]?)$)re")); // a valid ip address
+    // Validator for IP address
+    const QRegularExpression re(QStringLiteral(
+        R"re(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$)re"));
     bool ok;
-    QString port_text = ui->udp_port->text();
-    QString server_text = ui->udp_server->text();
+    const QString port_text = ui->udp_port->text();
+    const QString server_text = ui->udp_server->text();
     const QString server_string = tr("%1:%2").arg(server_text, port_text);
-    int port_number = port_text.toInt(&ok, 10);
-    int row = udp_server_list_model->rowCount();
+    const int port_number = port_text.toInt(&ok, 10);
+    const int row = udp_server_list_model->rowCount();
 
     if (!ok) {
         QMessageBox::warning(this, tr("yuzu"), tr("Port number has invalid characters"));
@@ -202,7 +185,7 @@ void ConfigureMotionTouch::OnUDPAddServer() {
         QMessageBox::warning(this, tr("yuzu"), tr("Port has to be in range 0 and 65353"));
         return;
     }
-    if (!re.exactMatch(server_text)) {
+    if (!re.match(server_text).hasMatch()) {
         QMessageBox::warning(this, tr("yuzu"), tr("IP address is not valid"));
         return;
     }
@@ -324,17 +307,11 @@ void ConfigureMotionTouch::ApplyConfiguration() {
         return;
     }
 
-    std::string touch_engine = ui->touch_provider->currentData().toString().toStdString();
-
     Common::ParamPackage touch_param{};
-    touch_param.Set("engine", std::move(touch_engine));
-
-    if (touch_engine == "cemuhookudp") {
-        touch_param.Set("min_x", min_x);
-        touch_param.Set("min_y", min_y);
-        touch_param.Set("max_x", max_x);
-        touch_param.Set("max_y", max_y);
-    }
+    touch_param.Set("min_x", min_x);
+    touch_param.Set("min_y", min_y);
+    touch_param.Set("max_x", max_x);
+    touch_param.Set("max_y", max_y);
 
     Settings::values.touch_device = touch_param.Serialize();
     Settings::values.use_touch_from_button = ui->touch_from_button_checkbox->isChecked();
diff --git a/src/yuzu/configuration/configure_motion_touch.ui b/src/yuzu/configuration/configure_motion_touch.ui
index ebca835ac..1e35ea946 100644
--- a/src/yuzu/configuration/configure_motion_touch.ui
+++ b/src/yuzu/configuration/configure_motion_touch.ui
@@ -68,23 +68,9 @@
       <item>
        <layout class="QHBoxLayout">
         <item>
-         <widget class="QLabel" name="touch_provider_label">
-          <property name="text">
-           <string>Touch Provider:</string>
-          </property>
-         </widget>
-        </item>
-        <item>
-         <widget class="QComboBox" name="touch_provider"/>
-        </item>
-       </layout>
-      </item>
-      <item>
-       <layout class="QHBoxLayout">
-        <item>
          <widget class="QLabel" name="touch_calibration_label">
           <property name="text">
-           <string>Calibration:</string>
+           <string>UDP Calibration:</string>
           </property>
          </widget>
         </item>
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp
index 13d9a4757..d102a43af 100644
--- a/src/yuzu/configuration/configure_profile_manager.cpp
+++ b/src/yuzu/configuration/configure_profile_manager.cpp
@@ -40,7 +40,7 @@ QString GetImagePath(Common::UUID uuid) {
 }
 
 QString GetAccountUsername(const Service::Account::ProfileManager& manager, Common::UUID uuid) {
-    Service::Account::ProfileBase profile;
+    Service::Account::ProfileBase profile{};
     if (!manager.GetProfileBase(uuid, profile)) {
         return {};
     }
@@ -147,7 +147,7 @@ void ConfigureProfileManager::SetConfiguration() {
 void ConfigureProfileManager::PopulateUserList() {
     const auto& profiles = profile_manager->GetAllUsers();
     for (const auto& user : profiles) {
-        Service::Account::ProfileBase profile;
+        Service::Account::ProfileBase profile{};
         if (!profile_manager->GetProfileBase(user, profile))
             continue;
 
@@ -212,7 +212,7 @@ void ConfigureProfileManager::RenameUser() {
     const auto uuid = profile_manager->GetUser(user);
     ASSERT(uuid);
 
-    Service::Account::ProfileBase profile;
+    Service::Account::ProfileBase profile{};
     if (!profile_manager->GetProfileBase(*uuid, profile))
         return;
 
diff --git a/src/yuzu/configuration/configure_service.cpp b/src/yuzu/configuration/configure_service.cpp
index 0de7a4f0b..b580cfff2 100644
--- a/src/yuzu/configuration/configure_service.cpp
+++ b/src/yuzu/configuration/configure_service.cpp
@@ -9,6 +9,7 @@
 #include "ui_configure_service.h"
 #include "yuzu/configuration/configure_service.h"
 
+#ifdef YUZU_ENABLE_BOXCAT
 namespace {
 QString FormatEventStatusString(const Service::BCAT::EventStatus& status) {
     QString out;
@@ -32,6 +33,7 @@ QString FormatEventStatusString(const Service::BCAT::EventStatus& status) {
     return out;
 }
 } // Anonymous namespace
+#endif
 
 ConfigureService::ConfigureService(QWidget* parent)
     : QWidget(parent), ui(std::make_unique<Ui::ConfigureService>()) {
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.cpp b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
index 7d7cc00b7..29c86c7bc 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.cpp
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
@@ -33,21 +33,18 @@ void ConfigureTouchscreenAdvanced::RetranslateUI() {
 }
 
 void ConfigureTouchscreenAdvanced::ApplyConfiguration() {
-    Settings::values.touchscreen.finger = ui->finger_box->value();
     Settings::values.touchscreen.diameter_x = ui->diameter_x_box->value();
     Settings::values.touchscreen.diameter_y = ui->diameter_y_box->value();
     Settings::values.touchscreen.rotation_angle = ui->angle_box->value();
 }
 
 void ConfigureTouchscreenAdvanced::LoadConfiguration() {
-    ui->finger_box->setValue(Settings::values.touchscreen.finger);
     ui->diameter_x_box->setValue(Settings::values.touchscreen.diameter_x);
     ui->diameter_y_box->setValue(Settings::values.touchscreen.diameter_y);
     ui->angle_box->setValue(Settings::values.touchscreen.rotation_angle);
 }
 
 void ConfigureTouchscreenAdvanced::RestoreDefaults() {
-    ui->finger_box->setValue(0);
     ui->diameter_x_box->setValue(15);
     ui->diameter_y_box->setValue(15);
     ui->angle_box->setValue(0);
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.ui b/src/yuzu/configuration/configure_touchscreen_advanced.ui
index 30ceccddb..88e7cf050 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.ui
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.ui
@@ -65,20 +65,13 @@
         </property>
        </spacer>
       </item>
-      <item row="2" column="1">
+      <item row="1" column="1">
        <widget class="QLabel" name="label_4">
         <property name="text">
          <string>Touch Diameter Y</string>
         </property>
        </widget>
       </item>
-      <item row="0" column="1">
-       <widget class="QLabel" name="label">
-        <property name="text">
-         <string>Finger</string>
-        </property>
-       </widget>
-      </item>
       <item row="0" column="3">
        <spacer name="horizontalSpacer_2">
         <property name="orientation">
@@ -92,37 +85,27 @@
         </property>
        </spacer>
       </item>
-      <item row="1" column="1">
+      <item row="0" column="1">
        <widget class="QLabel" name="label_3">
         <property name="text">
          <string>Touch Diameter X</string>
         </property>
        </widget>
       </item>
-      <item row="0" column="2">
-       <widget class="QSpinBox" name="finger_box">
-        <property name="minimumSize">
-         <size>
-          <width>80</width>
-          <height>0</height>
-         </size>
-        </property>
-       </widget>
-      </item>
-      <item row="3" column="1">
+      <item row="2" column="1">
        <widget class="QLabel" name="label_5">
         <property name="text">
          <string>Rotational Angle</string>
         </property>
        </widget>
       </item>
-      <item row="1" column="2">
+      <item row="0" column="2">
        <widget class="QSpinBox" name="diameter_x_box"/>
       </item>
-      <item row="2" column="2">
+      <item row="1" column="2">
        <widget class="QSpinBox" name="diameter_y_box"/>
       </item>
-      <item row="3" column="2">
+      <item row="2" column="2">
        <widget class="QSpinBox" name="angle_box"/>
       </item>
      </layout>
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 0925c10b4..a93b5d3c2 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -14,10 +14,10 @@
 #include "core/core.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/svc_common.h"
 #include "core/hle/kernel/thread.h"
 #include "core/memory.h"
 
@@ -116,7 +116,7 @@ QString WaitTreeText::GetText() const {
 WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
     : mutex_address(mutex_address) {
     mutex_value = Core::System::GetInstance().Memory().Read32(mutex_address);
-    owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
+    owner_handle = static_cast<Kernel::Handle>(mutex_value & ~Kernel::Svc::HandleWaitMask);
     owner = handle_table.Get<Kernel::Thread>(owner_handle);
 }
 
@@ -127,7 +127,7 @@ QString WaitTreeMutexInfo::GetText() const {
 }
 
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeMutexInfo::GetChildren() const {
-    const bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0;
+    const bool has_waiters = (mutex_value & Kernel::Svc::HandleWaitMask) != 0;
 
     std::vector<std::unique_ptr<WaitTreeItem>> list;
     list.push_back(std::make_unique<WaitTreeText>(tr("has waiters: %1").arg(has_waiters)));
@@ -169,7 +169,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
     return list;
 }
 
-WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& o)
+WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(
+    const Kernel::KSynchronizationObject& o)
     : object(o) {}
 WaitTreeSynchronizationObject::~WaitTreeSynchronizationObject() = default;
 
@@ -188,7 +189,7 @@ QString WaitTreeSynchronizationObject::GetText() const {
 }
 
 std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::make(
-    const Kernel::SynchronizationObject& object) {
+    const Kernel::KSynchronizationObject& object) {
     switch (object.GetHandleType()) {
     case Kernel::HandleType::ReadableEvent:
         return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object));
@@ -202,7 +203,7 @@ std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::ma
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChildren() const {
     std::vector<std::unique_ptr<WaitTreeItem>> list;
 
-    const auto& threads = object.GetWaitingThreads();
+    const auto& threads = object.GetWaitingThreadsForDebugging();
     if (threads.empty()) {
         list.push_back(std::make_unique<WaitTreeText>(tr("waited by no thread")));
     } else {
@@ -211,8 +212,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChi
     return list;
 }
 
-WaitTreeObjectList::WaitTreeObjectList(
-    const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, bool w_all)
+WaitTreeObjectList::WaitTreeObjectList(const std::vector<Kernel::KSynchronizationObject*>& list,
+                                       bool w_all)
     : object_list(list), wait_all(w_all) {}
 
 WaitTreeObjectList::~WaitTreeObjectList() = default;
@@ -237,8 +238,8 @@ WaitTreeThread::~WaitTreeThread() = default;
 QString WaitTreeThread::GetText() const {
     const auto& thread = static_cast<const Kernel::Thread&>(object);
     QString status;
-    switch (thread.GetStatus()) {
-    case Kernel::ThreadStatus::Ready:
+    switch (thread.GetState()) {
+    case Kernel::ThreadState::Runnable:
         if (!thread.IsPaused()) {
             if (thread.WasRunning()) {
                 status = tr("running");
@@ -249,35 +250,39 @@ QString WaitTreeThread::GetText() const {
             status = tr("paused");
         }
         break;
-    case Kernel::ThreadStatus::Paused:
-        status = tr("paused");
-        break;
-    case Kernel::ThreadStatus::WaitHLEEvent:
-        status = tr("waiting for HLE return");
-        break;
-    case Kernel::ThreadStatus::WaitSleep:
-        status = tr("sleeping");
-        break;
-    case Kernel::ThreadStatus::WaitIPC:
-        status = tr("waiting for IPC reply");
-        break;
-    case Kernel::ThreadStatus::WaitSynch:
-        status = tr("waiting for objects");
-        break;
-    case Kernel::ThreadStatus::WaitMutex:
-        status = tr("waiting for mutex");
-        break;
-    case Kernel::ThreadStatus::WaitCondVar:
-        status = tr("waiting for condition variable");
+    case Kernel::ThreadState::Waiting:
+        switch (thread.GetWaitReasonForDebugging()) {
+        case Kernel::ThreadWaitReasonForDebugging::Sleep:
+            status = tr("sleeping");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::IPC:
+            status = tr("waiting for IPC reply");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::Synchronization:
+            status = tr("waiting for objects");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::ConditionVar:
+            status = tr("waiting for condition variable");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::Arbitration:
+            status = tr("waiting for address arbiter");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::Suspended:
+            status = tr("waiting for suspend resume");
+            break;
+        default:
+            status = tr("waiting");
+            break;
+        }
         break;
-    case Kernel::ThreadStatus::WaitArb:
-        status = tr("waiting for address arbiter");
+    case Kernel::ThreadState::Initialized:
+        status = tr("initialized");
         break;
-    case Kernel::ThreadStatus::Dormant:
-        status = tr("dormant");
+    case Kernel::ThreadState::Terminated:
+        status = tr("terminated");
         break;
-    case Kernel::ThreadStatus::Dead:
-        status = tr("dead");
+    default:
+        status = tr("unknown");
         break;
     }
 
@@ -293,8 +298,8 @@ QColor WaitTreeThread::GetColor() const {
     const std::size_t color_index = IsDarkTheme() ? 1 : 0;
 
     const auto& thread = static_cast<const Kernel::Thread&>(object);
-    switch (thread.GetStatus()) {
-    case Kernel::ThreadStatus::Ready:
+    switch (thread.GetState()) {
+    case Kernel::ThreadState::Runnable:
         if (!thread.IsPaused()) {
             if (thread.WasRunning()) {
                 return QColor(WaitTreeColors[0][color_index]);
@@ -304,21 +309,24 @@ QColor WaitTreeThread::GetColor() const {
         } else {
             return QColor(WaitTreeColors[2][color_index]);
         }
-    case Kernel::ThreadStatus::Paused:
-        return QColor(WaitTreeColors[3][color_index]);
-    case Kernel::ThreadStatus::WaitHLEEvent:
-    case Kernel::ThreadStatus::WaitIPC:
-        return QColor(WaitTreeColors[4][color_index]);
-    case Kernel::ThreadStatus::WaitSleep:
-        return QColor(WaitTreeColors[5][color_index]);
-    case Kernel::ThreadStatus::WaitSynch:
-    case Kernel::ThreadStatus::WaitMutex:
-    case Kernel::ThreadStatus::WaitCondVar:
-    case Kernel::ThreadStatus::WaitArb:
-        return QColor(WaitTreeColors[6][color_index]);
-    case Kernel::ThreadStatus::Dormant:
+    case Kernel::ThreadState::Waiting:
+        switch (thread.GetWaitReasonForDebugging()) {
+        case Kernel::ThreadWaitReasonForDebugging::IPC:
+            return QColor(WaitTreeColors[4][color_index]);
+        case Kernel::ThreadWaitReasonForDebugging::Sleep:
+            return QColor(WaitTreeColors[5][color_index]);
+        case Kernel::ThreadWaitReasonForDebugging::Synchronization:
+        case Kernel::ThreadWaitReasonForDebugging::ConditionVar:
+        case Kernel::ThreadWaitReasonForDebugging::Arbitration:
+        case Kernel::ThreadWaitReasonForDebugging::Suspended:
+            return QColor(WaitTreeColors[6][color_index]);
+            break;
+        default:
+            return QColor(WaitTreeColors[3][color_index]);
+        }
+    case Kernel::ThreadState::Initialized:
         return QColor(WaitTreeColors[7][color_index]);
-    case Kernel::ThreadStatus::Dead:
+    case Kernel::ThreadState::Terminated:
         return QColor(WaitTreeColors[8][color_index]);
     default:
         return WaitTreeItem::GetColor();
@@ -354,11 +362,11 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
     list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadID())));
     list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
                                                       .arg(thread.GetPriority())
-                                                      .arg(thread.GetNominalPriority())));
+                                                      .arg(thread.GetBasePriority())));
     list.push_back(std::make_unique<WaitTreeText>(
         tr("last running ticks = %1").arg(thread.GetLastScheduledTick())));
 
-    const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
+    const VAddr mutex_wait_address = thread.GetMutexWaitAddressForDebugging();
     if (mutex_wait_address != 0) {
         const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
         list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
@@ -366,9 +374,11 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
         list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
     }
 
-    if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {
-        list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(),
-                                                            thread.IsWaitingSync()));
+    if (thread.GetState() == Kernel::ThreadState::Waiting &&
+        thread.GetWaitReasonForDebugging() ==
+            Kernel::ThreadWaitReasonForDebugging::Synchronization) {
+        list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjectsForDebugging(),
+                                                            thread.IsCancellable()));
     }
 
     list.push_back(std::make_unique<WaitTreeCallstack>(thread));
@@ -380,7 +390,7 @@ WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object)
     : WaitTreeSynchronizationObject(object) {}
 WaitTreeEvent::~WaitTreeEvent() = default;
 
-WaitTreeThreadList::WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list)
+WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::Thread*>& list)
     : thread_list(list) {}
 WaitTreeThreadList::~WaitTreeThreadList() = default;
 
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 8e3bc4b24..cf96911ea 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -18,8 +18,8 @@ class EmuThread;
 
 namespace Kernel {
 class HandleTable;
+class KSynchronizationObject;
 class ReadableEvent;
-class SynchronizationObject;
 class Thread;
 } // namespace Kernel
 
@@ -102,30 +102,29 @@ private:
 class WaitTreeSynchronizationObject : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    explicit WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& object);
+    explicit WaitTreeSynchronizationObject(const Kernel::KSynchronizationObject& object);
     ~WaitTreeSynchronizationObject() override;
 
     static std::unique_ptr<WaitTreeSynchronizationObject> make(
-        const Kernel::SynchronizationObject& object);
+        const Kernel::KSynchronizationObject& object);
     QString GetText() const override;
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 
 protected:
-    const Kernel::SynchronizationObject& object;
+    const Kernel::KSynchronizationObject& object;
 };
 
 class WaitTreeObjectList : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list,
-                       bool wait_all);
+    WaitTreeObjectList(const std::vector<Kernel::KSynchronizationObject*>& list, bool wait_all);
     ~WaitTreeObjectList() override;
 
     QString GetText() const override;
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 
 private:
-    const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& object_list;
+    const std::vector<Kernel::KSynchronizationObject*>& object_list;
     bool wait_all;
 };
 
@@ -150,14 +149,14 @@ public:
 class WaitTreeThreadList : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    explicit WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list);
+    explicit WaitTreeThreadList(const std::vector<Kernel::Thread*>& list);
     ~WaitTreeThreadList() override;
 
     QString GetText() const override;
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 
 private:
-    const std::vector<std::shared_ptr<Kernel::Thread>>& thread_list;
+    const std::vector<Kernel::Thread*>& thread_list;
 };
 
 class WaitTreeModel : public QAbstractItemModel {
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 70d865112..9afd5b45f 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -119,7 +119,7 @@ void GameListSearchField::setFocus() {
 GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} {
     auto* const key_release_eater = new KeyReleaseEater(parent, this);
     layout_filter = new QHBoxLayout;
-    layout_filter->setMargin(8);
+    layout_filter->setContentsMargins(8, 8, 8, 8);
     label_filter = new QLabel;
     label_filter->setText(tr("Filter:"));
     edit_filter = new QLineEdit;
@@ -173,8 +173,8 @@ void GameList::OnItemExpanded(const QModelIndex& item) {
         return;
     }
 
-    auto* game_dir = item.data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
-    game_dir->expanded = tree_view->isExpanded(item);
+    UISettings::values.game_dirs[item.data(GameListDir::GameDirRole).toInt()].expanded =
+        tree_view->isExpanded(item);
 }
 
 // Event in order to filter the gamelist after editing the searchfield
@@ -262,9 +262,9 @@ void GameList::OnUpdateThemedIcons() {
                 Qt::DecorationRole);
             break;
         case GameListItemType::CustomDir: {
-            const UISettings::GameDir* game_dir =
-                child->data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
-            const QString icon_name = QFileInfo::exists(game_dir->path)
+            const UISettings::GameDir& game_dir =
+                UISettings::values.game_dirs[child->data(GameListDir::GameDirRole).toInt()];
+            const QString icon_name = QFileInfo::exists(game_dir.path)
                                           ? QStringLiteral("folder")
                                           : QStringLiteral("bad_folder");
             child->setData(
@@ -366,7 +366,7 @@ void GameList::AddDirEntry(GameListDir* entry_items) {
     item_model->invisibleRootItem()->appendRow(entry_items);
     tree_view->setExpanded(
         entry_items->index(),
-        entry_items->data(GameListDir::GameDirRole).value<UISettings::GameDir*>()->expanded);
+        UISettings::values.game_dirs[entry_items->data(GameListDir::GameDirRole).toInt()].expanded);
 }
 
 void GameList::AddEntry(const QList<QStandardItem*>& entry_items, GameListDir* parent) {
@@ -549,7 +549,7 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
 
 void GameList::AddCustomDirPopup(QMenu& context_menu, QModelIndex selected) {
     UISettings::GameDir& game_dir =
-        *selected.data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
+        UISettings::values.game_dirs[selected.data(GameListDir::GameDirRole).toInt()];
 
     QAction* deep_scan = context_menu.addAction(tr("Scan Subfolders"));
     QAction* delete_dir = context_menu.addAction(tr("Remove Game Directory"));
@@ -568,8 +568,7 @@ void GameList::AddCustomDirPopup(QMenu& context_menu, QModelIndex selected) {
 }
 
 void GameList::AddPermDirPopup(QMenu& context_menu, QModelIndex selected) {
-    UISettings::GameDir& game_dir =
-        *selected.data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
+    const int game_dir_index = selected.data(GameListDir::GameDirRole).toInt();
 
     QAction* move_up = context_menu.addAction(tr("\u25B2 Move Up"));
     QAction* move_down = context_menu.addAction(tr("\u25bc Move Down"));
@@ -580,34 +579,39 @@ void GameList::AddPermDirPopup(QMenu& context_menu, QModelIndex selected) {
     move_up->setEnabled(row > 0);
     move_down->setEnabled(row < item_model->rowCount() - 2);
 
-    connect(move_up, &QAction::triggered, [this, selected, row, &game_dir] {
-        // find the indices of the items in settings and swap them
-        std::swap(UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(game_dir)],
-                  UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(
-                      *selected.sibling(row - 1, 0)
-                           .data(GameListDir::GameDirRole)
-                           .value<UISettings::GameDir*>())]);
+    connect(move_up, &QAction::triggered, [this, selected, row, game_dir_index] {
+        const int other_index = selected.sibling(row - 1, 0).data(GameListDir::GameDirRole).toInt();
+        // swap the items in the settings
+        std::swap(UISettings::values.game_dirs[game_dir_index],
+                  UISettings::values.game_dirs[other_index]);
+        // swap the indexes held by the QVariants
+        item_model->setData(selected, QVariant(other_index), GameListDir::GameDirRole);
+        item_model->setData(selected.sibling(row - 1, 0), QVariant(game_dir_index),
+                            GameListDir::GameDirRole);
         // move the treeview items
         QList<QStandardItem*> item = item_model->takeRow(row);
         item_model->invisibleRootItem()->insertRow(row - 1, item);
-        tree_view->setExpanded(selected, game_dir.expanded);
+        tree_view->setExpanded(selected, UISettings::values.game_dirs[game_dir_index].expanded);
     });
 
-    connect(move_down, &QAction::triggered, [this, selected, row, &game_dir] {
-        // find the indices of the items in settings and swap them
-        std::swap(UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(game_dir)],
-                  UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(
-                      *selected.sibling(row + 1, 0)
-                           .data(GameListDir::GameDirRole)
-                           .value<UISettings::GameDir*>())]);
+    connect(move_down, &QAction::triggered, [this, selected, row, game_dir_index] {
+        const int other_index = selected.sibling(row + 1, 0).data(GameListDir::GameDirRole).toInt();
+        // swap the items in the settings
+        std::swap(UISettings::values.game_dirs[game_dir_index],
+                  UISettings::values.game_dirs[other_index]);
+        // swap the indexes held by the QVariants
+        item_model->setData(selected, QVariant(other_index), GameListDir::GameDirRole);
+        item_model->setData(selected.sibling(row + 1, 0), QVariant(game_dir_index),
+                            GameListDir::GameDirRole);
         // move the treeview items
         const QList<QStandardItem*> item = item_model->takeRow(row);
         item_model->invisibleRootItem()->insertRow(row + 1, item);
-        tree_view->setExpanded(selected, game_dir.expanded);
+        tree_view->setExpanded(selected, UISettings::values.game_dirs[game_dir_index].expanded);
     });
 
-    connect(open_directory_location, &QAction::triggered,
-            [this, game_dir] { emit OpenDirectory(game_dir.path); });
+    connect(open_directory_location, &QAction::triggered, [this, game_dir_index] {
+        emit OpenDirectory(UISettings::values.game_dirs[game_dir_index].path);
+    });
 }
 
 void GameList::LoadCompatibilityList() {
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index df935022d..f25445f18 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -230,7 +230,7 @@ public:
         setData(type(), TypeRole);
 
         UISettings::GameDir* game_dir = &directory;
-        setData(QVariant::fromValue(game_dir), GameDirRole);
+        setData(QVariant(UISettings::values.game_dirs.indexOf(directory)), GameDirRole);
 
         const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
         switch (dir_type) {
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 41ef6f6b8..f76102459 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -296,10 +296,6 @@ void Config::ReadValues() {
         sdl2_config->GetBoolean("ControlsGeneral", "motion_enabled", true));
     Settings::values.touchscreen.enabled =
         sdl2_config->GetBoolean("ControlsGeneral", "touch_enabled", true);
-    Settings::values.touchscreen.device =
-        sdl2_config->Get("ControlsGeneral", "touch_device", "engine:emu_window");
-    Settings::values.touchscreen.finger =
-        sdl2_config->GetInteger("ControlsGeneral", "touch_finger", 0);
     Settings::values.touchscreen.rotation_angle =
         sdl2_config->GetInteger("ControlsGeneral", "touch_angle", 0);
     Settings::values.touchscreen.diameter_x =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index e32bed5e6..7843d5167 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -29,16 +29,16 @@ EmuWindow_SDL2::~EmuWindow_SDL2() {
 }
 
 void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
-    TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0));
+    TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
     input_subsystem->GetMouse()->MouseMove(x, y);
 }
 
 void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) {
     if (button == SDL_BUTTON_LEFT) {
         if (state == SDL_PRESSED) {
-            TouchPressed((unsigned)std::max(x, 0), (unsigned)std::max(y, 0));
+            TouchPressed((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
         } else {
-            TouchReleased();
+            TouchReleased(0);
         }
     } else if (button == SDL_BUTTON_RIGHT) {
         if (state == SDL_PRESSED) {
@@ -66,16 +66,16 @@ void EmuWindow_SDL2::OnFingerDown(float x, float y) {
     // 3DS does
 
     const auto [px, py] = TouchToPixelPos(x, y);
-    TouchPressed(px, py);
+    TouchPressed(px, py, 0);
 }
 
 void EmuWindow_SDL2::OnFingerMotion(float x, float y) {
     const auto [px, py] = TouchToPixelPos(x, y);
-    TouchMoved(px, py);
+    TouchMoved(px, py, 0);
 }
 
 void EmuWindow_SDL2::OnFingerUp() {
-    TouchReleased();
+    TouchReleased(0);
 }
 
 void EmuWindow_SDL2::OnKeyEvent(int key, u8 state) {
diff --git a/src/yuzu_tester/CMakeLists.txt b/src/yuzu_tester/CMakeLists.txt
deleted file mode 100644
index d8a2a1511..000000000
--- a/src/yuzu_tester/CMakeLists.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
-
-add_executable(yuzu-tester
-    config.cpp
-    config.h
-    default_ini.h
-    emu_window/emu_window_sdl2_hide.cpp
-    emu_window/emu_window_sdl2_hide.h
-    resource.h
-    service/yuzutest.cpp
-    service/yuzutest.h
-    yuzu.cpp
-    yuzu.rc
-)
-
-create_target_directory_groups(yuzu-tester)
-
-target_link_libraries(yuzu-tester PRIVATE common core input_common)
-target_link_libraries(yuzu-tester PRIVATE inih glad)
-if (MSVC)
-    target_link_libraries(yuzu-tester PRIVATE getopt)
-endif()
-target_link_libraries(yuzu-tester PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
-
-if(UNIX AND NOT APPLE)
-    install(TARGETS yuzu-tester RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
-endif()
-
-if (MSVC)
-    include(CopyYuzuSDLDeps)
-    copy_yuzu_SDL_deps(yuzu-tester)
-endif()
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
deleted file mode 100644
index 0aa143e1f..000000000
--- a/src/yuzu_tester/config.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include <sstream>
-#include <SDL.h>
-#include <inih/cpp/INIReader.h>
-#include "common/file_util.h"
-#include "common/logging/log.h"
-#include "common/param_package.h"
-#include "core/hle/service/acc/profile_manager.h"
-#include "core/settings.h"
-#include "input_common/main.h"
-#include "yuzu_tester/config.h"
-#include "yuzu_tester/default_ini.h"
-
-namespace FS = Common::FS;
-
-Config::Config() {
-    // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
-    sdl2_config_loc = FS::GetUserPath(FS::UserPath::ConfigDir) + "sdl2-tester-config.ini";
-    sdl2_config = std::make_unique<INIReader>(sdl2_config_loc);
-
-    Reload();
-}
-
-Config::~Config() = default;
-
-bool Config::LoadINI(const std::string& default_contents, bool retry) {
-    const char* location = this->sdl2_config_loc.c_str();
-    if (sdl2_config->ParseError() < 0) {
-        if (retry) {
-            LOG_WARNING(Config, "Failed to load {}. Creating file from defaults...", location);
-            FS::CreateFullPath(location);
-            FS::WriteStringToFile(true, default_contents, location);
-            sdl2_config = std::make_unique<INIReader>(location); // Reopen file
-
-            return LoadINI(default_contents, false);
-        }
-        LOG_ERROR(Config, "Failed.");
-        return false;
-    }
-    LOG_INFO(Config, "Successfully loaded {}", location);
-    return true;
-}
-
-void Config::ReadValues() {
-    // Controls
-    for (std::size_t p = 0; p < Settings::values.players.GetValue().size(); ++p) {
-        for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
-            Settings::values.players.GetValue()[p].buttons[i] = "";
-        }
-
-        for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
-            Settings::values.players.GetValue()[p].analogs[i] = "";
-        }
-    }
-
-    Settings::values.mouse_enabled = false;
-    for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
-        Settings::values.mouse_buttons[i] = "";
-    }
-
-    Settings::values.motion_device = "";
-
-    Settings::values.keyboard_enabled = false;
-
-    Settings::values.debug_pad_enabled = false;
-    for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
-        Settings::values.debug_pad_buttons[i] = "";
-    }
-
-    for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
-        Settings::values.debug_pad_analogs[i] = "";
-    }
-
-    Settings::values.vibration_enabled.SetValue(true);
-    Settings::values.enable_accurate_vibrations.SetValue(false);
-    Settings::values.motion_enabled.SetValue(true);
-    Settings::values.touchscreen.enabled = "";
-    Settings::values.touchscreen.device = "";
-    Settings::values.touchscreen.finger = 0;
-    Settings::values.touchscreen.rotation_angle = 0;
-    Settings::values.touchscreen.diameter_x = 15;
-    Settings::values.touchscreen.diameter_y = 15;
-
-    Settings::values.use_docked_mode.SetValue(
-        sdl2_config->GetBoolean("Controls", "use_docked_mode", true));
-
-    // Data Storage
-    Settings::values.use_virtual_sd =
-        sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
-    FS::GetUserPath(Common::FS::UserPath::NANDDir,
-                    sdl2_config->Get("Data Storage", "nand_directory",
-                                     Common::FS::GetUserPath(Common::FS::UserPath::NANDDir)));
-    FS::GetUserPath(Common::FS::UserPath::SDMCDir,
-                    sdl2_config->Get("Data Storage", "sdmc_directory",
-                                     Common::FS::GetUserPath(Common::FS::UserPath::SDMCDir)));
-
-    // System
-    Settings::values.current_user = std::clamp<int>(
-        sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1);
-
-    const auto rng_seed_enabled = sdl2_config->GetBoolean("System", "rng_seed_enabled", false);
-    if (rng_seed_enabled) {
-        Settings::values.rng_seed.SetValue(sdl2_config->GetInteger("System", "rng_seed", 0));
-    } else {
-        Settings::values.rng_seed.SetValue(std::nullopt);
-    }
-
-    const auto custom_rtc_enabled = sdl2_config->GetBoolean("System", "custom_rtc_enabled", false);
-    if (custom_rtc_enabled) {
-        Settings::values.custom_rtc.SetValue(
-            std::chrono::seconds(sdl2_config->GetInteger("System", "custom_rtc", 0)));
-    } else {
-        Settings::values.custom_rtc.SetValue(std::nullopt);
-    }
-
-    // Core
-    Settings::values.use_multi_core.SetValue(
-        sdl2_config->GetBoolean("Core", "use_multi_core", false));
-
-    // Renderer
-    Settings::values.aspect_ratio.SetValue(
-        static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)));
-    Settings::values.max_anisotropy.SetValue(
-        static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0)));
-    Settings::values.use_frame_limit.SetValue(false);
-    Settings::values.frame_limit.SetValue(100);
-    Settings::values.use_disk_shader_cache.SetValue(
-        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
-    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
-    Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
-    Settings::values.use_asynchronous_gpu_emulation.SetValue(
-        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false));
-    Settings::values.use_fast_gpu_time.SetValue(
-        sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true));
-
-    Settings::values.bg_red.SetValue(
-        static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)));
-    Settings::values.bg_green.SetValue(
-        static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0)));
-    Settings::values.bg_blue.SetValue(
-        static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0)));
-
-    // Audio
-    Settings::values.sink_id = "null";
-    Settings::values.enable_audio_stretching.SetValue(false);
-    Settings::values.audio_device_id = "auto";
-    Settings::values.volume.SetValue(0);
-
-    Settings::values.language_index.SetValue(
-        sdl2_config->GetInteger("System", "language_index", 1));
-
-    // Miscellaneous
-    Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace");
-    Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
-
-    // Debugging
-    Settings::values.program_args = "";
-    Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false);
-    Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false);
-
-    const auto title_list = sdl2_config->Get("AddOns", "title_ids", "");
-    std::stringstream ss(title_list);
-    std::string line;
-    while (std::getline(ss, line, '|')) {
-        const auto title_id = std::stoul(line, nullptr, 16);
-        const auto disabled_list = sdl2_config->Get("AddOns", "disabled_" + line, "");
-
-        std::stringstream inner_ss(disabled_list);
-        std::string inner_line;
-        std::vector<std::string> out;
-        while (std::getline(inner_ss, inner_line, '|')) {
-            out.push_back(inner_line);
-        }
-
-        Settings::values.disabled_addons.insert_or_assign(title_id, out);
-    }
-
-    // Web Service
-    Settings::values.enable_telemetry =
-        sdl2_config->GetBoolean("WebService", "enable_telemetry", true);
-    Settings::values.web_api_url =
-        sdl2_config->Get("WebService", "web_api_url", "https://api.yuzu-emu.org");
-    Settings::values.yuzu_username = sdl2_config->Get("WebService", "yuzu_username", "");
-    Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", "");
-}
-
-void Config::Reload() {
-    LoadINI(DefaultINI::sdl2_config_file);
-    ReadValues();
-}
diff --git a/src/yuzu_tester/config.h b/src/yuzu_tester/config.h
deleted file mode 100644
index 3b68e5bc9..000000000
--- a/src/yuzu_tester/config.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-
-class INIReader;
-
-class Config {
-    std::unique_ptr<INIReader> sdl2_config;
-    std::string sdl2_config_loc;
-
-    bool LoadINI(const std::string& default_contents = "", bool retry = true);
-    void ReadValues();
-
-public:
-    Config();
-    ~Config();
-
-    void Reload();
-};
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
deleted file mode 100644
index 779c3791b..000000000
--- a/src/yuzu_tester/default_ini.h
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-namespace DefaultINI {
-
-const char* sdl2_config_file = R"(
-[Core]
-# Whether to use multi-core for CPU emulation
-# 0 (default): Disabled, 1: Enabled
-use_multi_core=
-
-[Cpu]
-# Enable inline page tables optimization (faster guest memory access)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_page_tables =
-
-# Enable block linking CPU optimization (reduce block dispatcher use during predictable jumps)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_block_linking =
-
-# Enable return stack buffer CPU optimization (reduce block dispatcher use during predictable returns)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_return_stack_buffer =
-
-# Enable fast dispatcher CPU optimization (use a two-tiered dispatcher architecture)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_fast_dispatcher =
-
-# Enable context elimination CPU Optimization (reduce host memory use for guest context)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_context_elimination =
-
-# Enable constant propagation CPU optimization (basic IR optimization)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_const_prop =
-
-# Enable miscellaneous CPU optimizations (basic IR optimization)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_misc_ir =
-
-# Enable reduction of memory misalignment checks (reduce memory fallbacks for misaligned access)
-# 0: Disabled, 1 (default): Enabled
-cpuopt_reduce_misalign_checks =
-
-[Renderer]
-# Whether to use software or hardware rendering.
-# 0: Software, 1 (default): Hardware
-use_hw_renderer =
-
-# Whether to use the Just-In-Time (JIT) compiler for shader emulation
-# 0: Interpreter (slow), 1 (default): JIT (fast)
-use_shader_jit =
-
-# Aspect ratio
-# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
-aspect_ratio =
-
-# Anisotropic filtering
-# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x
-max_anisotropy =
-
-# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
-# 0 (default): Off, 1: On
-use_vsync =
-
-# Whether to use disk based shader cache
-# 0 (default): Off, 1 : On
-use_disk_shader_cache =
-
-# Whether to use accurate GPU emulation
-# 0 (default): Off (fast), 1 : On (slow)
-use_accurate_gpu_emulation =
-
-# Whether to use asynchronous GPU emulation
-# 0 : Off (slow), 1 (default): On (fast)
-use_asynchronous_gpu_emulation =
-
-# The clear color for the renderer. What shows up on the sides of the bottom screen.
-# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
-bg_red =
-bg_blue =
-bg_green =
-
-[Layout]
-# Layout for the screen inside the render window.
-# 0 (default): Default Top Bottom Screen, 1: Single Screen Only, 2: Large Screen Small Screen
-layout_option =
-
-# Toggle custom layout (using the settings below) on or off.
-# 0 (default): Off, 1: On
-custom_layout =
-
-# Screen placement when using Custom layout option
-# 0x, 0y is the top left corner of the render window.
-custom_top_left =
-custom_top_top =
-custom_top_right =
-custom_top_bottom =
-custom_bottom_left =
-custom_bottom_top =
-custom_bottom_right =
-custom_bottom_bottom =
-
-# Swaps the prominent screen with the other screen.
-# For example, if Single Screen is chosen, setting this to 1 will display the bottom screen instead of the top screen.
-# 0 (default): Top Screen is prominent, 1: Bottom Screen is prominent
-swap_screen =
-
-[Data Storage]
-# Whether to create a virtual SD card.
-# 1 (default): Yes, 0: No
-use_virtual_sd =
-
-[System]
-# Whether the system is docked
-# 1 (default): Yes, 0: No
-use_docked_mode =
-
-# Allow the use of NFC in games
-# 1 (default): Yes, 0 : No
-enable_nfc =
-
-# Sets the seed for the RNG generator built into the switch
-# rng_seed will be ignored and randomly generated if rng_seed_enabled is false
-rng_seed_enabled =
-rng_seed =
-
-# Sets the current time (in seconds since 12:00 AM Jan 1, 1970) that will be used by the time service
-# This will auto-increment, with the time set being the time the game is started
-# This override will only occur if custom_rtc_enabled is true, otherwise the current time is used
-custom_rtc_enabled =
-custom_rtc =
-
-# Sets the account username, max length is 32 characters
-# yuzu (default)
-username = yuzu
-
-# Sets the systems language index
-# 0: Japanese, 1: English (default), 2: French, 3: German, 4: Italian, 5: Spanish, 6: Chinese,
-# 7: Korean, 8: Dutch, 9: Portuguese, 10: Russian, 11: Taiwanese, 12: British English, 13: Canadian French,
-# 14: Latin American Spanish, 15: Simplified Chinese, 16: Traditional Chinese
-language_index =
-
-# The system region that yuzu will use during emulation
-# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
-region_value =
-
-[Miscellaneous]
-# A filter which removes logs below a certain logging level.
-# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
-log_filter = *:Trace
-
-[Debugging]
-# Arguments to be passed to argv/argc in the emulated program. It is preferable to use the testing service datastring
-program_args=
-# Determines whether or not yuzu will dump the ExeFS of all games it attempts to load while loading them
-dump_exefs=false
-# Determines whether or not yuzu will dump all NSOs it attempts to load while loading them
-dump_nso=false
-
-[WebService]
-# Whether or not to enable telemetry
-# 0: No, 1 (default): Yes
-enable_telemetry =
-# URL for Web API
-web_api_url = https://api.yuzu-emu.org
-# Username and token for yuzu Web Service
-# See https://profile.yuzu-emu.org/ for more info
-yuzu_username =
-yuzu_token =
-
-[AddOns]
-# Used to disable add-ons
-# List of title IDs of games that will have add-ons disabled (separated by '|'):
-title_ids =
-# For each title ID, have a key/value pair called `disabled_<title_id>` equal to the names of the add-ons to disable (sep. by '|')
-# e.x. disabled_0100000000010000 = Update|DLC <- disables Updates and DLC on Super Mario Odyssey
-)";
-}
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
deleted file mode 100644
index 358e03870..000000000
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstdlib>
-#include <string>
-
-#include <fmt/format.h>
-
-#define SDL_MAIN_HANDLED
-#include <SDL.h>
-
-#include <glad/glad.h>
-
-#include "common/logging/log.h"
-#include "common/scm_rev.h"
-#include "core/settings.h"
-#include "input_common/main.h"
-#include "yuzu_tester/emu_window/emu_window_sdl2_hide.h"
-
-bool EmuWindow_SDL2_Hide::SupportsRequiredGLExtensions() {
-    std::vector<std::string> unsupported_ext;
-
-    if (!GLAD_GL_ARB_direct_state_access)
-        unsupported_ext.push_back("ARB_direct_state_access");
-    if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
-        unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev");
-    if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)
-        unsupported_ext.push_back("ARB_texture_mirror_clamp_to_edge");
-    if (!GLAD_GL_ARB_multi_bind)
-        unsupported_ext.push_back("ARB_multi_bind");
-
-    // Extensions required to support some texture formats.
-    if (!GLAD_GL_EXT_texture_compression_s3tc)
-        unsupported_ext.push_back("EXT_texture_compression_s3tc");
-    if (!GLAD_GL_ARB_texture_compression_rgtc)
-        unsupported_ext.push_back("ARB_texture_compression_rgtc");
-    if (!GLAD_GL_ARB_depth_buffer_float)
-        unsupported_ext.push_back("ARB_depth_buffer_float");
-
-    for (const std::string& ext : unsupported_ext)
-        LOG_CRITICAL(Frontend, "Unsupported GL extension: {}", ext);
-
-    return unsupported_ext.empty();
-}
-
-EmuWindow_SDL2_Hide::EmuWindow_SDL2_Hide() {
-    // Initialize the window
-    if (SDL_Init(SDL_INIT_VIDEO) < 0) {
-        LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
-        exit(1);
-    }
-
-    input_subsystem->Initialize();
-
-    SDL_SetMainReady();
-
-    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
-    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
-    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
-    SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
-    SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
-    SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
-    SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
-    SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
-
-    std::string window_title = fmt::format("yuzu-tester {} | {}-{}", Common::g_build_fullname,
-                                           Common::g_scm_branch, Common::g_scm_desc);
-    render_window = SDL_CreateWindow(window_title.c_str(),
-                                     SDL_WINDOWPOS_UNDEFINED, // x position
-                                     SDL_WINDOWPOS_UNDEFINED, // y position
-                                     Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
-                                     SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE |
-                                         SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_HIDDEN);
-
-    if (render_window == nullptr) {
-        LOG_CRITICAL(Frontend, "Failed to create SDL2 window! {}", SDL_GetError());
-        exit(1);
-    }
-
-    gl_context = SDL_GL_CreateContext(render_window);
-
-    if (gl_context == nullptr) {
-        LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! {}", SDL_GetError());
-        exit(1);
-    }
-
-    if (!gladLoadGLLoader(static_cast<GLADloadproc>(SDL_GL_GetProcAddress))) {
-        LOG_CRITICAL(Frontend, "Failed to initialize GL functions! {}", SDL_GetError());
-        exit(1);
-    }
-
-    if (!SupportsRequiredGLExtensions()) {
-        LOG_CRITICAL(Frontend, "GPU does not support all required OpenGL extensions! Exiting...");
-        exit(1);
-    }
-
-    SDL_PumpEvents();
-    SDL_GL_SetSwapInterval(false);
-    LOG_INFO(Frontend, "yuzu-tester Version: {} | {}-{}", Common::g_build_fullname,
-             Common::g_scm_branch, Common::g_scm_desc);
-    Settings::LogSettings();
-}
-
-EmuWindow_SDL2_Hide::~EmuWindow_SDL2_Hide() {
-    input_subsystem->Shutdown();
-    SDL_GL_DeleteContext(gl_context);
-    SDL_Quit();
-}
-
-bool EmuWindow_SDL2_Hide::IsShown() const {
-    return false;
-}
-
-class SDLGLContext : public Core::Frontend::GraphicsContext {
-public:
-    explicit SDLGLContext() {
-        // create a hidden window to make the shared context against
-        window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
-                                  SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
-        context = SDL_GL_CreateContext(window);
-    }
-
-    ~SDLGLContext() {
-        DoneCurrent();
-        SDL_GL_DeleteContext(context);
-        SDL_DestroyWindow(window);
-    }
-
-    void MakeCurrent() override {
-        SDL_GL_MakeCurrent(window, context);
-    }
-
-    void DoneCurrent() override {
-        SDL_GL_MakeCurrent(window, nullptr);
-    }
-
-private:
-    SDL_Window* window;
-    SDL_GLContext context;
-};
-
-std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_Hide::CreateSharedContext() const {
-    return std::make_unique<SDLGLContext>();
-}
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
deleted file mode 100644
index adccdf35e..000000000
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "core/frontend/emu_window.h"
-
-struct SDL_Window;
-
-namespace InputCommon {
-class InputSubsystem;
-}
-
-class EmuWindow_SDL2_Hide : public Core::Frontend::EmuWindow {
-public:
-    explicit EmuWindow_SDL2_Hide();
-    ~EmuWindow_SDL2_Hide();
-
-    /// Whether the screen is being shown or not.
-    bool IsShown() const override;
-
-    std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
-
-private:
-    /// Whether the GPU and driver supports the OpenGL extension required
-    bool SupportsRequiredGLExtensions();
-
-    std::unique_ptr<InputCommon::InputSubsystem> input_subsystem;
-
-    /// Internal SDL2 render window
-    SDL_Window* render_window;
-
-    using SDL_GLContext = void*;
-    /// The OpenGL context associated with the window
-    SDL_GLContext gl_context;
-};
diff --git a/src/yuzu_tester/resource.h b/src/yuzu_tester/resource.h
deleted file mode 100644
index df8e459e4..000000000
--- a/src/yuzu_tester/resource.h
+++ /dev/null
@@ -1,16 +0,0 @@
-//{{NO_DEPENDENCIES}}
-// Microsoft Visual C++ generated include file.
-// Used by pcafe.rc
-//
-#define IDI_ICON3 103
-
-// Next default values for new objects
-//
-#ifdef APSTUDIO_INVOKED
-#ifndef APSTUDIO_READONLY_SYMBOLS
-#define _APS_NEXT_RESOURCE_VALUE 105
-#define _APS_NEXT_COMMAND_VALUE 40001
-#define _APS_NEXT_CONTROL_VALUE 1001
-#define _APS_NEXT_SYMED_VALUE 101
-#endif
-#endif
diff --git a/src/yuzu_tester/service/yuzutest.cpp b/src/yuzu_tester/service/yuzutest.cpp
deleted file mode 100644
index e257fae25..000000000
--- a/src/yuzu_tester/service/yuzutest.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/service/service.h"
-#include "core/hle/service/sm/sm.h"
-#include "yuzu_tester/service/yuzutest.h"
-
-namespace Service::Yuzu {
-
-constexpr u64 SERVICE_VERSION = 0x00000002;
-
-class YuzuTest final : public ServiceFramework<YuzuTest> {
-public:
-    explicit YuzuTest(Core::System& system_, std::string data_,
-                      std::function<void(std::vector<TestResult>)> finish_callback_)
-        : ServiceFramework{system_, "yuzutest"}, data{std::move(data_)}, finish_callback{std::move(
-                                                                             finish_callback_)} {
-        static const FunctionInfo functions[] = {
-            {0, &YuzuTest::Initialize, "Initialize"},
-            {1, &YuzuTest::GetServiceVersion, "GetServiceVersion"},
-            {2, &YuzuTest::GetData, "GetData"},
-            {10, &YuzuTest::StartIndividual, "StartIndividual"},
-            {20, &YuzuTest::FinishIndividual, "FinishIndividual"},
-            {100, &YuzuTest::ExitProgram, "ExitProgram"},
-        };
-
-        RegisterHandlers(functions);
-    }
-
-private:
-    void Initialize(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Frontend, "called");
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
-    }
-
-    void GetServiceVersion(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Frontend, "called");
-        IPC::ResponseBuilder rb{ctx, 4};
-        rb.Push(RESULT_SUCCESS);
-        rb.Push(SERVICE_VERSION);
-    }
-
-    void GetData(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Frontend, "called");
-        const auto size = ctx.GetWriteBufferSize();
-        const auto write_size = std::min(size, data.size());
-        ctx.WriteBuffer(data.data(), write_size);
-
-        IPC::ResponseBuilder rb{ctx, 3};
-        rb.Push(RESULT_SUCCESS);
-        rb.Push<u32>(static_cast<u32>(write_size));
-    }
-
-    void StartIndividual(Kernel::HLERequestContext& ctx) {
-        const auto name_raw = ctx.ReadBuffer();
-
-        const auto name = Common::StringFromFixedZeroTerminatedBuffer(
-            reinterpret_cast<const char*>(name_raw.data()), name_raw.size());
-
-        LOG_DEBUG(Frontend, "called, name={}", name);
-
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
-    }
-
-    void FinishIndividual(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        const auto code = rp.PopRaw<u32>();
-
-        const auto result_data_raw = ctx.ReadBuffer();
-        const auto test_name_raw = ctx.ReadBuffer(1);
-
-        const auto data = Common::StringFromFixedZeroTerminatedBuffer(
-            reinterpret_cast<const char*>(result_data_raw.data()), result_data_raw.size());
-        const auto test_name = Common::StringFromFixedZeroTerminatedBuffer(
-            reinterpret_cast<const char*>(test_name_raw.data()), test_name_raw.size());
-
-        LOG_INFO(Frontend, "called, result_code={:08X}, data={}, name={}", code, data, test_name);
-
-        results.push_back({code, data, test_name});
-
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
-    }
-
-    void ExitProgram(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Frontend, "called");
-
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
-
-        finish_callback(std::move(results));
-    }
-
-    std::string data;
-
-    std::vector<TestResult> results;
-    std::function<void(std::vector<TestResult>)> finish_callback;
-};
-
-void InstallInterfaces(Core::System& system, std::string data,
-                       std::function<void(std::vector<TestResult>)> finish_callback) {
-    auto& sm = system.ServiceManager();
-    std::make_shared<YuzuTest>(system, std::move(data), std::move(finish_callback))
-        ->InstallAsService(sm);
-}
-
-} // namespace Service::Yuzu
diff --git a/src/yuzu_tester/service/yuzutest.h b/src/yuzu_tester/service/yuzutest.h
deleted file mode 100644
index 7794814fa..000000000
--- a/src/yuzu_tester/service/yuzutest.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <functional>
-#include <string>
-
-namespace Core {
-class System;
-}
-
-namespace Service::Yuzu {
-
-struct TestResult {
-    u32 code;
-    std::string data;
-    std::string name;
-};
-
-void InstallInterfaces(Core::System& system, std::string data,
-                       std::function<void(std::vector<TestResult>)> finish_callback);
-
-} // namespace Service::Yuzu
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
deleted file mode 100644
index 09cf2ad77..000000000
--- a/src/yuzu_tester/yuzu.cpp
+++ /dev/null
@@ -1,268 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <chrono>
-#include <iostream>
-#include <memory>
-#include <string>
-#include <thread>
-
-#include <fmt/ostream.h>
-
-#include "common/common_paths.h"
-#include "common/detached_tasks.h"
-#include "common/file_util.h"
-#include "common/logging/backend.h"
-#include "common/logging/filter.h"
-#include "common/logging/log.h"
-#include "common/microprofile.h"
-#include "common/scm_rev.h"
-#include "common/scope_exit.h"
-#include "common/string_util.h"
-#include "common/telemetry.h"
-#include "core/core.h"
-#include "core/crypto/key_manager.h"
-#include "core/file_sys/registered_cache.h"
-#include "core/file_sys/vfs_real.h"
-#include "core/hle/service/filesystem/filesystem.h"
-#include "core/loader/loader.h"
-#include "core/settings.h"
-#include "core/telemetry_session.h"
-#include "video_core/renderer_base.h"
-#include "yuzu_tester/config.h"
-#include "yuzu_tester/emu_window/emu_window_sdl2_hide.h"
-#include "yuzu_tester/service/yuzutest.h"
-
-#ifdef _WIN32
-// windows.h needs to be included before shellapi.h
-#include <windows.h>
-
-#include <shellapi.h>
-#endif
-
-#undef _UNICODE
-#include <getopt.h>
-#ifndef _MSC_VER
-#include <unistd.h>
-#endif
-
-#ifdef _WIN32
-extern "C" {
-// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
-// graphics
-__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001;
-__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
-}
-#endif
-
-static void PrintHelp(const char* argv0) {
-    std::cout << "Usage: " << argv0
-              << " [options] <filename>\n"
-                 "-h, --help            Display this help and exit\n"
-                 "-v, --version         Output version information and exit\n"
-                 "-d, --datastring      Pass following string as data to test service command #2\n"
-                 "-l, --log             Log to console in addition to file (will log to file only "
-                 "by default)\n";
-}
-
-static void PrintVersion() {
-    std::cout << "yuzu [Test Utility] " << Common::g_scm_branch << " " << Common::g_scm_desc
-              << std::endl;
-}
-
-static void InitializeLogging(bool console) {
-    Log::Filter log_filter(Log::Level::Debug);
-    log_filter.ParseFilterString(Settings::values.log_filter);
-    Log::SetGlobalFilter(log_filter);
-
-    if (console)
-        Log::AddBackend(std::make_unique<Log::ColorConsoleBackend>());
-
-    const std::string& log_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir);
-    Common::FS::CreateFullPath(log_dir);
-    Log::AddBackend(std::make_unique<Log::FileBackend>(log_dir + LOG_FILE));
-#ifdef _WIN32
-    Log::AddBackend(std::make_unique<Log::DebuggerBackend>());
-#endif
-}
-
-/// Application entry point
-int main(int argc, char** argv) {
-    Common::DetachedTasks detached_tasks;
-    Config config;
-
-    int option_index = 0;
-
-#ifdef _WIN32
-    int argc_w;
-    auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w);
-
-    if (argv_w == nullptr) {
-        std::cout << "Failed to get command line arguments" << std::endl;
-        return -1;
-    }
-#endif
-    std::string filepath;
-
-    static struct option long_options[] = {
-        {"help", no_argument, 0, 'h'},
-        {"version", no_argument, 0, 'v'},
-        {"datastring", optional_argument, 0, 'd'},
-        {"log", no_argument, 0, 'l'},
-        {0, 0, 0, 0},
-    };
-
-    bool console_log = false;
-    std::string datastring;
-
-    while (optind < argc) {
-        int arg = getopt_long(argc, argv, "hvdl::", long_options, &option_index);
-        if (arg != -1) {
-            switch (static_cast<char>(arg)) {
-            case 'h':
-                PrintHelp(argv[0]);
-                return 0;
-            case 'v':
-                PrintVersion();
-                return 0;
-            case 'd':
-                datastring = argv[optind];
-                ++optind;
-                break;
-            case 'l':
-                console_log = true;
-                break;
-            }
-        } else {
-#ifdef _WIN32
-            filepath = Common::UTF16ToUTF8(argv_w[optind]);
-#else
-            filepath = argv[optind];
-#endif
-            optind++;
-        }
-    }
-
-    InitializeLogging(console_log);
-
-#ifdef _WIN32
-    LocalFree(argv_w);
-#endif
-
-    MicroProfileOnThreadCreate("EmuThread");
-    SCOPE_EXIT({ MicroProfileShutdown(); });
-
-    if (filepath.empty()) {
-        LOG_CRITICAL(Frontend, "Failed to load application: No application specified");
-        std::cout << "Failed to load application: No application specified" << std::endl;
-        PrintHelp(argv[0]);
-        return -1;
-    }
-
-    Core::System& system{Core::System::GetInstance()};
-
-    Settings::Apply(system);
-
-    const auto emu_window{std::make_unique<EmuWindow_SDL2_Hide>()};
-
-    bool finished = false;
-    int return_value = 0;
-    const auto callback = [&finished,
-                           &return_value](std::vector<Service::Yuzu::TestResult> results) {
-        finished = true;
-        return_value = 0;
-
-        // Find the minimum length needed to fully enclose all test names (and the header field) in
-        // the fmt::format column by first finding the maximum size of any test name and comparing
-        // that to 9, the string length of 'Test Name'
-        const auto needed_length_name =
-            std::max<u64>(std::max_element(results.begin(), results.end(),
-                                           [](const auto& lhs, const auto& rhs) {
-                                               return lhs.name.size() < rhs.name.size();
-                                           })
-                              ->name.size(),
-                          9ull);
-
-        std::size_t passed = 0;
-        std::size_t failed = 0;
-
-        std::cout << fmt::format("Result [Res Code] | {:<{}} | Extra Data", "Test Name",
-                                 needed_length_name)
-                  << std::endl;
-
-        for (const auto& res : results) {
-            const auto main_res = res.code == 0 ? "PASSED" : "FAILED";
-            if (res.code == 0)
-                ++passed;
-            else
-                ++failed;
-            std::cout << fmt::format("{} [{:08X}] | {:<{}} | {}", main_res, res.code, res.name,
-                                     needed_length_name, res.data)
-                      << std::endl;
-        }
-
-        std::cout << std::endl
-                  << fmt::format("{:4d} Passed | {:4d} Failed | {:4d} Total | {:2.2f} Passed Ratio",
-                                 passed, failed, passed + failed,
-                                 static_cast<float>(passed) / (passed + failed))
-                  << std::endl
-                  << (failed == 0 ? "PASSED" : "FAILED") << std::endl;
-
-        if (failed > 0)
-            return_value = -1;
-    };
-
-    system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>());
-    system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>());
-    system.GetFileSystemController().CreateFactories(*system.GetFilesystem());
-
-    SCOPE_EXIT({ system.Shutdown(); });
-
-    const Core::System::ResultStatus load_result{system.Load(*emu_window, filepath)};
-
-    switch (load_result) {
-    case Core::System::ResultStatus::ErrorGetLoader:
-        LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath);
-        return -1;
-    case Core::System::ResultStatus::ErrorLoader:
-        LOG_CRITICAL(Frontend, "Failed to load ROM!");
-        return -1;
-    case Core::System::ResultStatus::ErrorNotInitialized:
-        LOG_CRITICAL(Frontend, "CPUCore not initialized");
-        return -1;
-    case Core::System::ResultStatus::ErrorVideoCore:
-        LOG_CRITICAL(Frontend, "Failed to initialize VideoCore!");
-        return -1;
-    case Core::System::ResultStatus::Success:
-        break; // Expected case
-    default:
-        if (static_cast<u32>(load_result) >
-            static_cast<u32>(Core::System::ResultStatus::ErrorLoader)) {
-            const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
-            const u16 error_id = static_cast<u16>(load_result) - loader_id;
-            LOG_CRITICAL(Frontend,
-                         "While attempting to load the ROM requested, an error occurred. Please "
-                         "refer to the yuzu wiki for more information or the yuzu discord for "
-                         "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
-                         loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));
-        }
-        break;
-    }
-
-    Service::Yuzu::InstallInterfaces(system, datastring, callback);
-
-    system.TelemetrySession().AddField(Common::Telemetry::FieldType::App, "Frontend",
-                                       "SDLHideTester");
-
-    system.GPU().Start();
-
-    void(system.Run());
-    while (!finished) {
-        std::this_thread::sleep_for(std::chrono::milliseconds(1));
-    }
-    void(system.Pause());
-
-    detached_tasks.WaitForAllTasks();
-    return return_value;
-}
diff --git a/src/yuzu_tester/yuzu.rc b/src/yuzu_tester/yuzu.rc
deleted file mode 100644
index 0cde75e2f..000000000
--- a/src/yuzu_tester/yuzu.rc
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "winresrc.h"
-/////////////////////////////////////////////////////////////////////////////
-//
-// Icon
-//
-
-// Icon with lowest ID value placed first to ensure application icon
-// remains consistent on all systems.
-YUZU_ICON               ICON                    "../../dist/yuzu.ico"
-
-
-/////////////////////////////////////////////////////////////////////////////
-//
-// RT_MANIFEST
-//
-
-0                       RT_MANIFEST             "../../dist/yuzu.manifest"