153 files changed, 4944 insertions, 4694 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 21c46a567..3df54816d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(host_shaders)
+
 add_library(video_core STATIC
     buffer_cache/buffer_block.h
     buffer_cache/buffer_cache.h
@@ -98,6 +100,8 @@ add_library(video_core STATIC
     sampler_cache.cpp
     sampler_cache.h
     shader_cache.h
+    shader_notify.cpp
+    shader_notify.h
     shader/decode/arithmetic.cpp
     shader/decode/arithmetic_immediate.cpp
     shader/decode/bfe.cpp
@@ -128,6 +132,8 @@ add_library(video_core STATIC
     shader/decode/other.cpp
     shader/ast.cpp
     shader/ast.h
+    shader/async_shaders.cpp
+    shader/async_shaders.h
     shader/compiler_settings.cpp
     shader/compiler_settings.h
     shader/control_flow.cpp
@@ -184,6 +190,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_blit_screen.h
         renderer_vulkan/vk_buffer_cache.cpp
         renderer_vulkan/vk_buffer_cache.h
+        renderer_vulkan/vk_command_pool.cpp
+        renderer_vulkan/vk_command_pool.h
         renderer_vulkan/vk_compute_pass.cpp
         renderer_vulkan/vk_compute_pass.h
         renderer_vulkan/vk_compute_pipeline.cpp
@@ -198,6 +206,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_graphics_pipeline.h
         renderer_vulkan/vk_image.cpp
         renderer_vulkan/vk_image.h
+        renderer_vulkan/vk_master_semaphore.cpp
+        renderer_vulkan/vk_master_semaphore.h
         renderer_vulkan/vk_memory_manager.cpp
         renderer_vulkan/vk_memory_manager.h
         renderer_vulkan/vk_pipeline_cache.cpp
@@ -208,8 +218,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_rasterizer.h
         renderer_vulkan/vk_renderpass_cache.cpp
         renderer_vulkan/vk_renderpass_cache.h
-        renderer_vulkan/vk_resource_manager.cpp
-        renderer_vulkan/vk_resource_manager.h
+        renderer_vulkan/vk_resource_pool.cpp
+        renderer_vulkan/vk_resource_pool.h
         renderer_vulkan/vk_sampler_cache.cpp
         renderer_vulkan/vk_sampler_cache.h
         renderer_vulkan/vk_scheduler.cpp
@@ -240,6 +250,9 @@ create_target_directory_groups(video_core)
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad xbyak)
 
+add_dependencies(video_core host_shaders)
+target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
+
 if (ENABLE_VULKAN)
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -260,5 +273,12 @@ endif()
 if (MSVC)
     target_compile_options(video_core PRIVATE /we4267)
 else()
-    target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
+    target_compile_options(video_core PRIVATE
+        -Werror=conversion
+        -Wno-error=sign-conversion
+        -Werror=switch
+        -Werror=unused-variable
+        -Werror=unused-but-set-variable
+        $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> # GCC-only diagnostic, skipped elsewhere
+    )
 endif()
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index dd7ce8c99..e7edd733f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -51,46 +51,43 @@ public:
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        auto& memory_manager = system.GPU().MemoryManager();
-        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr_opt) {
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             return GetEmptyBuffer(size);
         }
-        const VAddr cpu_addr = *cpu_addr_opt;
 
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
-                const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
+            if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
+                const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
                 if (use_fast_cbuf) {
                     u8* dest;
                     if (is_granular) {
-                        dest = memory_manager.GetPointer(gpu_addr);
+                        dest = gpu_memory.GetPointer(gpu_addr);
                     } else {
                         staging_buffer.resize(size);
                         dest = staging_buffer.data();
-                        memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+                        gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
                     }
                     return ConstBufferUpload(dest, size);
                 }
                 if (is_granular) {
-                    u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
+                    u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
                     return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
                         std::memcpy(dest, host_ptr, size);
                     });
                 } else {
-                    return StreamBufferUpload(
-                        size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
-                            memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
-                        });
+                    return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
+                        gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
+                    });
                 }
             }
         }
 
-        Buffer* const block = GetBlock(cpu_addr, size);
-        MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+        Buffer* const block = GetBlock(*cpu_addr, size);
+        MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
         if (!map) {
             return GetEmptyBuffer(size);
         }
@@ -106,7 +103,7 @@ public:
             }
         }
 
-        return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
+        return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
     }
 
     /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -262,9 +259,11 @@ public:
     virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
 
 protected:
-    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                         std::unique_ptr<StreamBuffer> stream_buffer)
-        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
+    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+                         std::unique_ptr<StreamBuffer> stream_buffer_)
+        : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
+          stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
 
     ~BufferCache() = default;
 
@@ -326,14 +325,13 @@ private:
     MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
         const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            auto& memory_manager = system.GPU().MemoryManager();
             const VAddr cpu_addr_end = cpu_addr + size;
-            if (memory_manager.IsGranularRange(gpu_addr, size)) {
-                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+            if (gpu_memory.IsGranularRange(gpu_addr, size)) {
+                u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
                 block->Upload(block->Offset(cpu_addr), size, host_ptr);
             } else {
                 staging_buffer.resize(size);
-                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
                 block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
             }
             return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
@@ -392,7 +390,7 @@ private:
                 continue;
             }
             staging_buffer.resize(size);
-            system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+            cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
             block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
         }
     }
@@ -431,7 +429,7 @@ private:
         const std::size_t size = map->end - map->start;
         staging_buffer.resize(size);
         block->Download(block->Offset(map->start), size, staging_buffer.data());
-        system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
+        cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
 
@@ -524,11 +522,8 @@ private:
     void MarkRegionAsWritten(VAddr start, VAddr end) {
         const u64 page_end = end >> WRITE_PAGE_BIT;
         for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            auto it = written_pages.find(page_start);
-            if (it != written_pages.end()) {
-                it->second = it->second + 1;
-            } else {
-                written_pages.insert_or_assign(page_start, 1);
+            if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
+                ++it->second;
             }
         }
     }
@@ -539,7 +534,7 @@ private:
             auto it = written_pages.find(page_start);
             if (it != written_pages.end()) {
                 if (it->second > 1) {
-                    it->second = it->second - 1;
+                    --it->second;
                 } else {
                     written_pages.erase(it);
                 }
@@ -570,7 +565,8 @@ private:
     }
 
     VideoCore::RasterizerInterface& rasterizer;
-    Core::System& system;
+    Tegra::MemoryManager& gpu_memory;
+    Core::Memory::Memory& cpu_memory;
 
     std::unique_ptr<StreamBuffer> stream_buffer;
     BufferType stream_buffer_handle;
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index 6c426b035..b06c32c84 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -17,101 +17,94 @@ namespace {
 // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
 
 constexpr std::array VIEW_CLASS_128_BITS = {
-    PixelFormat::RGBA32F,
-    PixelFormat::RGBA32UI,
+    PixelFormat::R32G32B32A32_FLOAT,
+    PixelFormat::R32G32B32A32_UINT,
+    PixelFormat::R32G32B32A32_SINT,
 };
-// Missing formats:
-// PixelFormat::RGBA32I
 
 constexpr std::array VIEW_CLASS_96_BITS = {
-    PixelFormat::RGB32F,
+    PixelFormat::R32G32B32_FLOAT,
 };
 // Missing formats:
 // PixelFormat::RGB32UI,
 // PixelFormat::RGB32I,
 
 constexpr std::array VIEW_CLASS_64_BITS = {
-    PixelFormat::RGBA16F, PixelFormat::RG32F,   PixelFormat::RGBA16UI, PixelFormat::RG32UI,
-    PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S,
+    PixelFormat::R32G32_FLOAT,       PixelFormat::R32G32_UINT,
+    PixelFormat::R32G32_SINT,        PixelFormat::R16G16B16A16_FLOAT,
+    PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
+    PixelFormat::R16G16B16A16_UINT,  PixelFormat::R16G16B16A16_SINT,
 };
-// Missing formats:
-// PixelFormat::RGBA16I
-// PixelFormat::RG32I
 
 // TODO: How should we handle 48 bits?
 
 constexpr std::array VIEW_CLASS_32_BITS = {
-    PixelFormat::RG16F,        PixelFormat::R11FG11FB10F, PixelFormat::R32F,
-    PixelFormat::A2B10G10R10U, PixelFormat::RG16UI,       PixelFormat::R32UI,
-    PixelFormat::RG16I,        PixelFormat::R32I,         PixelFormat::ABGR8U,
-    PixelFormat::RG16,         PixelFormat::ABGR8S,       PixelFormat::RG16S,
-    PixelFormat::RGBA8_SRGB,   PixelFormat::E5B9G9R9F,    PixelFormat::BGRA8,
-    PixelFormat::BGRA8_SRGB,
+    PixelFormat::R16G16_FLOAT,      PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
+    PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT,     PixelFormat::R32_UINT,
+    PixelFormat::R16G16_SINT,       PixelFormat::R32_SINT,        PixelFormat::A8B8G8R8_UNORM,
+    PixelFormat::R16G16_UNORM,      PixelFormat::A8B8G8R8_SNORM,  PixelFormat::R16G16_SNORM,
+    PixelFormat::A8B8G8R8_SRGB,     PixelFormat::E5B9G9R9_FLOAT,  PixelFormat::B8G8R8A8_UNORM,
+    PixelFormat::B8G8R8A8_SRGB,     PixelFormat::A8B8G8R8_UINT,   PixelFormat::A8B8G8R8_SINT,
+    PixelFormat::A2B10G10R10_UINT,
 };
-// Missing formats:
-// PixelFormat::RGBA8UI
-// PixelFormat::RGBA8I
-// PixelFormat::RGB10_A2_UI
 
 // TODO: How should we handle 24 bits?
 
 constexpr std::array VIEW_CLASS_16_BITS = {
-    PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I,
-    PixelFormat::RG8U, PixelFormat::R16U,  PixelFormat::RG8S,  PixelFormat::R16S,
+    PixelFormat::R16_FLOAT,  PixelFormat::R8G8_UINT,  PixelFormat::R16_UINT,
+    PixelFormat::R16_SINT,   PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
+    PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM,  PixelFormat::R8G8_SINT,
 };
-// Missing formats:
-// PixelFormat::RG8I
 
 constexpr std::array VIEW_CLASS_8_BITS = {
-    PixelFormat::R8UI,
-    PixelFormat::R8U,
+    PixelFormat::R8_UINT,
+    PixelFormat::R8_UNORM,
+    PixelFormat::R8_SINT,
+    PixelFormat::R8_SNORM,
 };
-// Missing formats:
-// PixelFormat::R8I
-// PixelFormat::R8S
 
 constexpr std::array VIEW_CLASS_RGTC1_RED = {
-    PixelFormat::DXN1,
+    PixelFormat::BC4_UNORM,
+    PixelFormat::BC4_SNORM,
 };
-// Missing formats:
-// COMPRESSED_SIGNED_RED_RGTC1
 
 constexpr std::array VIEW_CLASS_RGTC2_RG = {
-    PixelFormat::DXN2UNORM,
-    PixelFormat::DXN2SNORM,
+    PixelFormat::BC5_UNORM,
+    PixelFormat::BC5_SNORM,
 };
 
 constexpr std::array VIEW_CLASS_BPTC_UNORM = {
-    PixelFormat::BC7U,
-    PixelFormat::BC7U_SRGB,
+    PixelFormat::BC7_UNORM,
+    PixelFormat::BC7_SRGB,
 };
 
 constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
-    PixelFormat::BC6H_SF16,
-    PixelFormat::BC6H_UF16,
+    PixelFormat::BC6H_SFLOAT,
+    PixelFormat::BC6H_UFLOAT,
 };
 
 // Compatibility table taken from Table 4.X.1 in:
 // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
 
 constexpr std::array COPY_CLASS_128_BITS = {
-    PixelFormat::RGBA32UI,   PixelFormat::RGBA32F,   PixelFormat::DXT23,
-    PixelFormat::DXT23_SRGB, PixelFormat::DXT45,     PixelFormat::DXT45_SRGB,
-    PixelFormat::DXN2SNORM,  PixelFormat::BC7U,      PixelFormat::BC7U_SRGB,
-    PixelFormat::BC6H_SF16,  PixelFormat::BC6H_UF16,
+    PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
+    PixelFormat::BC2_UNORM,         PixelFormat::BC2_SRGB,           PixelFormat::BC3_UNORM,
+    PixelFormat::BC3_SRGB,          PixelFormat::BC5_UNORM,          PixelFormat::BC5_SNORM,
+    PixelFormat::BC7_UNORM,         PixelFormat::BC7_SRGB,           PixelFormat::BC6H_SFLOAT,
+    PixelFormat::BC6H_UFLOAT,
 };
 // Missing formats:
 // PixelFormat::RGBA32I
 // COMPRESSED_RG_RGTC2
 
 constexpr std::array COPY_CLASS_64_BITS = {
-    PixelFormat::RGBA16F, PixelFormat::RG32F,   PixelFormat::RGBA16UI,  PixelFormat::RG32UI,
-    PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1,
-
+    PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
+    PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
+    PixelFormat::R16G16B16A16_SINT,  PixelFormat::R32G32_UINT,
+    PixelFormat::R32G32_FLOAT,       PixelFormat::R32G32_SINT,
+    PixelFormat::BC1_RGBA_UNORM,     PixelFormat::BC1_RGBA_SRGB,
 };
 // Missing formats:
-// PixelFormat::RGBA16I
-// PixelFormat::RG32I,
 // COMPRESSED_RGB_S3TC_DXT1_EXT
 // COMPRESSED_SRGB_S3TC_DXT1_EXT
 // COMPRESSED_RGBA_S3TC_DXT1_EXT
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index d1082566d..51766349b 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#pragma once
+
 #include <array>
 #include <bitset>
 #include <cstddef>
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ff10ff40d..9409c4075 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,13 @@
 
 namespace Tegra::Engines {
 
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+Fermi2D::Fermi2D() = default;
+
+Fermi2D::~Fermi2D() = default;
+
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
 
 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
@@ -81,13 +87,13 @@ void Fermi2D::HandleSurfaceCopy() {
     const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
     const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
                                           dst_blit_y2};
-    Config copy_config;
-    copy_config.operation = regs.operation;
-    copy_config.filter = regs.blit_control.filter;
-    copy_config.src_rect = src_rect;
-    copy_config.dst_rect = dst_rect;
-
-    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+    const Config copy_config{
+        .operation = regs.operation,
+        .filter = regs.blit_control.filter,
+        .src_rect = src_rect,
+        .dst_rect = dst_rect,
+    };
+    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
         UNIMPLEMENTED();
     }
 }
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8f37d053f..0909709ec 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -34,8 +34,11 @@ namespace Tegra::Engines {
 
 class Fermi2D final : public EngineInterface {
 public:
-    explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
-    ~Fermi2D() = default;
+    explicit Fermi2D();
+    ~Fermi2D();
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
 
     /// Write the value to the register identified by method.
     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
@@ -142,14 +145,14 @@ public:
     } regs{};
 
     struct Config {
-        Operation operation;
-        Filter filter;
+        Operation operation{};
+        Filter filter{};
         Common::Rectangle<u32> src_rect;
         Common::Rectangle<u32> dst_rect;
     };
 
 private:
-    VideoCore::RasterizerInterface& rasterizer;
+    VideoCore::RasterizerInterface* rasterizer = nullptr; // Assigned by BindRasterizer()
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a82b06a38..898370739 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -16,14 +16,15 @@
 
 namespace Tegra::Engines {
 
-KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                             MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
-                                                                                  memory_manager,
-                                                                                  regs.upload} {}
+KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
+    : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
 
 KeplerCompute::~KeplerCompute() = default;
 
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
+
 void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid KeplerCompute register, increase the size of the Regs structure");
@@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
 }
 
 VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 void KeplerCompute::ProcessLaunch() {
@@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() {
     const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
     LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
 
-    rasterizer.DispatchCompute(code_addr);
+    rasterizer->DispatchCompute(code_addr);
 }
 
 Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index b7f668d88..7f2500aab 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -42,10 +42,12 @@ namespace Tegra::Engines {
 
 class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
 public:
-    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                           MemoryManager& memory_manager);
+    explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
     ~KeplerCompute();
 
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
     static constexpr std::size_t NumConstBuffers = 8;
 
     struct Regs {
@@ -230,11 +232,6 @@ public:
     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
 
 private:
-    Core::System& system;
-    VideoCore::RasterizerInterface& rasterizer;
-    MemoryManager& memory_manager;
-    Upload::State upload_state;
-
     void ProcessLaunch();
 
     /// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -242,6 +239,11 @@ private:
 
     /// Retrieves information about a specific TSC entry from the TSC buffer.
     Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
+    Core::System& system;
+    MemoryManager& memory_manager;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+    Upload::State upload_state;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c01436295..57ebc785f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,14 +22,19 @@ using VideoCore::QueryType;
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;
 
-Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                     MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
-      macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
+Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
+    : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
+      upload_state{memory_manager, regs.upload} {
     dirty.flags.flip();
     InitializeRegisterDefaults();
 }
 
+Maxwell3D::~Maxwell3D() = default;
+
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
+
 void Maxwell3D::InitializeRegisterDefaults() {
     // Initializes registers to their default values - what games expect them to be at boot. This is
     // for certain registers that may not be explicitly set by games.
@@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
 
     switch (method) {
     case MAXWELL3D_REG_INDEX(wait_for_idle): {
-        rasterizer.WaitForIdle();
+        rasterizer->WaitForIdle();
         break;
     }
     case MAXWELL3D_REG_INDEX(shadow_ram_control): {
@@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
 
     const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
     if (ShouldExecute()) {
-        rasterizer.Draw(is_indexed, true);
+        rasterizer->Draw(is_indexed, true);
     }
 
     // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() {
     switch (regs.query.query_get.operation) {
     case Regs::QueryOperation::Release:
         if (regs.query.query_get.fence == 1) {
-            rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+            rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
         } else {
             StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
         }
@@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() {
 void Maxwell3D::ProcessCounterReset() {
     switch (regs.counter_reset) {
     case Regs::CounterReset::SampleCnt:
-        rasterizer.ResetCounter(QueryType::SamplesPassed);
+        rasterizer->ResetCounter(QueryType::SamplesPassed);
         break;
     default:
         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
@@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() {
     const u32 increment = regs.sync_info.increment.Value();
     [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
     if (increment) {
-        rasterizer.SignalSyncPoint(sync_point);
+        rasterizer->SignalSyncPoint(sync_point);
     }
 }
 
@@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() {
 
     const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
     if (ShouldExecute()) {
-        rasterizer.Draw(is_indexed, false);
+        rasterizer->Draw(is_indexed, false);
     }
 
     // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -590,9 +595,9 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
         return 0;
     case Regs::QuerySelect::SamplesPassed:
         // Deferred.
-        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
-                         system.GPU().GetTicks());
-        return {};
+        rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+                          system.GPU().GetTicks());
+        return std::nullopt;
     default:
         LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
                   static_cast<u32>(regs.query.query_get.select.Value()));
@@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() {
            regs.clear_buffers.R == regs.clear_buffers.B &&
            regs.clear_buffers.R == regs.clear_buffers.A);
 
-    rasterizer.Clear();
+    rasterizer->Clear();
 }
 
 u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
@@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
 }
 
 VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ef1618990..bc289c55d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -51,9 +51,11 @@ namespace Tegra::Engines {
 
 class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
 public:
-    explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                       MemoryManager& memory_manager);
-    ~Maxwell3D() = default;
+    explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
+    ~Maxwell3D();
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
 
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -647,7 +649,7 @@ public:
                     GetX() + GetWidth(),  // right
                     GetY()                // bottom
                 };
-            };
+            }
 
             f32 GetX() const {
                 return std::max(0.0f, translate_x - std::fabs(scale_x));
@@ -1418,12 +1420,12 @@ public:
         return execute_on;
     }
 
-    VideoCore::RasterizerInterface& GetRasterizer() {
-        return rasterizer;
+    VideoCore::RasterizerInterface& Rasterizer() {
+        return *rasterizer;
     }
 
-    const VideoCore::RasterizerInterface& GetRasterizer() const {
-        return rasterizer;
+    const VideoCore::RasterizerInterface& Rasterizer() const {
+        return *rasterizer;
     }
 
     /// Notify a memory write has happened.
@@ -1460,11 +1462,10 @@ private:
     void InitializeRegisterDefaults();
 
     Core::System& system;
-
-    VideoCore::RasterizerInterface& rasterizer;
-
     MemoryManager& memory_manager;
 
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
     /// Start offsets of each macro in macro_memory
     std::array<u32, 0x80> macro_positions = {};
 
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a2d3d7823..8fa359d0a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -94,7 +94,8 @@ void MaxwellDMA::CopyPitchToPitch() {
 }
 
 void MaxwellDMA::CopyBlockLinearToPitch() {
-    ASSERT(regs.src_params.block_size.depth == 0);
+    UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
+    UNIMPLEMENTED_IF(regs.src_params.layer != 0);
 
     // Optimized path for micro copies.
     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
@@ -113,8 +114,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     const u32 block_depth = src_params.block_size.depth;
     const size_t src_size =
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
-    const size_t src_layer_size =
-        CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
 
     if (read_buffer.size() < src_size) {
         read_buffer.resize(src_size);
@@ -123,17 +122,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
         write_buffer.resize(dst_size);
     }
 
-    if (Settings::IsGPULevelExtreme()) {
-        memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
-    } else {
-        memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
-        memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
-    }
+    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
 
     UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
-                     read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(),
-                     block_height, src_params.origin.x, src_params.origin.y);
+                     block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
+                     read_buffer.data());
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
@@ -198,7 +192,6 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
     if (read_buffer.size() < src_size) {
         read_buffer.resize(src_size);
     }
-
     if (write_buffer.size() < dst_size) {
         write_buffer.resize(dst_size);
     }
@@ -212,8 +205,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
     }
 
     UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
-                     bytes_per_pixel, read_buffer.data(), write_buffer.data(),
-                     regs.src_params.block_size.height, pos_x, pos_y);
+                     bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
+                     write_buffer.data(), read_buffer.data());
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index 72e2a33d5..ceec05459 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -41,30 +41,30 @@ struct Header {
         BitField<26, 1, u32> does_load_or_store;
         BitField<27, 1, u32> does_fp64;
         BitField<28, 4, u32> stream_out_mask;
-    } common0{};
+    } common0;
 
     union {
         BitField<0, 24, u32> shader_local_memory_low_size;
         BitField<24, 8, u32> per_patch_attribute_count;
-    } common1{};
+    } common1;
 
     union {
         BitField<0, 24, u32> shader_local_memory_high_size;
         BitField<24, 8, u32> threads_per_input_primitive;
-    } common2{};
+    } common2;
 
     union {
         BitField<0, 24, u32> shader_local_memory_crs_size;
         BitField<24, 4, OutputTopology> output_topology;
         BitField<28, 4, u32> reserved;
-    } common3{};
+    } common3;
 
     union {
         BitField<0, 12, u32> max_output_vertices;
         BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
         BitField<20, 4, u32> reserved;
         BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
-    } common4{};
+    } common4;
 
     union {
         struct {
@@ -145,7 +145,7 @@ struct Header {
             }
         } ps;
 
-        std::array<u32, 0xF> raw{};
+        std::array<u32, 0xF> raw;
     };
 
     u64 GetLocalMemorySize() const {
@@ -153,7 +153,6 @@ struct Header {
                 (common2.shader_local_memory_high_size << 24));
     }
 };
-
 static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
 
 } // namespace Tegra::Shader
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8b2a6a42c..de6991ef6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -5,15 +5,10 @@
 #pragma once
 
 #include <algorithm>
-#include <array>
-#include <memory>
 #include <queue>
 
-#include "common/assert.h"
 #include "common/common_types.h"
 #include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
@@ -79,8 +74,6 @@ public:
     }
 
     void WaitPendingFences() {
-        auto& gpu{system.GPU()};
-        auto& memory_manager{gpu.MemoryManager()};
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             if (ShouldWait()) {
@@ -88,8 +81,8 @@ public:
             }
             PopAsyncFlushes();
             if (current_fence->IsSemaphore()) {
-                memory_manager.template Write<u32>(current_fence->GetAddress(),
-                                                   current_fence->GetPayload());
+                gpu_memory.template Write<u32>(current_fence->GetAddress(),
+                                               current_fence->GetPayload());
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
             }
@@ -98,13 +91,13 @@ public:
     }
 
 protected:
-    FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
-                 TQueryCache& query_cache)
-        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
-          buffer_cache{buffer_cache}, query_cache{query_cache} {}
+    explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+                          TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+                          TQueryCache& query_cache_) // gpu_memory comes from gpu_, not a member
+        : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu_.MemoryManager()},
+          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
 
-    virtual ~FenceManager() {}
+    virtual ~FenceManager() = default;
 
     /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
     /// true
@@ -118,16 +111,15 @@ protected:
     /// Waits until a fence has been signalled by the host GPU.
     virtual void WaitFence(TFence& fence) = 0;
 
-    Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
+    Tegra::GPU& gpu;
+    Tegra::MemoryManager& gpu_memory;
     TTextureCache& texture_cache;
     TTBufferCache& buffer_cache;
     TQueryCache& query_cache;
 
 private:
     void TryReleasePendingFences() {
-        auto& gpu{system.GPU()};
-        auto& memory_manager{gpu.MemoryManager()};
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             if (ShouldWait() && !IsFenceSignaled(current_fence)) {
@@ -135,8 +127,8 @@ private:
             }
             PopAsyncFlushes();
             if (current_fence->IsSemaphore()) {
-                memory_manager.template Write<u32>(current_fence->GetAddress(),
-                                                   current_fence->GetPayload());
+                gpu_memory.template Write<u32>(current_fence->GetAddress(),
+                                               current_fence->GetPayload());
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
             }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 758bfe148..4bb9256e9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -20,26 +20,35 @@
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
+#include "video_core/shader_notify.h"
 #include "video_core/video_core.h"
 
 namespace Tegra {
 
 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
-GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
-    : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
-    auto& rasterizer{renderer->Rasterizer()};
-    memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
-    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
-    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
-    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
-    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
-    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
-}
+GPU::GPU(Core::System& system_, bool is_async_) // no renderer yet; see BindRenderer
+    : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+      dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+      maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+      fermi_2d{std::make_unique<Engines::Fermi2D>()},
+      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+      maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+      kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
 
 GPU::~GPU() = default;
 
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+    renderer = std::move(renderer_);
+    // Hand the renderer's rasterizer to the components that were constructed without one.
+    VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+}
+
 Engines::Maxwell3D& GPU::Maxwell3D() {
     return *maxwell_3d;
 }
@@ -79,7 +88,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     }
     MICROPROFILE_SCOPE(GPU_wait);
     std::unique_lock lock{sync_mutex};
-    sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
+    sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
 }
 
 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2c42483bd..2d15d1c6f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -33,59 +33,68 @@ class System;
 
 namespace VideoCore {
 class RendererBase;
+class ShaderNotify;
 } // namespace VideoCore
 
 namespace Tegra {
 
 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
-    RGBA32_FLOAT = 0xC0,
-    RGBA32_UINT = 0xC2,
-    RGBA16_UNORM = 0xC6,
-    RGBA16_SNORM = 0xC7,
-    RGBA16_UINT = 0xC9,
-    RGBA16_FLOAT = 0xCA,
-    RG32_FLOAT = 0xCB,
-    RG32_UINT = 0xCD,
-    RGBX16_FLOAT = 0xCE,
-    BGRA8_UNORM = 0xCF,
-    BGRA8_SRGB = 0xD0,
-    RGB10_A2_UNORM = 0xD1,
-    RGBA8_UNORM = 0xD5,
-    RGBA8_SRGB = 0xD6,
-    RGBA8_SNORM = 0xD7,
-    RGBA8_UINT = 0xD9,
-    RG16_UNORM = 0xDA,
-    RG16_SNORM = 0xDB,
-    RG16_SINT = 0xDC,
-    RG16_UINT = 0xDD,
-    RG16_FLOAT = 0xDE,
-    R11G11B10_FLOAT = 0xE0,
+    R32B32G32A32_FLOAT = 0xC0, // NOTE(review): B/G order differs from 0xC1/0xC2 names; typo?
+    R32G32B32A32_SINT = 0xC1,
+    R32G32B32A32_UINT = 0xC2,
+    R16G16B16A16_UNORM = 0xC6,
+    R16G16B16A16_SNORM = 0xC7,
+    R16G16B16A16_SINT = 0xC8,
+    R16G16B16A16_UINT = 0xC9,
+    R16G16B16A16_FLOAT = 0xCA,
+    R32G32_FLOAT = 0xCB,
+    R32G32_SINT = 0xCC,
+    R32G32_UINT = 0xCD,
+    R16G16B16X16_FLOAT = 0xCE,
+    B8G8R8A8_UNORM = 0xCF,
+    B8G8R8A8_SRGB = 0xD0,
+    A2B10G10R10_UNORM = 0xD1,
+    A2B10G10R10_UINT = 0xD2,
+    A8B8G8R8_UNORM = 0xD5,
+    A8B8G8R8_SRGB = 0xD6,
+    A8B8G8R8_SNORM = 0xD7,
+    A8B8G8R8_SINT = 0xD8,
+    A8B8G8R8_UINT = 0xD9,
+    R16G16_UNORM = 0xDA,
+    R16G16_SNORM = 0xDB,
+    R16G16_SINT = 0xDC,
+    R16G16_UINT = 0xDD,
+    R16G16_FLOAT = 0xDE,
+    B10G11R11_FLOAT = 0xE0,
     R32_SINT = 0xE3,
     R32_UINT = 0xE4,
     R32_FLOAT = 0xE5,
-    B5G6R5_UNORM = 0xE8,
-    BGR5A1_UNORM = 0xE9,
-    RG8_UNORM = 0xEA,
-    RG8_SNORM = 0xEB,
-    RG8_UINT = 0xED,
+    R5G6B5_UNORM = 0xE8,
+    A1R5G5B5_UNORM = 0xE9,
+    R8G8_UNORM = 0xEA,
+    R8G8_SNORM = 0xEB,
+    R8G8_SINT = 0xEC,
+    R8G8_UINT = 0xED,
     R16_UNORM = 0xEE,
     R16_SNORM = 0xEF,
     R16_SINT = 0xF0,
     R16_UINT = 0xF1,
     R16_FLOAT = 0xF2,
     R8_UNORM = 0xF3,
+    R8_SNORM = 0xF4,
+    R8_SINT = 0xF5,
     R8_UINT = 0xF6,
 };
 
 enum class DepthFormat : u32 {
-    Z32_FLOAT = 0xA,
-    Z16_UNORM = 0x13,
-    S8_Z24_UNORM = 0x14,
-    Z24_X8_UNORM = 0x15,
-    Z24_S8_UNORM = 0x16,
-    Z24_C8_UNORM = 0x18,
-    Z32_S8_X24_FLOAT = 0x19,
+    D32_FLOAT = 0xA,
+    D16_UNORM = 0x13,
+    S8_UINT_Z24_UNORM = 0x14, // NOTE(review): keeps "Z24" while siblings moved to "D"; confirm
+    D24X8_UNORM = 0x15,
+    D24S8_UNORM = 0x16,
+    D24C8_UNORM = 0x18,
+    D32_FLOAT_S8X24_UINT = 0x19,
 };
 
 struct CommandListHeader;
@@ -96,9 +105,9 @@ class DebugContext;
  */
 struct FramebufferConfig {
     enum class PixelFormat : u32 {
-        ABGR8 = 1,
-        RGB565 = 4,
-        BGRA8 = 5,
+        A8B8G8R8_UNORM = 1,
+        RGB565_UNORM = 4, // NOTE(review): RenderTargetFormat spells this layout R5G6B5; confirm
+        B8G8R8A8_UNORM = 5,
     };
 
     VAddr address;
@@ -133,11 +142,6 @@ class MemoryManager;
 
 class GPU {
 public:
-    explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                 bool is_async);
-
-    virtual ~GPU();
-
     struct MethodCall {
         u32 method{};
         u32 argument{};
@@ -153,6 +157,12 @@ public:
               method_count(method_count) {}
     };
 
+    explicit GPU(Core::System& system, bool is_async);
+    virtual ~GPU();
+
+    /// Binds a renderer to the GPU.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
     /// Calls a GPU method.
     void CallMethod(const MethodCall& method_call);
 
@@ -207,6 +217,14 @@ public:
         return *renderer;
     }
 
+    VideoCore::ShaderNotify& ShaderNotify() {
+        return *shader_notify; // unique_ptr member owned by this GPU
+    }
+
+    const VideoCore::ShaderNotify& ShaderNotify() const {
+        return *shader_notify; // unique_ptr member owned by this GPU
+    }
+
     // Waits for the GPU to finish working
     virtual void WaitIdle() const = 0;
 
@@ -235,7 +253,7 @@ public:
     const Tegra::DmaPusher& DmaPusher() const;
 
     struct Regs {
-        static constexpr size_t NUM_REGS = 0x100;
+        static constexpr size_t NUM_REGS = 0x40;
 
         union {
             struct {
@@ -254,7 +272,7 @@ public:
                 u32 semaphore_trigger;
                 INSERT_UNION_PADDING_WORDS(0xC);
 
-                // The puser and the puller share the reference counter, the pusher only has read
+                // The pusher and the puller share the reference counter, the pusher only has read
                 // access
                 u32 reference_count;
                 INSERT_UNION_PADDING_WORDS(0x5);
@@ -328,13 +346,12 @@ private:
     bool ExecuteMethodOnEngine(u32 method);
 
 protected:
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     Core::System& system;
+    std::unique_ptr<Tegra::MemoryManager> memory_manager;
+    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     std::unique_ptr<VideoCore::RendererBase> renderer;
 
 private:
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
     /// Mapping of command subchannels to their bound engine ids
     std::array<EngineID, 8> bound_engines = {};
     /// 3D engine
@@ -347,6 +364,8 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    /// Shader build notifier
+    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
 
     std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
 
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 7b855f63e..70a3d5738 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,16 +10,14 @@
 
 namespace VideoCommon {
 
-GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
-                     std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer_), true), gpu_thread{system},
-      cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
-      gpu_context(std::move(context)) {}
+GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
 
 GPUAsynch::~GPUAsynch() = default;
 
 void GPUAsynch::Start() {
-    gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
+    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+    cpu_context->MakeCurrent(); // shared context becomes current on the thread calling Start()
 }
 
 void GPUAsynch::ObtainContext() {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 15e9f1d38..f89c855a5 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,8 +20,7 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU asynchronously
 class GPUAsynch final : public Tegra::GPU {
 public:
-    explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                       std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+    explicit GPUAsynch(Core::System& system);
     ~GPUAsynch() override;
 
     void Start() override;
@@ -42,7 +41,6 @@ protected:
 private:
     GPUThread::ThreadManager gpu_thread;
     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
-    std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index aaeb9811d..1ca47ddef 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,20 +7,18 @@
 
 namespace VideoCommon {
 
-GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                   std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer), false), context{std::move(context)} {}
+GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
 
 GPUSynch::~GPUSynch() = default;
 
 void GPUSynch::Start() {}
 
 void GPUSynch::ObtainContext() {
-    context->MakeCurrent();
+    renderer->Context().MakeCurrent(); // context is reached through the renderer, not a member
 }
 
 void GPUSynch::ReleaseContext() {
-    context->DoneCurrent();
+    renderer->Context().DoneCurrent(); // context is reached through the renderer, not a member
 }
 
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 762c20aa5..297258cb1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,8 +19,7 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU synchronously
 class GPUSynch final : public Tegra::GPU {
 public:
-    explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                      std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+    explicit GPUSynch(Core::System& system);
     ~GPUSynch() override;
 
     void Start() override;
@@ -36,9 +35,6 @@ public:
 protected:
     void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
                              [[maybe_unused]] u32 value) const override {}
-
-private:
-    std::unique_ptr<Core::Frontend::GraphicsContext> context;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 738c6f0c1..bf761abf2 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,9 +44,9 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
             dma_pusher.DispatchCalls();
         } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
-        } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
+        } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
             renderer.Rasterizer().ReleaseFences();
-        } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+        } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
             system.GPU().TickWork();
         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
             renderer.Rasterizer().FlushRegion(data->addr, data->size);
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
new file mode 100644
index 000000000..aa62363a7
--- /dev/null
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SHADER_FILES
+    opengl_present.frag
+    opengl_present.vert
+)
+# Root of the generated include tree; exported to the parent scope as HOST_SHADERS_INCLUDE.
+set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
+set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
+
+set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
+add_custom_command(
+    OUTPUT
+        ${SHADER_DIR}
+    COMMAND
+        ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
+)
+# NOTE(review): nothing in this file depends on ${SHADER_DIR}; the rule above may never run.
+set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
+set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+# One header per shader (<name>_<ext>.h), regenerated when shader, script or template change.
+foreach(FILENAME IN ITEMS ${SHADER_FILES})
+    string(REPLACE "." "_" SHADER_NAME ${FILENAME})
+    set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
+    set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
+    add_custom_command(
+        OUTPUT
+            ${HEADER_FILE}
+        COMMAND
+            ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
+        MAIN_DEPENDENCY
+            ${SOURCE_FILE}
+        DEPENDS
+            ${HEADER_GENERATOR}
+            ${INPUT_FILE}
+    )
+    set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
+endforeach()
+# Target that depends on every generated header.
+add_custom_target(host_shaders
+    DEPENDS
+        ${SHADER_HEADERS}
+    SOURCES
+        ${SHADER_FILES}
+)
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
new file mode 100644
index 000000000..368bce0ed
--- /dev/null
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -0,0 +1,11 @@
+set(SOURCE_FILE ${CMAKE_ARGV3}) # 1st argument after "-P <script>": shader source to embed
+set(HEADER_FILE ${CMAKE_ARGV4}) # 2nd argument: header file to generate
+set(INPUT_FILE ${CMAKE_ARGV5}) # 3rd argument: template handed to configure_file
+# Variable name is derived from the file name, e.g. opengl_present.frag -> OPENGL_PRESENT_FRAG.
+get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME)
+string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME})
+string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
+# Whole shader text; the template references it as CONTENTS.
+file(READ ${SOURCE_FILE} CONTENTS)
+# @ONLY restricts substitution in the template to at-sign delimited references.
+configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
new file mode 100644
index 000000000..8a4cb024b
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -0,0 +1,10 @@
+#version 430 core
+// Samples color_texture at the interpolated coordinate and writes it out fully opaque.
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
+
+layout (binding = 0) uniform sampler2D color_texture;
+
+void main() {
+    color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); // sampled alpha is discarded
+}
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
new file mode 100644
index 000000000..2235d31a4
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -0,0 +1,24 @@
+#version 430 core
+// Transforms 2D vertex positions and forwards the texture coordinate unchanged.
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
+
+// This is a truncated 3x3 matrix for 2D transformations:
+// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
+// The third column performs translation.
+// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
+// implicitly be [0, 0, 1]
+layout (location = 0) uniform mat3x2 modelview_matrix;
+
+void main() {
+    // Multiply input position by the rotscale part of the matrix and then manually translate by
+    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+    // to `vec3(vert_position.xy, 1.0)`
+    gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
+    frag_tex_coord = vert_tex_coord;
+}
diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in
new file mode 100644
index 000000000..ccdb0d2a9
--- /dev/null
+++ b/src/video_core/host_shaders/source_shader.h.in
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <string_view>
+
+namespace HostShaders {
+// Custom raw-string delimiter so that shader text cannot terminate the literal early.
+constexpr std::string_view @CONTENTS_NAME@ = R"shader(@CONTENTS@)shader";
+
+} // namespace HostShaders
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index a50e7b4e0..cd21a2112 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -36,7 +36,7 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
         }
     } else {
         // Macro not compiled, check if it's uploaded and if so, compile it
-        std::optional<u32> mid_method = std::nullopt;
+        std::optional<u32> mid_method;
         const auto macro_code = uploaded_macro_code.find(method);
         if (macro_code == uploaded_macro_code.end()) {
             for (const auto& [method_base, code] : uploaded_macro_code) {
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 0c9ff59a4..df00b57df 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -24,7 +24,7 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
     maxwell3d.regs.index_array.first = parameters[4];
 
     if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(true, true);
+        maxwell3d.Rasterizer().Draw(true, true);
     }
     maxwell3d.regs.index_array.count = 0;
     maxwell3d.mme_draw.instance_count = 0;
@@ -42,7 +42,7 @@ void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
     maxwell3d.mme_draw.instance_count = count;
 
     if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(false, true);
+        maxwell3d.Rasterizer().Draw(false, true);
     }
     maxwell3d.regs.vertex_buffer.count = 0;
     maxwell3d.mme_draw.instance_count = 0;
@@ -65,7 +65,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
     maxwell3d.regs.draw.topology.Assign(
         static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
     if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(true, true);
+        maxwell3d.Rasterizer().Draw(true, true);
     }
     maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
     maxwell3d.regs.index_array.count = 0;
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index aa5256419..bd01fd1f2 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho
         this->parameters = std::make_unique<u32[]>(num_parameters);
     }
     std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
-    this->num_parameters = num_parameters;
 
     // Execute the code until we hit an exit condition.
     bool keep_executing = true;
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 07292702f..954b87515 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,11 +14,11 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
 
 namespace Tegra {
-static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
-static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
-static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
+constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
+constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 
 static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
     STATE,
@@ -419,7 +419,6 @@ void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
 
 void MacroJITx64Impl::Compile() {
     MICROPROFILE_SCOPE(MacroJitCompile);
-    bool keep_executing = true;
     labels.fill(Xbyak::Label());
 
     Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index ff5505d12..02cf53d15 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -4,7 +4,6 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process.h"
@@ -15,122 +14,142 @@
 
 namespace Tegra {
 
-MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
-    : rasterizer{rasterizer}, system{system} {
-    page_table.Resize(address_space_width, page_bits, false);
-
-    // Initialize the map with a single free region covering the entire managed space.
-    VirtualMemoryArea initial_vma;
-    initial_vma.size = address_space_end;
-    vma_map.emplace(initial_vma.base, initial_vma);
-
-    UpdatePageTableForVMA(initial_vma);
-}
+MemoryManager::MemoryManager(Core::System& system_)
+    : system{system_}, page_table(page_table_size) {}
 
 MemoryManager::~MemoryManager() = default;
 
-GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
-
-    AllocateMemory(gpu_addr, 0, aligned_size);
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
 
+GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+    u64 remaining_size{size};
+    for (u64 offset{}; offset < size; offset += page_size) {
+        if (remaining_size < page_size) {
+            SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size);
+        } else {
+            SetPageEntry(gpu_addr + offset, page_entry + offset);
+        }
+        remaining_size -= page_size;
+    }
     return gpu_addr;
 }
 
-GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
-    const u64 aligned_size{Common::AlignUp(size, page_size)};
-
-    AllocateMemory(gpu_addr, 0, aligned_size);
+GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
+    return UpdateRange(gpu_addr, cpu_addr, size);
+}
 
-    return gpu_addr;
+GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
+    return Map(cpu_addr, *FindFreeRange(size, align), size);
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
-    const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
+void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
+    // Flush and invalidate through the GPU interface, to be asynchronous if possible. An empty
+    // range, or a first page without a CPU address, has nothing to flush.
+    if (const auto cpu_addr{size ? GpuToCpuAddress(gpu_addr) : std::nullopt}; cpu_addr) {
 
-    MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
-    ASSERT(
-        system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
+        system.GPU().FlushAndInvalidateRegion(*cpu_addr, size);
+    }
 
-    return gpu_addr;
+    UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); // Does nothing when size is zero
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & page_mask) == 0);
+std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
+    for (u64 offset{}; offset < size; offset += page_size) {
+        if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
+            return std::nullopt;
+        }
+    }
 
-    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    return UpdateRange(gpu_addr, PageEntry::State::Allocated, size);
+}
 
-    MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
-    ASSERT(
-        system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
-    return gpu_addr;
+GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
+    return *AllocateFixed(*FindFreeRange(size, align), size);
 }
 
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & page_mask) == 0);
+void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) {
+    if (!page_entry.IsValid()) {
+        return;
+    }
 
-    const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const auto cpu_addr = GpuToCpuAddress(gpu_addr);
-    ASSERT(cpu_addr);
+    ASSERT(system.CurrentProcess()
+               ->PageTable()
+               .LockForDeviceAddressSpace(page_entry.ToAddress(), size)
+               .IsSuccess());
+}
 
-    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
+void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
+    if (!page_entry.IsValid()) {
+        return;
+    }
 
-    UnmapRange(gpu_addr, aligned_size);
     ASSERT(system.CurrentProcess()
                ->PageTable()
-               .UnlockForDeviceAddressSpace(cpu_addr.value(), size)
+               .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size)
                .IsSuccess());
+}
 
-    return gpu_addr;
+PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
+    return page_table[PageEntryIndex(gpu_addr)];
 }
 
-GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
-    // Find the first Free VMA.
-    const VMAHandle vma_handle{
-        std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
-            if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
-                return false;
-            }
+void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+    // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to
+    // improper tracking, but should be fixed in the future.
 
-            const VAddr vma_end{vma.second.base + vma.second.size};
-            return vma_end > region_start && vma_end >= region_start + size;
-        })};
+    //// Unlock the old page
+    // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size);
 
-    if (vma_handle == vma_map.end()) {
-        return {};
-    }
+    //// Lock the new page
+    // TryLockPage(page_entry, size);
 
-    return std::max(region_start, vma_handle->second.base);
+    page_table[PageEntryIndex(gpu_addr)] = page_entry;
 }
 
-bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
-    return (addr >> page_bits) < page_table.pointers.size();
-}
+std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const {
+    if (!align) {
+        align = page_size;
+    } else {
+        align = Common::AlignUp(align, page_size);
+    }
 
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
-    if (!IsAddressValid(addr)) {
-        return {};
+    u64 available_size{};
+    GPUVAddr gpu_addr{address_space_start};
+    while (gpu_addr + available_size < address_space_size) {
+        if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
+            available_size += page_size;
+
+            if (available_size >= size) {
+                return gpu_addr;
+            }
+        } else {
+            gpu_addr += available_size + page_size;
+            available_size = 0;
+
+            const auto remainder{gpu_addr % align};
+            if (remainder) {
+                gpu_addr = (gpu_addr - remainder) + align;
+            }
+        }
     }
 
-    const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
-    if (cpu_addr) {
-        return cpu_addr + (addr & page_mask);
+    return std::nullopt;
+}
+
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
+    const auto page_entry{GetPageEntry(gpu_addr)};
+    if (!page_entry.IsValid()) {
+        return std::nullopt;
     }
 
-    return {};
+    return page_entry.ToAddress() + (gpu_addr & page_mask);
 }
 
 template <typename T>
 T MemoryManager::Read(GPUVAddr addr) const {
-    if (!IsAddressValid(addr)) {
-        return {};
-    }
-
-    const u8* page_pointer{GetPointer(addr)};
-    if (page_pointer) {
+    if (auto page_pointer{GetPointer(addr)}; page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
         T value;
         std::memcpy(&value, page_pointer, sizeof(T));
@@ -144,12 +163,7 @@ T MemoryManager::Read(GPUVAddr addr) const {
 
 template <typename T>
 void MemoryManager::Write(GPUVAddr addr, T data) {
-    if (!IsAddressValid(addr)) {
-        return;
-    }
-
-    u8* page_pointer{GetPointer(addr)};
-    if (page_pointer) {
+    if (auto page_pointer{GetPointer(addr)}; page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
         std::memcpy(page_pointer, &data, sizeof(T));
         return;
@@ -167,66 +181,49 @@ template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
 template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
 template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
 
-u8* MemoryManager::GetPointer(GPUVAddr addr) {
-    if (!IsAddressValid(addr)) {
+u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
+    if (!GetPageEntry(gpu_addr).IsValid()) {
         return {};
     }
 
-    auto& memory = system.Memory();
-
-    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
-
-    if (page_addr != 0) {
-        return memory.GetPointer(page_addr + (addr & page_mask));
+    const auto address{GpuToCpuAddress(gpu_addr)};
+    if (!address) {
+        return {};
     }
 
-    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
-    return {};
+    return system.Memory().GetPointer(*address);
 }
 
-const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
-    if (!IsAddressValid(addr)) {
+const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
+    if (!GetPageEntry(gpu_addr).IsValid()) {
         return {};
     }
 
-    const auto& memory = system.Memory();
-
-    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
-
-    if (page_addr != 0) {
-        return memory.GetPointer(page_addr + (addr & page_mask));
+    const auto address{GpuToCpuAddress(gpu_addr)};
+    if (!address) {
+        return {};
     }
 
-    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
-    return {};
-}
-
-bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
-    const std::size_t inner_size = size - 1;
-    const GPUVAddr end = start + inner_size;
-    const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
-    const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
-    const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
-    return range == inner_size;
+    return system.Memory().GetPointer(*address);
 }
 
-void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
-                              const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
     std::size_t remaining_size{size};
     std::size_t page_index{gpu_src_addr >> page_bits};
     std::size_t page_offset{gpu_src_addr & page_mask};
 
-    auto& memory = system.Memory();
-
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
-        const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
-        // Flush must happen on the rasterizer interface, such that memory is always synchronous
-        // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
-        rasterizer.FlushRegion(src_addr, copy_amount);
-        memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+            const auto src_addr{*page_addr + page_offset};
+
+            // Flush must happen on the rasterizer interface, such that memory is always synchronous
+            // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
+            rasterizer->FlushRegion(src_addr, copy_amount);
+            system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+        }
 
         page_index++;
         page_offset = 0;
@@ -241,18 +238,17 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
     std::size_t page_index{gpu_src_addr >> page_bits};
     std::size_t page_offset{gpu_src_addr & page_mask};
 
-    auto& memory = system.Memory();
-
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const u8* page_pointer = page_table.pointers[page_index];
-        if (page_pointer) {
-            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
-            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+
+        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+            const auto src_addr{*page_addr + page_offset};
+            system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
         } else {
             std::memset(dest_buffer, 0, copy_amount);
         }
+
         page_index++;
         page_offset = 0;
         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
@@ -260,23 +256,23 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
     }
 }
 
-void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
-                               const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
     std::size_t remaining_size{size};
     std::size_t page_index{gpu_dest_addr >> page_bits};
     std::size_t page_offset{gpu_dest_addr & page_mask};
 
-    auto& memory = system.Memory();
-
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
-        const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
-        // Invalidate must happen on the rasterizer interface, such that memory is always
-        // synchronous when it is written (even when in asynchronous GPU mode).
-        rasterizer.InvalidateRegion(dest_addr, copy_amount);
-        memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+            const auto dest_addr{*page_addr + page_offset};
+
+            // Invalidate must happen on the rasterizer interface, such that memory is always
+            // synchronous when it is written (even when in asynchronous GPU mode).
+            rasterizer->InvalidateRegion(dest_addr, copy_amount);
+            system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+        }
 
         page_index++;
         page_offset = 0;
@@ -286,21 +282,20 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
 }
 
 void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
-                                     const std::size_t size) {
+                                     std::size_t size) {
     std::size_t remaining_size{size};
     std::size_t page_index{gpu_dest_addr >> page_bits};
     std::size_t page_offset{gpu_dest_addr & page_mask};
 
-    auto& memory = system.Memory();
-
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        u8* page_pointer = page_table.pointers[page_index];
-        if (page_pointer) {
-            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
-            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+
+        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+            const auto dest_addr{*page_addr + page_offset};
+            system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
         }
+
         page_index++;
         page_offset = 0;
         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
@@ -308,273 +303,26 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
     }
 }
 
-void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
-                              const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
     std::vector<u8> tmp_buffer(size);
     ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
     WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
 }
 
 void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
-                                    const std::size_t size) {
+                                    std::size_t size) {
     std::vector<u8> tmp_buffer(size);
     ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
     WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
 }
 
-bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
-    const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
-    const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size;
-    return page <= Core::Memory::PAGE_SIZE;
-}
-
-void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
-                             VAddr backing_addr) {
-    LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
-              (base + size) * page_size);
-
-    const VAddr end{base + size};
-    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
-               base + page_table.pointers.size());
-
-    if (memory == nullptr) {
-        while (base != end) {
-            page_table.pointers[base] = nullptr;
-            page_table.backing_addr[base] = 0;
-
-            base += 1;
-        }
-    } else {
-        while (base != end) {
-            page_table.pointers[base] = memory;
-            page_table.backing_addr[base] = backing_addr;
-
-            base += 1;
-            memory += page_size;
-            backing_addr += page_size;
-        }
-    }
-}
-
-void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
-    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
-    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
-}
-
-void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
-    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
-    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
-}
-
-bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
-    ASSERT(base + size == next.base);
-    if (type != next.type) {
-        return {};
-    }
-    if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
-        return {};
-    }
-    if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
-        return {};
-    }
-    return true;
-}
-
-MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
-    if (target >= address_space_end) {
-        return vma_map.end();
-    } else {
-        return std::prev(vma_map.upper_bound(target));
-    }
-}
-
-MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
-    VirtualMemoryArea& vma{vma_handle->second};
-
-    vma.type = VirtualMemoryArea::Type::Allocated;
-    vma.backing_addr = 0;
-    vma.backing_memory = {};
-    UpdatePageTableForVMA(vma);
-
-    return MergeAdjacent(vma_handle);
-}
-
-MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
-                                                       u64 size) {
-
-    // This is the appropriately sized VMA that will turn into our allocation.
-    VMAIter vma_handle{CarveVMA(target, size)};
-    VirtualMemoryArea& vma{vma_handle->second};
-
-    ASSERT(vma.size == size);
-
-    vma.offset = offset;
-
-    return Allocate(vma_handle);
-}
-
-MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
-                                                         VAddr backing_addr) {
-    // This is the appropriately sized VMA that will turn into our allocation.
-    VMAIter vma_handle{CarveVMA(target, size)};
-    VirtualMemoryArea& vma{vma_handle->second};
-
-    ASSERT(vma.size == size);
-
-    vma.type = VirtualMemoryArea::Type::Mapped;
-    vma.backing_memory = memory;
-    vma.backing_addr = backing_addr;
-    UpdatePageTableForVMA(vma);
-
-    return MergeAdjacent(vma_handle);
-}
-
-void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
-    VMAIter vma{CarveVMARange(target, size)};
-    const VAddr target_end{target + size};
-    const VMAIter end{vma_map.end()};
-
-    // The comparison against the end of the range must be done using addresses since VMAs can be
-    // merged during this process, causing invalidation of the iterators.
-    while (vma != end && vma->second.base < target_end) {
-        // Unmapped ranges return to allocated state and can be reused
-        // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
-        vma = std::next(Allocate(vma));
-    }
-
-    ASSERT(FindVMA(target)->second.size >= size);
-}
-
-MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
-    // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
-    // non-const access to its container.
-    return vma_map.erase(iter, iter); // Erases an empty range of elements
-}
-
-MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
-    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
-    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
-
-    VMAIter vma_handle{StripIterConstness(FindVMA(base))};
-    if (vma_handle == vma_map.end()) {
-        // Target address is outside the managed range
-        return {};
-    }
-
-    const VirtualMemoryArea& vma{vma_handle->second};
-    if (vma.type == VirtualMemoryArea::Type::Mapped) {
-        // Region is already allocated
-        return vma_handle;
-    }
-
-    const VAddr start_in_vma{base - vma.base};
-    const VAddr end_in_vma{start_in_vma + size};
-
-    ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
-               vma.size, end_in_vma);
-
-    if (end_in_vma < vma.size) {
-        // Split VMA at the end of the allocated region
-        SplitVMA(vma_handle, end_in_vma);
-    }
-    if (start_in_vma != 0) {
-        // Split VMA at the start of the allocated region
-        vma_handle = SplitVMA(vma_handle, start_in_vma);
-    }
-
-    return vma_handle;
-}
-
-MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
-    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
-    ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
-
-    const VAddr target_end{target + size};
-    ASSERT(target_end >= target);
-    ASSERT(size > 0);
-
-    VMAIter begin_vma{StripIterConstness(FindVMA(target))};
-    const VMAIter i_end{vma_map.lower_bound(target_end)};
-    if (std::any_of(begin_vma, i_end, [](const auto& entry) {
-            return entry.second.type == VirtualMemoryArea::Type::Unmapped;
-        })) {
-        return {};
-    }
-
-    if (target != begin_vma->second.base) {
-        begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
-    }
-
-    VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
-    if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
-        end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
-    }
-
-    return begin_vma;
-}
-
-MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
-    VirtualMemoryArea& old_vma{vma_handle->second};
-    VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
-
-    // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
-    // a bug. This restriction might be removed later.
-    ASSERT(offset_in_vma < old_vma.size);
-    ASSERT(offset_in_vma > 0);
-
-    old_vma.size = offset_in_vma;
-    new_vma.base += offset_in_vma;
-    new_vma.size -= offset_in_vma;
-
-    switch (new_vma.type) {
-    case VirtualMemoryArea::Type::Unmapped:
-        break;
-    case VirtualMemoryArea::Type::Allocated:
-        new_vma.offset += offset_in_vma;
-        break;
-    case VirtualMemoryArea::Type::Mapped:
-        new_vma.backing_memory += offset_in_vma;
-        break;
-    }
-
-    ASSERT(old_vma.CanBeMergedWith(new_vma));
-
-    return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
-}
-
-MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
-    const VMAIter next_vma{std::next(iter)};
-    if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
-        iter->second.size += next_vma->second.size;
-        vma_map.erase(next_vma);
-    }
-
-    if (iter != vma_map.begin()) {
-        VMAIter prev_vma{std::prev(iter)};
-        if (prev_vma->second.CanBeMergedWith(iter->second)) {
-            prev_vma->second.size += iter->second.size;
-            vma_map.erase(iter);
-            iter = prev_vma;
-        }
-    }
-
-    return iter;
-}
-
-void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
-    switch (vma.type) {
-    case VirtualMemoryArea::Type::Unmapped:
-        UnmapRegion(vma.base, vma.size);
-        break;
-    case VirtualMemoryArea::Type::Allocated:
-        MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
-        break;
-    case VirtualMemoryArea::Type::Mapped:
-        MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
-        break;
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
+    const auto cpu_addr{GpuToCpuAddress(gpu_addr)};
+    if (!cpu_addr) {
+        return false;
     }
+    const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size};
+    return page <= Core::Memory::PAGE_SIZE;
 }
 
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 87658e87a..53c8d122a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,9 +6,9 @@
 
 #include <map>
 #include <optional>
+#include <vector>
 
 #include "common/common_types.h"
-#include "common/page_table.h"
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -20,58 +20,70 @@ class System;
 
 namespace Tegra {
 
-/**
- * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
- * with homogeneous attributes across its extents. In this particular implementation each VMA is
- * also backed by a single host memory allocation.
- */
-struct VirtualMemoryArea {
-    enum class Type : u8 {
-        Unmapped,
-        Allocated,
-        Mapped,
+class PageEntry final {
+public:
+    enum class State : u32 {
+        Unmapped = static_cast<u32>(-1), // Reserved value: entry holds no address
+        Allocated = static_cast<u32>(-2), // Reserved value: entry holds no address
     };
 
-    /// Virtual base address of the region.
-    GPUVAddr base{};
-    /// Size of the region.
-    u64 size{};
-    /// Memory area mapping type.
-    Type type{Type::Unmapped};
-    /// CPU memory mapped address corresponding to this memory area.
-    VAddr backing_addr{};
-    /// Offset into the backing_memory the mapping starts from.
-    std::size_t offset{};
-    /// Pointer backing this VMA.
-    u8* backing_memory{};
-
-    /// Tests if this area can be merged to the right with `next`.
-    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+    constexpr PageEntry() = default;
+    constexpr PageEntry(State state_) : state{state_} {}
+    constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
+
+    [[nodiscard]] constexpr bool IsUnmapped() const {
+        return state == State::Unmapped;
+    }
+
+    [[nodiscard]] constexpr bool IsAllocated() const {
+        return state == State::Allocated;
+    }
+
+    [[nodiscard]] constexpr bool IsValid() const {
+        return !IsUnmapped() && !IsAllocated();
+    }
+
+    [[nodiscard]] constexpr VAddr ToAddress() const {
+        if (!IsValid()) {
+            return {};
+        }
+
+        return static_cast<VAddr>(state) << ShiftBits;
+    }
+
+    [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
+        // If this is a reserved value, offsets do not apply
+        if (!IsValid()) {
+            return *this;
+        }
+        return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset};
+    }
+
+private:
+    static constexpr std::size_t ShiftBits{12}; // Addresses are stored as addr >> ShiftBits
+
+    State state{State::Unmapped};
 };
+static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
 
 class MemoryManager final {
 public:
-    explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(Core::System& system);
     ~MemoryManager();
 
-    GPUVAddr AllocateSpace(u64 size, u64 align);
-    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
-    GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
-    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
-    GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
-    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+    /// Binds a renderer to the memory manager.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
+    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
     template <typename T>
-    T Read(GPUVAddr addr) const;
+    [[nodiscard]] T Read(GPUVAddr addr) const;
 
     template <typename T>
     void Write(GPUVAddr addr, T data);
 
-    u8* GetPointer(GPUVAddr addr);
-    const u8* GetPointer(GPUVAddr addr) const;
-
-    /// Returns true if the block is continuous in host memory, false otherwise
-    bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
+    [[nodiscard]] u8* GetPointer(GPUVAddr addr);
+    [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
 
     /**
      * ReadBlock and WriteBlock are full read and write operations over virtual
@@ -98,92 +110,43 @@ public:
     void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
 
     /**
-     * IsGranularRange checks if a gpu region can be simply read with a pointer
+     * IsGranularRange checks if a gpu region can be simply read with a pointer.
      */
-    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
-
-private:
-    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
-    using VMAHandle = VMAMap::const_iterator;
-    using VMAIter = VMAMap::iterator;
-
-    bool IsAddressValid(GPUVAddr addr) const;
-    void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
-                  VAddr backing_addr = 0);
-    void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
-    void UnmapRegion(GPUVAddr base, u64 size);
+    [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
-    /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
-    VMAHandle FindVMA(GPUVAddr target) const;
-
-    VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
-
-    /**
-     * Maps an unmanaged host memory pointer at a given address.
-     *
-     * @param target       The guest address to start the mapping at.
-     * @param memory       The memory to be mapped.
-     * @param size         Size of the mapping in bytes.
-     * @param backing_addr The base address of the range to back this mapping.
-     */
-    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+    [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
+    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
+    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+    void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
-    /// Unmaps a range of addresses, splitting VMAs as necessary.
-    void UnmapRange(GPUVAddr target, u64 size);
-
-    /// Converts a VMAHandle to a mutable VMAIter.
-    VMAIter StripIterConstness(const VMAHandle& iter);
-
-    /// Marks as the specified VMA as allocated.
-    VMAIter Allocate(VMAIter vma);
-
-    /**
-     * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
-     * the appropriate error checking.
-     */
-    VMAIter CarveVMA(GPUVAddr base, u64 size);
-
-    /**
-     * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
-     * end of the range.
-     */
-    VMAIter CarveVMARange(GPUVAddr base, u64 size);
-
-    /**
-     * Splits a VMA in two, at the specified offset.
-     * @returns the right side of the split, with the original iterator becoming the left side.
-     */
-    VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
-
-    /**
-     * Checks for and merges the specified VMA with adjacent ones if possible.
-     * @returns the merged VMA or the original if no merging was possible.
-     */
-    VMAIter MergeAdjacent(VMAIter vma);
+private:
+    [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
+    void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
+    GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
+    [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
 
-    /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
-    void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+    void TryLockPage(PageEntry page_entry, std::size_t size);
+    void TryUnlockPage(PageEntry page_entry, std::size_t size);
 
-    /// Finds a free (unmapped region) of the specified size starting at the specified address.
-    GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
+    [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
+        return (gpu_addr >> page_bits) & page_table_mask;
+    }
 
-private:
+    static constexpr u64 address_space_size = 1ULL << 40;
+    static constexpr u64 address_space_start = 1ULL << 32;
     static constexpr u64 page_bits{16};
     static constexpr u64 page_size{1 << page_bits};
     static constexpr u64 page_mask{page_size - 1};
+    static constexpr u64 page_table_bits{24};
+    static constexpr u64 page_table_size{1 << page_table_bits};
+    static constexpr u64 page_table_mask{page_table_size - 1};
 
-    /// Address space in bits, according to Tegra X1 TRM
-    static constexpr u32 address_space_width{40};
-    /// Start address for mapping, this is fairly arbitrary but must be non-zero.
-    static constexpr GPUVAddr address_space_base{0x100000};
-    /// End of address space, based on address space in bits.
-    static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
+    Core::System& system;
 
-    Common::PageTable page_table;
-    VMAMap vma_map;
-    VideoCore::RasterizerInterface& rasterizer;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
 
-    Core::System& system;
+    std::vector<PageEntry> page_table;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 836b25c1d..9da9fb4ff 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -41,146 +41,168 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
 }
 
 static constexpr ConversionArray morton_to_linear_fns = {
-    MortonCopy<true, PixelFormat::ABGR8U>,
-    MortonCopy<true, PixelFormat::ABGR8S>,
-    MortonCopy<true, PixelFormat::ABGR8UI>,
-    MortonCopy<true, PixelFormat::B5G6R5U>,
-    MortonCopy<true, PixelFormat::A2B10G10R10U>,
-    MortonCopy<true, PixelFormat::A1B5G5R5U>,
-    MortonCopy<true, PixelFormat::R8U>,
-    MortonCopy<true, PixelFormat::R8UI>,
-    MortonCopy<true, PixelFormat::RGBA16F>,
-    MortonCopy<true, PixelFormat::RGBA16U>,
-    MortonCopy<true, PixelFormat::RGBA16S>,
-    MortonCopy<true, PixelFormat::RGBA16UI>,
-    MortonCopy<true, PixelFormat::R11FG11FB10F>,
-    MortonCopy<true, PixelFormat::RGBA32UI>,
-    MortonCopy<true, PixelFormat::DXT1>,
-    MortonCopy<true, PixelFormat::DXT23>,
-    MortonCopy<true, PixelFormat::DXT45>,
-    MortonCopy<true, PixelFormat::DXN1>,
-    MortonCopy<true, PixelFormat::DXN2UNORM>,
-    MortonCopy<true, PixelFormat::DXN2SNORM>,
-    MortonCopy<true, PixelFormat::BC7U>,
-    MortonCopy<true, PixelFormat::BC6H_UF16>,
-    MortonCopy<true, PixelFormat::BC6H_SF16>,
-    MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
-    MortonCopy<true, PixelFormat::BGRA8>,
-    MortonCopy<true, PixelFormat::RGBA32F>,
-    MortonCopy<true, PixelFormat::RG32F>,
-    MortonCopy<true, PixelFormat::R32F>,
-    MortonCopy<true, PixelFormat::R16F>,
-    MortonCopy<true, PixelFormat::R16U>,
-    MortonCopy<true, PixelFormat::R16S>,
-    MortonCopy<true, PixelFormat::R16UI>,
-    MortonCopy<true, PixelFormat::R16I>,
-    MortonCopy<true, PixelFormat::RG16>,
-    MortonCopy<true, PixelFormat::RG16F>,
-    MortonCopy<true, PixelFormat::RG16UI>,
-    MortonCopy<true, PixelFormat::RG16I>,
-    MortonCopy<true, PixelFormat::RG16S>,
-    MortonCopy<true, PixelFormat::RGB32F>,
-    MortonCopy<true, PixelFormat::RGBA8_SRGB>,
-    MortonCopy<true, PixelFormat::RG8U>,
-    MortonCopy<true, PixelFormat::RG8S>,
-    MortonCopy<true, PixelFormat::RG8UI>,
-    MortonCopy<true, PixelFormat::RG32UI>,
-    MortonCopy<true, PixelFormat::RGBX16F>,
-    MortonCopy<true, PixelFormat::R32UI>,
-    MortonCopy<true, PixelFormat::R32I>,
-    MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
-    MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
-    MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
-    MortonCopy<true, PixelFormat::BGRA8_SRGB>,
-    MortonCopy<true, PixelFormat::DXT1_SRGB>,
-    MortonCopy<true, PixelFormat::DXT23_SRGB>,
-    MortonCopy<true, PixelFormat::DXT45_SRGB>,
-    MortonCopy<true, PixelFormat::BC7U_SRGB>,
-    MortonCopy<true, PixelFormat::R4G4B4A4U>,
+    MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
+    MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
+    MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
+    MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
+    MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
+    MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
+    MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
+    MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
+    MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
+    MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
+    MortonCopy<true, PixelFormat::R8_UNORM>,
+    MortonCopy<true, PixelFormat::R8_SNORM>,
+    MortonCopy<true, PixelFormat::R8_SINT>,
+    MortonCopy<true, PixelFormat::R8_UINT>,
+    MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
+    MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
+    MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
+    MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
+    MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
+    MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
+    MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
+    MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
+    MortonCopy<true, PixelFormat::BC2_UNORM>,
+    MortonCopy<true, PixelFormat::BC3_UNORM>,
+    MortonCopy<true, PixelFormat::BC4_UNORM>,
+    MortonCopy<true, PixelFormat::BC4_SNORM>,
+    MortonCopy<true, PixelFormat::BC5_UNORM>,
+    MortonCopy<true, PixelFormat::BC5_SNORM>,
+    MortonCopy<true, PixelFormat::BC7_UNORM>,
+    MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
+    MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
+    MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
+    MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
+    MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
+    MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
+    MortonCopy<true, PixelFormat::R32G32_FLOAT>,
+    MortonCopy<true, PixelFormat::R32G32_SINT>,
+    MortonCopy<true, PixelFormat::R32_FLOAT>,
+    MortonCopy<true, PixelFormat::R16_FLOAT>,
+    MortonCopy<true, PixelFormat::R16_UNORM>,
+    MortonCopy<true, PixelFormat::R16_SNORM>,
+    MortonCopy<true, PixelFormat::R16_UINT>,
+    MortonCopy<true, PixelFormat::R16_SINT>,
+    MortonCopy<true, PixelFormat::R16G16_UNORM>,
+    MortonCopy<true, PixelFormat::R16G16_FLOAT>,
+    MortonCopy<true, PixelFormat::R16G16_UINT>,
+    MortonCopy<true, PixelFormat::R16G16_SINT>,
+    MortonCopy<true, PixelFormat::R16G16_SNORM>,
+    MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
+    MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
+    MortonCopy<true, PixelFormat::R8G8_UNORM>,
+    MortonCopy<true, PixelFormat::R8G8_SNORM>,
+    MortonCopy<true, PixelFormat::R8G8_SINT>,
+    MortonCopy<true, PixelFormat::R8G8_UINT>,
+    MortonCopy<true, PixelFormat::R32G32_UINT>,
+    MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
+    MortonCopy<true, PixelFormat::R32_UINT>,
+    MortonCopy<true, PixelFormat::R32_SINT>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
+    MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
+    MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
+    MortonCopy<true, PixelFormat::BC2_SRGB>,
+    MortonCopy<true, PixelFormat::BC3_SRGB>,
+    MortonCopy<true, PixelFormat::BC7_SRGB>,
+    MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
     MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
     MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
     MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
+    MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
+    MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
-    MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
     MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
-    MortonCopy<true, PixelFormat::E5B9G9R9F>,
-    MortonCopy<true, PixelFormat::Z32F>,
-    MortonCopy<true, PixelFormat::Z16>,
-    MortonCopy<true, PixelFormat::Z24S8>,
-    MortonCopy<true, PixelFormat::S8Z24>,
-    MortonCopy<true, PixelFormat::Z32FS8>,
+    MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
+    MortonCopy<true, PixelFormat::D32_FLOAT>,
+    MortonCopy<true, PixelFormat::D16_UNORM>,
+    MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
+    MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
+    MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
 };
 
 static constexpr ConversionArray linear_to_morton_fns = {
-    MortonCopy<false, PixelFormat::ABGR8U>,
-    MortonCopy<false, PixelFormat::ABGR8S>,
-    MortonCopy<false, PixelFormat::ABGR8UI>,
-    MortonCopy<false, PixelFormat::B5G6R5U>,
-    MortonCopy<false, PixelFormat::A2B10G10R10U>,
-    MortonCopy<false, PixelFormat::A1B5G5R5U>,
-    MortonCopy<false, PixelFormat::R8U>,
-    MortonCopy<false, PixelFormat::R8UI>,
-    MortonCopy<false, PixelFormat::RGBA16F>,
-    MortonCopy<false, PixelFormat::RGBA16S>,
-    MortonCopy<false, PixelFormat::RGBA16U>,
-    MortonCopy<false, PixelFormat::RGBA16UI>,
-    MortonCopy<false, PixelFormat::R11FG11FB10F>,
-    MortonCopy<false, PixelFormat::RGBA32UI>,
-    MortonCopy<false, PixelFormat::DXT1>,
-    MortonCopy<false, PixelFormat::DXT23>,
-    MortonCopy<false, PixelFormat::DXT45>,
-    MortonCopy<false, PixelFormat::DXN1>,
-    MortonCopy<false, PixelFormat::DXN2UNORM>,
-    MortonCopy<false, PixelFormat::DXN2SNORM>,
-    MortonCopy<false, PixelFormat::BC7U>,
-    MortonCopy<false, PixelFormat::BC6H_UF16>,
-    MortonCopy<false, PixelFormat::BC6H_SF16>,
+    MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
+    MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
+    MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
+    MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
+    MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
+    MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
+    MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
+    MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
+    MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
+    MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
+    MortonCopy<false, PixelFormat::R8_UNORM>,
+    MortonCopy<false, PixelFormat::R8_SNORM>,
+    MortonCopy<false, PixelFormat::R8_SINT>,
+    MortonCopy<false, PixelFormat::R8_UINT>,
+    MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
+    MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
+    MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
+    MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
+    MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
+    MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
+    MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
+    MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
+    MortonCopy<false, PixelFormat::BC2_UNORM>,
+    MortonCopy<false, PixelFormat::BC3_UNORM>,
+    MortonCopy<false, PixelFormat::BC4_UNORM>,
+    MortonCopy<false, PixelFormat::BC4_SNORM>,
+    MortonCopy<false, PixelFormat::BC5_UNORM>,
+    MortonCopy<false, PixelFormat::BC5_SNORM>,
+    MortonCopy<false, PixelFormat::BC7_UNORM>,
+    MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
+    MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
     // TODO(Subv): Swizzling ASTC formats are not supported
     nullptr,
-    MortonCopy<false, PixelFormat::BGRA8>,
-    MortonCopy<false, PixelFormat::RGBA32F>,
-    MortonCopy<false, PixelFormat::RG32F>,
-    MortonCopy<false, PixelFormat::R32F>,
-    MortonCopy<false, PixelFormat::R16F>,
-    MortonCopy<false, PixelFormat::R16U>,
-    MortonCopy<false, PixelFormat::R16S>,
-    MortonCopy<false, PixelFormat::R16UI>,
-    MortonCopy<false, PixelFormat::R16I>,
-    MortonCopy<false, PixelFormat::RG16>,
-    MortonCopy<false, PixelFormat::RG16F>,
-    MortonCopy<false, PixelFormat::RG16UI>,
-    MortonCopy<false, PixelFormat::RG16I>,
-    MortonCopy<false, PixelFormat::RG16S>,
-    MortonCopy<false, PixelFormat::RGB32F>,
-    MortonCopy<false, PixelFormat::RGBA8_SRGB>,
-    MortonCopy<false, PixelFormat::RG8U>,
-    MortonCopy<false, PixelFormat::RG8S>,
-    MortonCopy<false, PixelFormat::RG8UI>,
-    MortonCopy<false, PixelFormat::RG32UI>,
-    MortonCopy<false, PixelFormat::RGBX16F>,
-    MortonCopy<false, PixelFormat::R32UI>,
-    MortonCopy<false, PixelFormat::R32I>,
+    MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
+    MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
+    MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
+    MortonCopy<false, PixelFormat::R32G32_FLOAT>,
+    MortonCopy<false, PixelFormat::R32G32_SINT>,
+    MortonCopy<false, PixelFormat::R32_FLOAT>,
+    MortonCopy<false, PixelFormat::R16_FLOAT>,
+    MortonCopy<false, PixelFormat::R16_UNORM>,
+    MortonCopy<false, PixelFormat::R16_SNORM>,
+    MortonCopy<false, PixelFormat::R16_UINT>,
+    MortonCopy<false, PixelFormat::R16_SINT>,
+    MortonCopy<false, PixelFormat::R16G16_UNORM>,
+    MortonCopy<false, PixelFormat::R16G16_FLOAT>,
+    MortonCopy<false, PixelFormat::R16G16_UINT>,
+    MortonCopy<false, PixelFormat::R16G16_SINT>,
+    MortonCopy<false, PixelFormat::R16G16_SNORM>,
+    MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
+    MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
+    MortonCopy<false, PixelFormat::R8G8_UNORM>,
+    MortonCopy<false, PixelFormat::R8G8_SNORM>,
+    MortonCopy<false, PixelFormat::R8G8_SINT>,
+    MortonCopy<false, PixelFormat::R8G8_UINT>,
+    MortonCopy<false, PixelFormat::R32G32_UINT>,
+    MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
+    MortonCopy<false, PixelFormat::R32_UINT>,
+    MortonCopy<false, PixelFormat::R32_SINT>,
     nullptr,
     nullptr,
     nullptr,
-    MortonCopy<false, PixelFormat::BGRA8_SRGB>,
-    MortonCopy<false, PixelFormat::DXT1_SRGB>,
-    MortonCopy<false, PixelFormat::DXT23_SRGB>,
-    MortonCopy<false, PixelFormat::DXT45_SRGB>,
-    MortonCopy<false, PixelFormat::BC7U_SRGB>,
-    MortonCopy<false, PixelFormat::R4G4B4A4U>,
+    MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
+    MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
+    MortonCopy<false, PixelFormat::BC2_SRGB>,
+    MortonCopy<false, PixelFormat::BC3_SRGB>,
+    MortonCopy<false, PixelFormat::BC7_SRGB>,
+    MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
     nullptr,
     nullptr,
     nullptr,
@@ -199,12 +221,12 @@ static constexpr ConversionArray linear_to_morton_fns = {
     nullptr,
     nullptr,
     nullptr,
-    MortonCopy<false, PixelFormat::E5B9G9R9F>,
-    MortonCopy<false, PixelFormat::Z32F>,
-    MortonCopy<false, PixelFormat::Z16>,
-    MortonCopy<false, PixelFormat::Z24S8>,
-    MortonCopy<false, PixelFormat::S8Z24>,
-    MortonCopy<false, PixelFormat::Z32FS8>,
+    MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
+    MortonCopy<false, PixelFormat::D32_FLOAT>,
+    MortonCopy<false, PixelFormat::D16_UNORM>,
+    MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
+    MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
+    MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
 };
 
 static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 0d3a88765..fc54ca0ef 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -91,14 +91,15 @@ private:
     std::shared_ptr<HostCounter> last;
 };
 
-template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
-          class QueryPool>
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
 class QueryCacheBase {
 public:
-    explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
-        : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
-                                                      static_cast<QueryCache&>(*this),
-                                                      VideoCore::QueryType::SamplesPassed}}} {}
+    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+                            Tegra::Engines::Maxwell3D& maxwell3d_,
+                            Tegra::MemoryManager& gpu_memory_)
+        : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+          gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+                                                          VideoCore::QueryType::SamplesPassed}}} {}
 
     void InvalidateRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
@@ -118,29 +119,27 @@ public:
      */
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
         std::unique_lock lock{mutex};
-        auto& memory_manager = system.GPU().MemoryManager();
-        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
-        ASSERT(cpu_addr_opt);
-        VAddr cpu_addr = *cpu_addr_opt;
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        ASSERT(cpu_addr);
 
-        CachedQuery* query = TryGet(cpu_addr);
+        CachedQuery* query = TryGet(*cpu_addr);
         if (!query) {
-            ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
-            const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+            ASSERT_OR_EXECUTE(cpu_addr, return;);
+            u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
 
-            query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
+            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
         }
 
         query->BindCounter(Stream(type).Current(), timestamp);
         if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-            AsyncFlushQuery(cpu_addr);
+            AsyncFlushQuery(*cpu_addr);
         }
     }
 
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
     void UpdateCounters() {
         std::unique_lock lock{mutex};
-        const auto& regs = system.GPU().Maxwell3D().regs;
+        const auto& regs = maxwell3d.regs;
         Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
     }
 
@@ -206,9 +205,6 @@ public:
         committed_flushes.pop_front();
     }
 
-protected:
-    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
-
 private:
     /// Flushes a memory range to guest memory and removes it from the cache.
     void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
@@ -270,8 +266,9 @@ private:
     static constexpr std::uintptr_t PAGE_SIZE = 4096;
     static constexpr unsigned PAGE_BITS = 12;
 
-    Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::MemoryManager& gpu_memory;
 
     std::recursive_mutex mutex;
 
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3cbdac8e7..b3e0919f8 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -106,11 +106,8 @@ public:
     virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
 
     /// Initialize disk cached resources for the game being emulated
-    virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
-                                   const DiskResourceLoadCallback& callback = {}) {}
-
-    /// Initializes renderer dirty flags
-    virtual void SetupDirtyFlags() {}
+    virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+                                   const DiskResourceLoadCallback& callback) {}
 
     /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
     GuestDriverProfile& AccessGuestDriverProfile() {
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index dfb06e87e..a93a1732c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -9,7 +9,9 @@
 
 namespace VideoCore {
 
-RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} {
+RendererBase::RendererBase(Core::Frontend::EmuWindow& window_,
+                           std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+    : render_window{window_}, context{std::move(context_)} {
     RefreshBaseSettings();
 }
 
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d85219b6..5c650808b 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -15,7 +15,8 @@
 
 namespace Core::Frontend {
 class EmuWindow;
-}
+class GraphicsContext;
+} // namespace Core::Frontend
 
 namespace VideoCore {
 
@@ -25,14 +26,15 @@ struct RendererSettings {
 
     // Screenshot
     std::atomic<bool> screenshot_requested{false};
-    void* screenshot_bits;
+    void* screenshot_bits{};
     std::function<void()> screenshot_complete_callback;
     Layout::FramebufferLayout screenshot_framebuffer_layout;
 };
 
 class RendererBase : NonCopyable {
 public:
-    explicit RendererBase(Core::Frontend::EmuWindow& window);
+    explicit RendererBase(Core::Frontend::EmuWindow& window,
+                          std::unique_ptr<Core::Frontend::GraphicsContext> context);
     virtual ~RendererBase();
 
     /// Initialize the renderer
@@ -44,11 +46,6 @@ public:
     /// Finalize rendering the guest frame and draw into the presentation texture
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
-    /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
-    /// specific implementation)
-    /// Returns true if a frame was drawn
-    virtual bool TryPresent(int timeout_ms) = 0;
-
     // Getter/setter functions:
     // ------------------------
 
@@ -68,6 +65,14 @@ public:
         return *rasterizer;
     }
 
+    Core::Frontend::GraphicsContext& Context() {
+        return *context;
+    }
+
+    const Core::Frontend::GraphicsContext& Context() const {
+        return *context;
+    }
+
     Core::Frontend::EmuWindow& GetRenderWindow() {
         return render_window;
     }
@@ -94,6 +99,7 @@ public:
 protected:
     Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
     std::unique_ptr<RasterizerInterface> rasterizer;
+    std::unique_ptr<Core::Frontend::GraphicsContext> context;
     f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
     int m_current_frame = 0;  ///< Current frame, should be set by the renderer
 
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index eb5158407..b7e9ed2e9 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) {
     return type;
 }
 
-std::string GlobalMemoryName(const GlobalMemoryBase& base) {
-    return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
-}
-
 class ARBDecompiler final {
 public:
     explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
@@ -199,6 +195,8 @@ public:
     }
 
 private:
+    void DefineGlobalMemory();
+
     void DeclareHeader();
     void DeclareVertex();
     void DeclareGeometry();
@@ -228,6 +226,7 @@ private:
 
     std::pair<std::string, std::size_t> BuildCoords(Operation);
     std::string BuildAoffi(Operation);
+    std::string GlobalMemoryPointer(const GmemNode& gmem);
     void Exit();
 
     std::string Assign(Operation);
@@ -378,10 +377,8 @@ private:
         std::string address;
         std::string_view opname;
         if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
-            AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
-                    Visit(gmem->GetBaseAddress()));
-            address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
-            opname = "ATOMB";
+            address = GlobalMemoryPointer(*gmem);
+            opname = "ATOM";
         } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
             address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
             opname = "ATOMS";
@@ -456,9 +453,13 @@ private:
         shader_source += '\n';
     }
 
-    std::string AllocTemporary() {
-        max_temporaries = std::max(max_temporaries, num_temporaries + 1);
-        return fmt::format("T{}.x", num_temporaries++);
+    std::string AllocLongVectorTemporary() {
+        max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
+        return fmt::format("L{}", num_long_temporaries++);
+    }
+
+    std::string AllocLongTemporary() {
+        return fmt::format("{}.x", AllocLongVectorTemporary());
     }
 
     std::string AllocVectorTemporary() {
@@ -466,8 +467,13 @@ private:
         return fmt::format("T{}", num_temporaries++);
     }
 
+    std::string AllocTemporary() {
+        return fmt::format("{}.x", AllocVectorTemporary());
+    }
+
     void ResetTemporaries() noexcept {
         num_temporaries = 0;
+        num_long_temporaries = 0;
     }
 
     const Device& device;
@@ -478,6 +484,11 @@ private:
     std::size_t num_temporaries = 0;
     std::size_t max_temporaries = 0;
 
+    std::size_t num_long_temporaries = 0;
+    std::size_t max_long_temporaries = 0;
+
+    std::map<GlobalMemoryBase, u32> global_memory_names;
+
     std::string shader_source;
 
     static constexpr std::string_view ADD_F32 = "ADD.F32";
@@ -784,6 +795,8 @@ private:
 ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                              ShaderType stage, std::string_view identifier)
     : device{device}, ir{ir}, registry{registry}, stage{stage} {
+    DefineGlobalMemory();
+
     AddLine("TEMP RC;");
     AddLine("TEMP FSWZA[4];");
     AddLine("TEMP FSWZB[4];");
@@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) {
     }
 }
 
+void ARBDecompiler::DefineGlobalMemory() {
+    u32 binding = 0;
+    for (const auto& pair : ir.GetGlobalMemory()) {
+        const GlobalMemoryBase base = pair.first;
+        global_memory_names.emplace(base, binding);
+        ++binding;
+    }
+}
+
 void ARBDecompiler::DeclareHeader() {
     AddLine("!!NV{}5.0", HeaderStageName(stage));
     // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
     AddLine("OPTION NV_internal;");
     AddLine("OPTION NV_gpu_program_fp64;");
-    AddLine("OPTION NV_shader_storage_buffer;");
     AddLine("OPTION NV_shader_thread_group;");
     if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
         AddLine("OPTION NV_shader_thread_shuffle;");
@@ -892,11 +913,19 @@ void ARBDecompiler::DeclareCompute() {
     const ComputeInfo& info = registry.GetComputeInfo();
     AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
             info.workgroup_size[2]);
-    if (info.shared_memory_size_in_words > 0) {
-        const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
-        AddLine("SHARED_MEMORY {};", size_in_bytes);
-        AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
+    if (info.shared_memory_size_in_words == 0) {
+        return;
+    }
+    const u32 limit = device.GetMaxComputeSharedMemorySize();
+    u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+    if (size_in_bytes > limit) {
+        LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+                  size_in_bytes, limit);
+        size_in_bytes = limit;
     }
+
+    AddLine("SHARED_MEMORY {};", size_in_bytes);
+    AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
 }
 
 void ARBDecompiler::DeclareInputAttributes() {
@@ -951,11 +980,10 @@ void ARBDecompiler::DeclareLocalMemory() {
 }
 
 void ARBDecompiler::DeclareGlobalMemory() {
-    u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
-    for (const auto& pair : ir.GetGlobalMemory()) {
-        const auto& base = pair.first;
-        AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
-        ++binding;
+    const std::size_t num_entries = ir.GetGlobalMemory().size();
+    if (num_entries > 0) {
+        const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
+        AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
     }
 }
 
@@ -977,6 +1005,9 @@ void ARBDecompiler::DeclareTemporaries() {
     for (std::size_t i = 0; i < max_temporaries; ++i) {
         AddLine("TEMP T{};", i);
     }
+    for (std::size_t i = 0; i < max_long_temporaries; ++i) {
+        AddLine("LONG TEMP L{};", i);
+    }
 }
 
 void ARBDecompiler::DeclarePredicates() {
@@ -1260,13 +1291,6 @@ std::string ARBDecompiler::Visit(const Node& node) {
             return "{0, 0, 0, 0}.x";
         }
 
-        const auto buffer_index = [this, &abuf]() -> std::string {
-            if (stage != ShaderType::Geometry) {
-                return "";
-            }
-            return fmt::format("[{}]", Visit(abuf->GetBuffer()));
-        };
-
         const Attribute::Index index = abuf->GetIndex();
         const u32 element = abuf->GetElement();
         const char swizzle = Swizzle(element);
@@ -1339,10 +1363,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
 
     if (const auto gmem = std::get_if<GmemNode>(&*node)) {
         std::string temporary = AllocTemporary();
-        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
-                Visit(gmem->GetBaseAddress()));
-        AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
-                temporary);
+        AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
         return temporary;
     }
 
@@ -1375,7 +1396,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
         return {};
     }
 
-    if (const auto cmt = std::get_if<CommentNode>(&*node)) {
+    if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
         // Uncommenting this will generate invalid code. GLASM lacks comments.
         // AddLine("// {}", cmt->GetText());
         return {};
@@ -1419,6 +1440,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
     return fmt::format(", offset({})", temporary);
 }
 
+std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
+    const u32 binding = global_memory_names.at(gmem.GetDescriptor());
+    const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
+
+    const std::string pointer = AllocLongVectorTemporary();
+    std::string temporary = AllocTemporary();
+
+    const u32 local_index = binding / 2;
+    AddLine("PK64.U {}, c[{}];", pointer, local_index);
+    AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
+            Visit(gmem.GetBaseAddress()));
+    AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
+    AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
+    return fmt::format("{}.x", pointer);
+}
+
 void ARBDecompiler::Exit() {
     if (stage != ShaderType::Fragment) {
         AddLine("RET;");
@@ -1515,11 +1552,7 @@ std::string ARBDecompiler::Assign(Operation operation) {
         ResetTemporaries();
         return {};
     } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
-        const std::string temporary = AllocTemporary();
-        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
-                Visit(gmem->GetBaseAddress()));
-        AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
-                temporary);
+        AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
         ResetTemporaries();
         return {};
     } else {
@@ -1671,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) {
 }
 
 std::string ARBDecompiler::HUnpack(Operation operation) {
-    const std::string operand = Visit(operation[0]);
+    std::string operand = Visit(operation[0]);
     switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
     case Tegra::Shader::HalfType::H0_H1:
         return operand;
@@ -2021,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) {
 
 std::string ARBDecompiler::YNegate(Operation) {
     LOG_WARNING(Render_OpenGL, "(STUBBED)");
-    const std::string temporary = AllocTemporary();
+    std::string temporary = AllocTemporary();
     AddLine("MOV.F {}, 1;", temporary);
     return temporary;
 }
@@ -2044,10 +2077,6 @@ std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
 }
 
 std::string ARBDecompiler::Barrier(Operation) {
-    if (!ir.IsDecompiled()) {
-        LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
-        return {};
-    }
     AddLine("BAR;");
     return {};
 }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index e461e4c70..b1c4cd62f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
     : VideoCommon::BufferBlock{cpu_addr, size} {
     gl_buffer.Create();
     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
-    if (device.HasVertexBufferUnifiedMemory()) {
+    if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
         glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
     }
@@ -59,9 +59,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
                              static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
 }
 
-OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+                               Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
                                const Device& device_, std::size_t stream_size)
-    : GenericBufferCache{rasterizer, system,
+    : GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
                          std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
       device{device_} {
     if (!device.HasFastBufferSubData()) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 88fdc0536..f75b32e31 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -52,7 +52,8 @@ private:
 using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
 class OGLBufferCache final : public GenericBufferCache {
 public:
-    explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
                             const Device& device, std::size_t stream_size);
     ~OGLBufferCache();
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c1f20f0ab..a94e4f72e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -193,7 +193,6 @@ bool IsASTCSupported() {
 Device::Device()
     : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
     const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
-    const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
     const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
     const std::vector extensions = GetExtensions();
 
@@ -212,6 +211,7 @@ Device::Device()
     shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
+    max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
     has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
                           GLAD_GL_NV_shader_thread_shuffle;
     has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -233,6 +233,8 @@ Device::Device()
                            GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
                            GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
 
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
+
     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
     LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
@@ -248,6 +250,7 @@ Device::Device(std::nullptr_t) {
     shader_storage_alignment = 4;
     max_vertex_attributes = 16;
     max_varyings = 15;
+    max_compute_shared_memory_size = 0x10000;
     has_warp_intrinsics = true;
     has_shader_ballot = true;
     has_vertex_viewport_layer = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e1d811966..8a4b6b9fc 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -52,6 +52,10 @@ public:
         return max_varyings;
     }
 
+    u32 GetMaxComputeSharedMemorySize() const {
+        return max_compute_shared_memory_size;
+    }
+
     bool HasWarpIntrinsics() const {
         return has_warp_intrinsics;
     }
@@ -104,6 +108,10 @@ public:
         return use_assembly_shaders;
     }
 
+    bool UseAsynchronousShaders() const {
+        return use_asynchronous_shaders;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
@@ -114,6 +122,7 @@ private:
     std::size_t shader_storage_alignment{};
     u32 max_vertex_attributes{};
     u32 max_varyings{};
+    u32 max_compute_shared_memory_size{};
     bool has_warp_intrinsics{};
     bool has_shader_ballot{};
     bool has_vertex_viewport_layer{};
@@ -127,6 +136,7 @@ private:
     bool has_fast_buffer_sub_data{};
     bool has_nv_viewport_array2{};
     bool use_assembly_shaders{};
+    bool use_asynchronous_shaders{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index ec5421afa..b532fdcc2 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,16 +4,17 @@
 
 #include "common/assert.h"
 
+#include <glad/glad.h>
+
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_fence_manager.h"
 
 namespace OpenGL {
 
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
-    : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
 
 GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
-    : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
+    : FenceBase(address, payload, is_stubbed) {}
 
 GLInnerFence::~GLInnerFence() = default;
 
@@ -44,11 +45,10 @@ void GLInnerFence::Wait() {
     glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
 }
 
-FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
-                                       VideoCore::RasterizerInterface& rasterizer,
+FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
                                        TextureCacheOpenGL& texture_cache,
                                        OGLBufferCache& buffer_cache, QueryCache& query_cache)
-    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
+    : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
 
 Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
     return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index c917b3343..da1dcdace 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <memory>
-#include <glad/glad.h>
 
 #include "common/common_types.h"
 #include "video_core/fence_manager.h"
@@ -38,9 +37,9 @@ using GenericFenceManager =
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
-    FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                       TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
-                       QueryCache& query_cache);
+    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+                                TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
+                                QueryCache& query_cache);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index d7ba57aca..1a3d9720e 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,12 +30,11 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
 
 } // Anonymous namespace
 
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
-    : VideoCommon::QueryCacheBase<
-          QueryCache, CachedQuery, CounterStream, HostCounter,
-          std::vector<OGLQuery>>{system,
-                                 static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
-      gl_rasterizer{gl_rasterizer} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+                       Tegra::MemoryManager& gpu_memory)
+    : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>(
+          rasterizer, maxwell3d, gpu_memory),
+      gl_rasterizer{rasterizer} {}
 
 QueryCache::~QueryCache() = default;
 
@@ -90,6 +89,8 @@ u64 HostCounter::BlockingQuery() const {
 CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
     : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
 
+CachedQuery::~CachedQuery() = default;
+
 CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
     : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
 
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d8e7052a1..82cac51ee 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -26,10 +26,11 @@ class RasterizerOpenGL;
 
 using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 
-class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
-                                                            HostCounter, std::vector<OGLQuery>> {
+class QueryCache final
+    : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+    explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+                        Tegra::MemoryManager& gpu_memory);
     ~QueryCache();
 
     OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -40,6 +41,7 @@ public:
 
 private:
     RasterizerOpenGL& gl_rasterizer;
+    std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
 };
 
 class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
@@ -62,10 +64,12 @@ class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
 public:
     explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
                          u8* host_ptr);
-    CachedQuery(CachedQuery&& rhs) noexcept;
-    CachedQuery(const CachedQuery&) = delete;
+    ~CachedQuery() override;
 
+    CachedQuery(CachedQuery&& rhs) noexcept;
     CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+
+    CachedQuery(const CachedQuery&) = delete;
     CachedQuery& operator=(const CachedQuery&) = delete;
 
     void Flush() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e960a0ef1..bbb2eb17c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -139,17 +139,33 @@ void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
 
+void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
+    if (num_entries == 0) {
+        return;
+    }
+    if (num_entries % 2 == 1) {
+        pointers[num_entries] = 0;
+    }
+    const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
+    glProgramLocalParametersI4uivNV(target, 0, num_vectors,
+                                    reinterpret_cast<const GLuint*>(pointers));
+}
+
 } // Anonymous namespace
 
-RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
-                                   const Device& device, ScreenInfo& info,
-                                   ProgramManager& program_manager, StateTracker& state_tracker)
-    : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
-                                                                            state_tracker},
-      shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
-      fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
-      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+                                   Core::Memory::Memory& cpu_memory, const Device& device_,
+                                   ScreenInfo& screen_info_, ProgramManager& program_manager_,
+                                   StateTracker& state_tracker_)
+    : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+      kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
+      screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
+      texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
+      shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+      query_cache(*this, maxwell3d, gpu_memory),
+      buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
+      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+      async_shaders(emu_window) {
     CheckExtensions();
 
     unified_uniform_buffer.Create();
@@ -162,6 +178,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
                                  nullptr, 0);
         }
     }
+
+    if (device.UseAsynchronousShaders()) {
+        async_shaders.AllocateWorkers();
+    }
 }
 
 RasterizerOpenGL::~RasterizerOpenGL() {
@@ -179,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() {
 }
 
 void RasterizerOpenGL::SetupVertexFormat() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::VertexFormats]) {
         return;
     }
@@ -200,7 +219,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
         }
         flags[Dirty::VertexFormat0 + index] = false;
 
-        const auto attrib = gpu.regs.vertex_attrib_format[index];
+        const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
         const auto gl_index = static_cast<GLuint>(index);
 
         // Disable constant attributes.
@@ -224,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
 }
 
 void RasterizerOpenGL::SetupVertexBuffer() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::VertexBuffers]) {
         return;
     }
@@ -236,7 +254,7 @@ void RasterizerOpenGL::SetupVertexBuffer() {
     const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
 
     // Upload all guest vertex arrays sequentially to our buffer
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
         if (!flags[Dirty::VertexBuffer0 + index]) {
             continue;
@@ -273,14 +291,13 @@ void RasterizerOpenGL::SetupVertexBuffer() {
 }
 
 void RasterizerOpenGL::SetupVertexInstances() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::VertexInstances]) {
         return;
     }
     flags[Dirty::VertexInstances] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
         if (!flags[Dirty::VertexInstance0 + index]) {
             continue;
@@ -296,7 +313,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
 
 GLintptr RasterizerOpenGL::SetupIndexBuffer() {
     MICROPROFILE_SCOPE(OpenGL_Index);
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     const std::size_t size = CalculateIndexBufferSize();
     const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
@@ -305,16 +322,14 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
 
 void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     MICROPROFILE_SCOPE(OpenGL_Shader);
-    auto& gpu = system.GPU().Maxwell3D();
-    std::size_t num_ssbos = 0;
     u32 clip_distances = 0;
 
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        const auto& shader_config = gpu.regs.shader_config[index];
+        const auto& shader_config = maxwell3d.regs.shader_config[index];
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
 
         // Skip stages that are not enabled
-        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
             switch (program) {
             case Maxwell::ShaderProgram::Geometry:
                 program_manager.UseGeometryShader(0);
@@ -329,31 +344,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         }
 
         // Currently this stages are not supported in the OpenGL backend.
-        // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
-        if (program == Maxwell::ShaderProgram::TesselationControl) {
-            continue;
-        } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
+        if (program == Maxwell::ShaderProgram::TesselationControl ||
+            program == Maxwell::ShaderProgram::TesselationEval) {
             continue;
         }
 
-        Shader* const shader = shader_cache.GetStageProgram(program);
-
-        if (device.UseAssemblyShaders()) {
-            // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
-            // all stages share the same bindings.
-            const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
-            ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
-            num_ssbos += num_stage_ssbos;
-        }
-
-        // Stage indices are 0 - 5
-        const std::size_t stage = index == 0 ? 0 : index - 1;
-        SetupDrawConstBuffers(stage, shader);
-        SetupDrawGlobalMemory(stage, shader);
-        SetupDrawTextures(stage, shader);
-        SetupDrawImages(stage, shader);
+        Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
 
-        const GLuint program_handle = shader->GetHandle();
+        const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
         switch (program) {
         case Maxwell::ShaderProgram::VertexA:
         case Maxwell::ShaderProgram::VertexB:
@@ -370,6 +369,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
                               shader_config.enable.Value(), shader_config.offset);
         }
 
+        // Stage indices are 0 - 5
+        const std::size_t stage = index == 0 ? 0 : index - 1;
+        SetupDrawConstBuffers(stage, shader);
+        SetupDrawGlobalMemory(stage, shader);
+        SetupDrawTextures(stage, shader);
+        SetupDrawImages(stage, shader);
+
         // Workaround for Intel drivers.
         // When a clip distance is enabled but not set in the shader it crops parts of the screen
         // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -384,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     }
 
     SyncClipEnabled(clip_distances);
-    gpu.dirty.flags[Dirty::Shaders] = false;
+    maxwell3d.dirty.flags[Dirty::Shaders] = false;
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
 
     std::size_t size = 0;
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -406,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
 }
 
 std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-
-    return static_cast<std::size_t>(regs.index_array.count) *
-           static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
 }
 
-void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
                                          const VideoCore::DiskResourceLoadCallback& callback) {
-    shader_cache.LoadDiskCache(stop_loading, callback);
-}
-
-void RasterizerOpenGL::SetupDirtyFlags() {
-    state_tracker.Initialize();
+    shader_cache.LoadDiskCache(title_id, stop_loading, callback);
 }
 
 void RasterizerOpenGL::ConfigureFramebuffers() {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    auto& gpu = system.GPU().Maxwell3D();
-    if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
+    if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
         return;
     }
-    gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
+    maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
 
     texture_cache.GuardRenderTargets(true);
 
     View depth_surface = texture_cache.GetDepthBufferSurface(true);
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
 
     // Bind the framebuffer surfaces
@@ -465,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
 }
 
 void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
-    auto& gpu = system.GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
 
     texture_cache.GuardRenderTargets(true);
     View color_surface;
@@ -516,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de
 }
 
 void RasterizerOpenGL::Clear() {
-    const auto& gpu = system.GPU().Maxwell3D();
-    if (!gpu.ShouldExecute()) {
+    if (!maxwell3d.ShouldExecute()) {
         return;
     }
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     bool use_color{};
     bool use_depth{};
     bool use_stencil{};
@@ -586,7 +583,6 @@ void RasterizerOpenGL::Clear() {
 
 void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
-    auto& gpu = system.GPU().Maxwell3D();
 
     query_cache.UpdateCounters();
 
@@ -634,7 +630,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     if (invalidated) {
         // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
-        auto& dirty = gpu.dirty.flags;
+        auto& dirty = maxwell3d.dirty.flags;
         dirty[Dirty::VertexBuffers] = true;
         for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
             dirty[index] = true;
@@ -655,7 +651,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     // Setup emulation uniform buffer.
     if (!device.UseAssemblyShaders()) {
         MaxwellUniformData ubo;
-        ubo.SetFromRegs(gpu);
+        ubo.SetFromRegs(maxwell3d);
         const auto info =
             buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
         glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
@@ -664,7 +660,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     // Setup shaders and their used resources.
     texture_cache.GuardSamplers(true);
-    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
+    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
     SetupShaders(primitive_mode);
     texture_cache.GuardSamplers(false);
 
@@ -681,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback(primitive_mode);
 
-    const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+    const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
     const GLsizei num_instances =
-        static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+        static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
     if (is_indexed) {
-        const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
-        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
+        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
         const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
-        const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+        const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
         if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
             glDrawElements(primitive_mode, num_vertices, format, offset);
         } else if (num_instances == 1 && base_instance == 0) {
@@ -707,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
                                                           base_instance);
         }
     } else {
-        const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
-        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
+        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
         if (num_instances == 1 && base_instance == 0) {
             glDrawArrays(primitive_mode, base_vertex, num_vertices);
         } else if (base_instance == 0) {
@@ -723,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     ++num_queued_commands;
 
-    system.GPU().TickWork();
+    gpu.TickWork();
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -731,6 +727,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     current_cbuf = 0;
 
     auto kernel = shader_cache.GetComputeKernel(code_addr);
+    program_manager.BindCompute(kernel->GetHandle());
+
     SetupComputeTextures(kernel);
     SetupComputeImages(kernel);
 
@@ -744,7 +742,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
 
     buffer_cache.Unmap();
 
-    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    const auto& launch_desc = kepler_compute.launch_description;
     program_manager.BindCompute(kernel->GetHandle());
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
     ++num_queued_commands;
@@ -807,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() {
 }
 
 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
-    auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
-        auto& memory_manager{gpu.MemoryManager()};
-        memory_manager.Write<u32>(addr, value);
+        gpu_memory.Write<u32>(addr, value);
         return;
     }
     fence_manager.SignalSemaphore(addr, value);
 }
 
 void RasterizerOpenGL::SignalSyncPoint(u32 value) {
-    auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
         gpu.IncrementSyncPoint(value);
         return;
@@ -826,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
 }
 
 void RasterizerOpenGL::ReleaseFences() {
-    auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
         return;
     }
@@ -912,7 +906,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
 
     MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+    const auto& stages = maxwell3d.state.shader_stages;
     const auto& shader_stage = stages[stage_index];
     const auto& entries = shader->GetEntries();
     const bool use_unified = entries.use_unified_uniforms;
@@ -937,7 +931,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
 
 void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    const auto& launch_desc = kepler_compute.launch_description;
     const auto& entries = kernel->GetEntries();
     const bool use_unified = entries.use_unified_uniforms;
 
@@ -1005,45 +999,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
 }
 
 void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
-    auto& gpu{system.GPU()};
-    auto& memory_manager{gpu.MemoryManager()};
-    const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
+    // NV assembly program target for each graphics stage, indexed by stage_index.
+    static constexpr std::array TARGET_LUT = {
+        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+    };
+    const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
+    const auto& entries{shader->GetEntries().global_memory_entries};
 
-    u32 binding =
-        device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
-    for (const auto& entry : shader->GetEntries().global_memory_entries) {
+    // Filled from slot 0: the GLSL binding can start at a non-zero base and must not index it.
+    std::array<GLuint64EXT, 32> pointers;
+    ASSERT(entries.size() <= pointers.size());
+    GLuint64EXT* pointer = pointers.data();
+    const bool assembly_shaders = device.UseAssemblyShaders();
+    u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
+    for (const auto& entry : entries) {
         const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
-        const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
-        const u32 size{memory_manager.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding++, entry, gpu_addr, size);
+        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+        const u32 size{gpu_memory.Read<u32>(addr + 8)};
+        SetupGlobalMemory(binding++, entry, gpu_addr, size, pointer++);
+    }
+    if (assembly_shaders) {
+        UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
     }
 }
 
 void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
-    auto& gpu{system.GPU()};
-    auto& memory_manager{gpu.MemoryManager()};
-    const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
+    const auto& entries{kernel->GetEntries().global_memory_entries};
+    // One slot per entry; SetupGlobalMemory only writes them on the assembly shader path.
+    std::array<GLuint64EXT, 32> pointers;
+    ASSERT(entries.size() <= pointers.size());
 
     u32 binding = 0;
-    for (const auto& entry : kernel->GetEntries().global_memory_entries) {
-        const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
-        const auto gpu_addr{memory_manager.Read<u64>(addr)};
-        const auto size{memory_manager.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding++, entry, gpu_addr, size);
+    for (const auto& entry : entries) {
+        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
+        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+        const u32 size{gpu_memory.Read<u32>(addr + 8)};
+        SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+        ++binding;
+    }
+    if (device.UseAssemblyShaders()) {
+        UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
     }
 }
 
 void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
-                                         GPUVAddr gpu_addr, std::size_t size) {
-    const auto alignment{device.GetShaderStorageBufferAlignment()};
+                                         GPUVAddr gpu_addr, std::size_t size,
+                                         GLuint64EXT* pointer) {
+    const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
     const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
-                      static_cast<GLsizeiptr>(size));
+    if (device.UseAssemblyShaders()) {
+        *pointer = info.address + info.offset; // Assembly shaders: no bind, binding is unused
+    } else {
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
+                          static_cast<GLsizeiptr>(size));
+    }
 }
 
 void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
-    const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).sampler;
     for (const auto& entry : shader->GetEntries().samplers) {
         const auto shader_type = static_cast<ShaderType>(stage_index);
@@ -1056,11 +1071,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader
 
 void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
-    const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
     for (const auto& entry : kernel->GetEntries().samplers) {
         for (std::size_t i = 0; i < entry.size; ++i) {
-            const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
+            const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
             SetupTexture(binding++, texture, entry);
         }
     }
@@ -1084,20 +1098,18 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
 }
 
 void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
-    const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).image;
     for (const auto& entry : shader->GetEntries().images) {
-        const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+        const auto shader_type = static_cast<ShaderType>(stage_index);
         const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
         SetupImage(binding++, tic, entry);
     }
 }
 
 void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
-    const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
     for (const auto& entry : shader->GetEntries().images) {
-        const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
+        const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
         SetupImage(binding++, tic, entry);
     }
 }
@@ -1117,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
 }
 
 void RasterizerOpenGL::SyncViewport() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
-    const auto& regs = gpu.regs;
+    auto& flags = maxwell3d.dirty.flags;
+    const auto& regs = maxwell3d.regs;
 
     const bool dirty_viewport = flags[Dirty::Viewports];
     const bool dirty_clip_control = flags[Dirty::ClipControl];
@@ -1191,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() {
 }
 
 void RasterizerOpenGL::SyncDepthClamp() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::DepthClampEnabled]) {
         return;
     }
     flags[Dirty::DepthClampEnabled] = false;
 
-    oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0);
+    oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
 }
 
 void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
         return;
     }
     flags[Dirty::ClipDistances] = false;
 
-    clip_mask &= gpu.regs.clip_distance_enabled;
+    clip_mask &= maxwell3d.regs.clip_distance_enabled;
     if (clip_mask == last_clip_distance_mask) {
         return;
     }
@@ -1225,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
-    const auto& regs = gpu.regs;
+    auto& flags = maxwell3d.dirty.flags;
+    const auto& regs = maxwell3d.regs;
 
     if (flags[Dirty::CullTest]) {
         flags[Dirty::CullTest] = false;
@@ -1242,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() {
 }
 
 void RasterizerOpenGL::SyncPrimitiveRestart() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::PrimitiveRestart]) {
         return;
     }
     flags[Dirty::PrimitiveRestart] = false;
 
-    if (gpu.regs.primitive_restart.enabled) {
+    if (maxwell3d.regs.primitive_restart.enabled) {
         glEnable(GL_PRIMITIVE_RESTART);
-        glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+        glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
     } else {
         glDisable(GL_PRIMITIVE_RESTART);
     }
 }
 
 void RasterizerOpenGL::SyncDepthTestState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
+    const auto& regs = maxwell3d.regs;
 
-    const auto& regs = gpu.regs;
     if (flags[Dirty::DepthMask]) {
         flags[Dirty::DepthMask] = false;
         glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
@@ -1279,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
 }
 
 void RasterizerOpenGL::SyncStencilTestState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::StencilTest]) {
         return;
     }
     flags[Dirty::StencilTest] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
 
     glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@@ -1311,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() {
 }
 
 void RasterizerOpenGL::SyncRasterizeEnable() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::RasterizeEnable]) {
         return;
     }
     flags[Dirty::RasterizeEnable] = false;
 
-    oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+    oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
 }
 
 void RasterizerOpenGL::SyncPolygonModes() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::PolygonModes]) {
         return;
     }
     flags[Dirty::PolygonModes] = false;
 
-    if (gpu.regs.fill_rectangle) {
+    const auto& regs = maxwell3d.regs;
+    if (regs.fill_rectangle) {
         if (!GLAD_GL_NV_fill_rectangle) {
             LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
             glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
@@ -1342,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() {
         return;
     }
 
-    if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+    if (regs.polygon_mode_front == regs.polygon_mode_back) {
         flags[Dirty::PolygonModeFront] = false;
         flags[Dirty::PolygonModeBack] = false;
-        glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+        glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
         return;
     }
 
     if (flags[Dirty::PolygonModeFront]) {
         flags[Dirty::PolygonModeFront] = false;
-        glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+        glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
     }
 
     if (flags[Dirty::PolygonModeBack]) {
         flags[Dirty::PolygonModeBack] = false;
-        glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+        glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
     }
 }
 
 void RasterizerOpenGL::SyncColorMask() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::ColorMasks]) {
         return;
     }
@@ -1371,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() {
     const bool force = flags[Dirty::ColorMaskCommon];
     flags[Dirty::ColorMaskCommon] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.color_mask_common) {
         if (!force && !flags[Dirty::ColorMask0]) {
             return;
@@ -1396,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() {
 }
 
 void RasterizerOpenGL::SyncMultiSampleState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::MultisampleControl]) {
         return;
     }
     flags[Dirty::MultisampleControl] = false;
 
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
     oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
 }
 
 void RasterizerOpenGL::SyncFragmentColorClampState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::FragmentClampColor]) {
         return;
     }
     flags[Dirty::FragmentClampColor] = false;
 
-    glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
+    glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
 }
 
 void RasterizerOpenGL::SyncBlendState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
-    const auto& regs = gpu.regs;
+    auto& flags = maxwell3d.dirty.flags;
+    const auto& regs = maxwell3d.regs;
 
     if (flags[Dirty::BlendColor]) {
         flags[Dirty::BlendColor] = false;
@@ -1479,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() {
 }
 
 void RasterizerOpenGL::SyncLogicOpState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::LogicOp]) {
         return;
     }
     flags[Dirty::LogicOp] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.logic_op.enable) {
         glEnable(GL_COLOR_LOGIC_OP);
         glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@@ -1496,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
 }
 
 void RasterizerOpenGL::SyncScissorTest() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::Scissors]) {
         return;
     }
     flags[Dirty::Scissors] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
         if (!flags[Dirty::Scissor0 + index]) {
             continue;
@@ -1522,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() {
 }
 
 void RasterizerOpenGL::SyncPointState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::PointSize]) {
         return;
     }
     flags[Dirty::PointSize] = false;
 
-    oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
+    oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
 
-    if (gpu.regs.vp_point_size.enable) {
+    if (maxwell3d.regs.vp_point_size.enable) {
         // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
         glEnable(GL_PROGRAM_POINT_SIZE);
         return;
@@ -1539,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() {
 
     // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
     // in OpenGL).
-    glPointSize(std::max(1.0f, gpu.regs.point_size));
+    glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
     glDisable(GL_PROGRAM_POINT_SIZE);
 }
 
 void RasterizerOpenGL::SyncLineState() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::LineWidth]) {
         return;
     }
     flags[Dirty::LineWidth] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
     glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
 }
 
 void RasterizerOpenGL::SyncPolygonOffset() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::PolygonOffset]) {
         return;
     }
     flags[Dirty::PolygonOffset] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
     oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
     oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@@ -1578,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
 }
 
 void RasterizerOpenGL::SyncAlphaTest() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::AlphaTest]) {
         return;
     }
     flags[Dirty::AlphaTest] = false;
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
         LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
     }
@@ -1599,20 +1593,19 @@ void RasterizerOpenGL::SyncAlphaTest() {
 }
 
 void RasterizerOpenGL::SyncFramebufferSRGB() {
-    auto& gpu = system.GPU().Maxwell3D();
-    auto& flags = gpu.dirty.flags;
+    auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::FramebufferSRGB]) {
         return;
     }
     flags[Dirty::FramebufferSRGB] = false;
 
-    oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+    oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
 }
 
 void RasterizerOpenGL::SyncTransformFeedback() {
     // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
     // when this is required.
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
 
     static constexpr std::size_t STRIDE = 3;
     std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
@@ -1664,7 +1657,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
 }
 
 void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.tfb_enabled == 0) {
         return;
     }
@@ -1707,7 +1700,7 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
 }
 
 void RasterizerOpenGL::EndTransformFeedback() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.tfb_enabled == 0) {
         return;
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4f082592f..f451404b2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -33,10 +33,11 @@
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/async_shaders.h"
 #include "video_core/textures/texture.h"
 
-namespace Core {
-class System;
+namespace Core::Memory {
+class Memory;
 }
 
 namespace Core::Frontend {
@@ -54,9 +55,10 @@ struct DrawParameters;
 
 class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
 public:
-    explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
-                              const Device& device, ScreenInfo& info,
-                              ProgramManager& program_manager, StateTracker& state_tracker);
+    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+                              Core::Memory::Memory& cpu_memory, const Device& device,
+                              ScreenInfo& screen_info, ProgramManager& program_manager,
+                              StateTracker& state_tracker);
     ~RasterizerOpenGL() override;
 
     void Draw(bool is_indexed, bool is_instanced) override;
@@ -82,15 +84,22 @@ public:
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
-    void LoadDiskResources(const std::atomic_bool& stop_loading,
+    void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
-    void SetupDirtyFlags() override;
 
     /// Returns true when there are commands queued to the OpenGL server.
     bool AnyCommandQueued() const {
         return num_queued_commands > 0;
     }
 
+    /// Returns the AsyncShaders instance held by this rasterizer (mutable and const overloads).
+    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+        return async_shaders;
+    }
+    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+        return async_shaders;
+    }
+
 private:
     /// Configures the color and depth framebuffer states.
     void ConfigureFramebuffers();
@@ -115,9 +124,9 @@ private:
     /// Configures the current global memory entries to use for the kernel invocation.
     void SetupComputeGlobalMemory(Shader* kernel);
 
-    /// Configures a constant buffer.
+    /// Configures a global memory buffer.
     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
-                           std::size_t size);
+                           std::size_t size, GLuint64EXT* pointer);
 
     /// Configures the current textures to use for the draw command.
     void SetupDrawTextures(std::size_t stage_index, Shader* shader);
@@ -228,7 +237,15 @@ private:
 
     void SetupShaders(GLenum primitive_mode);
 
+    Tegra::GPU& gpu;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+
     const Device& device;
+    ScreenInfo& screen_info;
+    ProgramManager& program_manager;
+    StateTracker& state_tracker;
 
     TextureCacheOpenGL texture_cache;
     ShaderCacheOpenGL shader_cache;
@@ -238,10 +255,7 @@ private:
     OGLBufferCache buffer_cache;
     FenceManagerOpenGL fence_manager;
 
-    Core::System& system;
-    ScreenInfo& screen_info;
-    ProgramManager& program_manager;
-    StateTracker& state_tracker;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
 
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index a787e27d2..0ebcec427 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <string_view>
 #include <utility>
 #include <glad/glad.h>
 #include "common/common_types.h"
@@ -82,11 +83,13 @@ void OGLSampler::Release() {
     handle = 0;
 }
 
-void OGLShader::Create(const char* source, GLenum type) {
-    if (handle != 0)
+void OGLShader::Create(std::string_view source, GLenum type) {
+    if (handle != 0) {
         return;
-    if (source == nullptr)
+    }
+    if (source.empty()) {
         return;
+    }
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
     handle = GLShader::LoadShader(source, type);
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f8b322227..f48398669 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <string_view>
 #include <utility>
 #include <glad/glad.h>
 #include "common/common_types.h"
@@ -127,7 +128,7 @@ public:
         return *this;
     }
 
-    void Create(const char* source, GLenum type);
+    void Create(std::string_view source, GLenum type);
 
     void Release();
 
@@ -177,6 +178,12 @@ public:
         Release();
     }
 
+    OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
+        Release(); // Free the resource currently held before adopting o's handle
+        handle = std::exchange(o.handle, 0); // o is left holding handle 0
+        return *this;
+    }
+
     /// Deletes the internal OpenGL resource
     void Release();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c6a3bf3a1..bd56bed0c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -22,6 +22,7 @@
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_arb_decompiler.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
@@ -31,6 +32,7 @@
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
 
 namespace OpenGL {
 
@@ -125,7 +127,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
     const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
     const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
                                                            entry.graphics_info, entry.compute_info};
-    const auto registry = std::make_shared<Registry>(entry.type, info);
+    auto registry = std::make_shared<Registry>(entry.type, info);
     for (const auto& [address, value] : entry.keys) {
         const auto [buffer, offset] = address;
         registry->InsertKey(buffer, offset, value);
@@ -140,9 +142,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
     return registry;
 }
 
+/// Returns the set of program binary formats reported by the current OpenGL context.
+std::unordered_set<GLenum> GetSupportedFormats() {
+    GLint num_formats = 0; // Stays zero if the query fails; glGet* writes nothing on error
+    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+    std::vector<GLint> formats(num_formats);
+    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+
+    std::unordered_set<GLenum> supported_formats;
+    for (const GLint format : formats) {
+        supported_formats.insert(static_cast<GLenum>(format));
+    }
+    return supported_formats;
+}
+
+} // Anonymous namespace
+
 ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
-                             const ShaderIR& ir, const Registry& registry,
-                             bool hint_retrievable = false) {
+                             const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
     const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
     LOG_INFO(Render_OpenGL, "{}", shader_id);
 
@@ -181,30 +198,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
     return program;
 }
 
-std::unordered_set<GLenum> GetSupportedFormats() {
-    GLint num_formats;
-    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
-
-    std::vector<GLint> formats(num_formats);
-    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
-
-    std::unordered_set<GLenum> supported_formats;
-    for (const GLint format : formats) {
-        supported_formats.insert(static_cast<GLenum>(format));
-    }
-    return supported_formats;
-}
-
-} // Anonymous namespace
-
 Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
-               ProgramSharedPtr program_)
-    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
+               ProgramSharedPtr program_, bool is_built)
+    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
+      is_built(is_built) { // Member is initialized from the same-named parameter
     handle = program->assembly_program.handle;
     if (handle == 0) {
         handle = program->source_program.handle;
     }
-    ASSERT(handle != 0);
+    if (is_built) { // Only a shader constructed as built must already have a handle
+        ASSERT(handle != 0);
+    }
 }
 
 Shader::~Shader() = default;
@@ -214,43 +218,78 @@ GLuint Shader::GetHandle() const {
     return handle;
 }
 
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
-                                                      Maxwell::ShaderProgram program_type,
-                                                      ProgramCode code, ProgramCode code_b) {
+bool Shader::IsBuilt() const { // Reports the is_built flag (set by the ctor or Async*Built)
+    return is_built;
+}
+
+void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { // Adopts new_program as source program
+    program->source_program = std::move(new_program);
+    handle = program->source_program.handle; // GetHandle() returns this handle from now on
+    is_built = true;
+}
+
+void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { // Adopts it as assembly program
+    program->assembly_program = std::move(new_program);
+    handle = program->assembly_program.handle; // GetHandle() returns this handle from now on
+    is_built = true;
+}
+
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(
+    const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
+    ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
     const auto shader_type = GetShaderType(program_type);
-    const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
-    auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
-    const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
-    // TODO(Rodrigo): Handle VertexA shaders
-    // std::optional<ShaderIR> ir_b;
-    // if (!code_b.empty()) {
-    //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
-    // }
-    auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+    auto& gpu = params.gpu;
+    gpu.ShaderNotify().MarkSharderBuilding(); // Balanced by MarkShaderComplete(), see both paths
+
+    auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
+    if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
+        const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+        // TODO(Rodrigo): Handle VertexA shaders
+        // std::optional<ShaderIR> ir_b;
+        // if (!code_b.empty()) {
+        //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
+        // }
+        auto program =
+            BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+        ShaderDiskCacheEntry entry;
+        entry.type = shader_type;
+        entry.code = std::move(code);
+        entry.code_b = std::move(code_b);
+        entry.unique_identifier = params.unique_identifier;
+        entry.bound_buffer = registry->GetBoundBuffer();
+        entry.graphics_info = registry->GetGraphicsInfo();
+        entry.keys = registry->GetKeys();
+        entry.bound_samplers = registry->GetBoundSamplers();
+        entry.bindless_samplers = registry->GetBindlessSamplers();
+        params.disk_cache.SaveEntry(std::move(entry));
+
+        gpu.ShaderNotify().MarkShaderComplete(); // Synchronous build is finished at this point
+
+        return std::unique_ptr<Shader>(new Shader(std::move(registry),
+                                                  MakeEntries(params.device, ir, shader_type),
+                                                  std::move(program), true));
+    } else { // Asynchronous path: queue the build and return a shader flagged as not built
+        // Required for entries
+        const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+        auto entries = MakeEntries(params.device, ir, shader_type);
 
-    ShaderDiskCacheEntry entry;
-    entry.type = shader_type;
-    entry.code = std::move(code);
-    entry.code_b = std::move(code_b);
-    entry.unique_identifier = params.unique_identifier;
-    entry.bound_buffer = registry->GetBoundBuffer();
-    entry.graphics_info = registry->GetGraphicsInfo();
-    entry.keys = registry->GetKeys();
-    entry.bound_samplers = registry->GetBoundSamplers();
-    entry.bindless_samplers = registry->GetBindlessSamplers();
-    params.disk_cache.SaveEntry(std::move(entry));
+        async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
+                                        std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
+                                        COMPILER_SETTINGS, *registry, cpu_addr);
 
-    return std::unique_ptr<Shader>(new Shader(
-        std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
+        auto program = std::make_shared<ProgramHandle>(); // Filled later by Async*Built()
+        return std::unique_ptr<Shader>(
+            new Shader(std::move(registry), std::move(entries), std::move(program), false));
+    }
 }
 
 std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
                                                        ProgramCode code) {
-    const std::size_t size_in_bytes = code.size() * sizeof(u64);
+    auto& gpu = params.gpu;
+    gpu.ShaderNotify().MarkSharderBuilding();
 
-    auto& engine = params.system.GPU().KeplerCompute();
-    auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+    auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
     const u64 uid = params.unique_identifier;
     auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
@@ -266,6 +305,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
+    gpu.ShaderNotify().MarkShaderComplete();
+
     return std::unique_ptr<Shader>(new Shader(std::move(registry),
                                               MakeEntries(params.device, ir, ShaderType::Compute),
                                               std::move(program)));
@@ -277,15 +318,20 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
         precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
 }
 
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
-                                     Core::Frontend::EmuWindow& emu_window, const Device& device)
-    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
-      emu_window{emu_window}, device{device}, disk_cache{system} {}
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer,
+                                     Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+                                     Tegra::Engines::Maxwell3D& maxwell3d_,
+                                     Tegra::Engines::KeplerCompute& kepler_compute_,
+                                     Tegra::MemoryManager& gpu_memory_, const Device& device_)
+    : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_},
+      gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, // Follows the member declaration order
+      kepler_compute{kepler_compute_}, device{device_} {} // disk_cache is default-constructed
 
 ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
 
-void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
+    disk_cache.BindTitleID(title_id);
     const std::optional transferable = disk_cache.LoadTransferable();
     if (!transferable) {
         return;
@@ -361,7 +407,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
         }
     };
 
-    const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
+    const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
     const std::size_t bucket_size{transferable->size() / num_workers};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
     std::vector<std::thread> threads(num_workers);
@@ -436,42 +482,77 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
     return program;
 }
 
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
-    if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
-        return last_shaders[static_cast<std::size_t>(program)];
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
+                                           VideoCommon::Shader::AsyncShaders& async_shaders) {
+    if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
+        auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
+        if (last_shader->IsBuilt()) {
+            return last_shader;
+        }
     }
 
-    auto& memory_manager{system.GPU().MemoryManager()};
-    const GPUVAddr address{GetShaderAddress(system, program)};
+    const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
+
+    if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
+        auto completed_work = async_shaders.GetCompletedWork();
+        for (auto& work : completed_work) {
+            Shader* shader = TryGet(work.cpu_address);
+            gpu.ShaderNotify().MarkShaderComplete();
+            if (shader == nullptr) {
+                continue;
+            }
+            using namespace VideoCommon::Shader;
+            if (work.backend == AsyncShaders::Backend::OpenGL) {
+                shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
+            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+                shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+            }
+
+            auto& registry = shader->GetRegistry();
+
+            ShaderDiskCacheEntry entry;
+            entry.type = work.shader_type;
+            entry.code = std::move(work.code);
+            entry.code_b = std::move(work.code_b);
+            entry.unique_identifier = work.uid;
+            entry.bound_buffer = registry.GetBoundBuffer();
+            entry.graphics_info = registry.GetGraphicsInfo();
+            entry.keys = registry.GetKeys();
+            entry.bound_samplers = registry.GetBoundSamplers();
+            entry.bindless_samplers = registry.GetBindlessSamplers();
+            disk_cache.SaveEntry(std::move(entry));
+        }
+    }
 
     // Look up shader in the cache based on address
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
+    const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
     if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
 
-    const auto host_ptr{memory_manager.GetPointer(address)};
+    const u8* const host_ptr{gpu_memory.GetPointer(address)};
 
     // No shader found - create a new one
-    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
+    ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
     ProgramCode code_b;
     if (program == Maxwell::ShaderProgram::VertexA) {
-        const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
-        const u8* host_ptr_b = memory_manager.GetPointer(address_b);
-        code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
+        const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
+        const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
+        code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
     }
     const std::size_t code_size = code.size() * sizeof(u64);
 
     const u64 unique_identifier = GetUniqueIdentifier(
         GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
 
-    const ShaderParameters params{system,    disk_cache, device,
-                                  *cpu_addr, host_ptr,   unique_identifier};
+    const ShaderParameters params{gpu,       maxwell3d, disk_cache,       device,
+                                  *cpu_addr, host_ptr,  unique_identifier};
 
     std::unique_ptr<Shader> shader;
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
-        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
+        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
+                                               async_shaders, cpu_addr.value_or(0));
     } else {
         shader = Shader::CreateFromCache(params, found->second);
     }
@@ -487,21 +568,20 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 }
 
 Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
-    auto& memory_manager{system.GPU().MemoryManager()};
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+    const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
 
     if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
         return kernel;
     }
 
-    const auto host_ptr{memory_manager.GetPointer(code_addr)};
     // No kernel found, create a new one
-    ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+    const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
+    ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
     const std::size_t code_size{code.size() * sizeof(u64)};
     const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
 
-    const ShaderParameters params{system,    disk_cache, device,
-                                  *cpu_addr, host_ptr,   unique_identifier};
+    const ShaderParameters params{gpu,       kepler_compute, disk_cache,       device,
+                                  *cpu_addr, host_ptr,       unique_identifier};
 
     std::unique_ptr<Shader> kernel;
     const auto found = runtime_cache.find(unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 994aaeaf2..1708af06a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -25,14 +25,18 @@
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
 
-namespace Core {
-class System;
+namespace Tegra {
+class MemoryManager;
 }
 
 namespace Core::Frontend {
 class EmuWindow;
 }
 
+namespace VideoCommon::Shader {
+class AsyncShaders;
+}
+
 namespace OpenGL {
 
 class Device;
@@ -53,14 +57,20 @@ struct PrecompiledShader {
 };
 
 struct ShaderParameters {
-    Core::System& system;
+    Tegra::GPU& gpu; // Source of ShaderNotify() for the shader factories
+    Tegra::Engines::ConstBufferEngineInterface& engine; // Maxwell3D (stages) or KeplerCompute
     ShaderDiskCacheOpenGL& disk_cache;
     const Device& device;
     VAddr cpu_addr;
-    u8* host_ptr;
+    const u8* host_ptr; // Read-only view of the shader code in host memory
     u64 unique_identifier;
 };
 
+ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
+                             u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
+                             const VideoCommon::Shader::Registry& registry,
+                             bool hint_retrievable = false);
+
 class Shader final {
 public:
     ~Shader();
@@ -68,15 +78,28 @@ public:
     /// Gets the GL program handle for the shader
     GLuint GetHandle() const;
 
+    bool IsBuilt() const;
+
     /// Gets the shader entries for the shader
     const ShaderEntries& GetEntries() const {
         return entries;
     }
 
-    static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
-                                                         Maxwell::ShaderProgram program_type,
-                                                         ProgramCode program_code,
-                                                         ProgramCode program_code_b);
+    const VideoCommon::Shader::Registry& GetRegistry() const {
+        return *registry;
+    }
+
+    /// Mark an OpenGL shader as built
+    void AsyncOpenGLBuilt(OGLProgram new_program);
+
+    /// Mark a GLASM shader as built
+    void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+
+    static std::unique_ptr<Shader> CreateStageFromMemory(
+        const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+        ProgramCode program_code, ProgramCode program_code_b,
+        VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+
     static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
                                                           ProgramCode code);
 
@@ -85,26 +108,30 @@ public:
 
 private:
     explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
-                    ProgramSharedPtr program);
+                    ProgramSharedPtr program, bool is_built = true);
 
     std::shared_ptr<VideoCommon::Shader::Registry> registry;
     ShaderEntries entries;
     ProgramSharedPtr program;
     GLuint handle = 0;
+    bool is_built{};
 };
 
 class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
-                               Core::Frontend::EmuWindow& emu_window, const Device& device);
+    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window,
+                               Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d,
+                               Tegra::Engines::KeplerCompute& kepler_compute,
+                               Tegra::MemoryManager& gpu_memory, const Device& device);
     ~ShaderCacheOpenGL() override;
 
     /// Loads disk cache for the current game
-    void LoadDiskCache(const std::atomic_bool& stop_loading,
+    void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
                        const VideoCore::DiskResourceLoadCallback& callback);
 
     /// Gets the current specified shader stage program
-    Shader* GetStageProgram(Maxwell::ShaderProgram program);
+    Shader* GetStageProgram(Maxwell::ShaderProgram program,
+                            VideoCommon::Shader::AsyncShaders& async_shaders);
 
     /// Gets a compute kernel in the passed address
     Shader* GetComputeKernel(GPUVAddr code_addr);
@@ -114,9 +141,13 @@ private:
         const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
         const std::unordered_set<GLenum>& supported_formats);
 
-    Core::System& system;
     Core::Frontend::EmuWindow& emu_window;
+    Tegra::GPU& gpu;
+    Tegra::MemoryManager& gpu_memory;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
     const Device& device;
+
     ShaderDiskCacheOpenGL disk_cache;
     std::unordered_map<u64, PrecompiledShader> runtime_cache;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2c49aeaac..bbb8fb095 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -602,8 +602,15 @@ private:
             return;
         }
         const auto& info = registry.GetComputeInfo();
-        if (const u32 size = info.shared_memory_size_in_words; size > 0) {
-            code.AddLine("shared uint smem[{}];", size);
+        if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
+            const u32 limit = device.GetMaxComputeSharedMemorySize();
+            if (size > limit) {
+                LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+                          size, limit);
+                size = limit;
+            }
+
+            code.AddLine("shared uint smem[{}];", size / 4);
             code.AddNewLine();
         }
         code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
@@ -806,7 +813,7 @@ private:
         const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
         const auto it = transform_feedback.find(location);
         if (it == transform_feedback.end()) {
-            return {};
+            return std::nullopt;
         }
         return it->second.components;
     }
@@ -1288,21 +1295,21 @@ private:
             switch (element) {
             case 0:
                 UNIMPLEMENTED();
-                return {};
+                return std::nullopt;
             case 1:
                 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
-                    return {};
+                    return std::nullopt;
                 }
                 return {{"gl_Layer", Type::Int}};
             case 2:
                 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
-                    return {};
+                    return std::nullopt;
                 }
                 return {{"gl_ViewportIndex", Type::Int}};
             case 3:
                 return {{"gl_PointSize", Type::Float}};
             }
-            return {};
+            return std::nullopt;
         case Attribute::Index::FrontColor:
             return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
         case Attribute::Index::FrontSecondaryColor:
@@ -1325,7 +1332,7 @@ private:
                          Type::Float}};
             }
             UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
-            return {};
+            return std::nullopt;
         }
     }
 
@@ -1436,8 +1443,10 @@ private:
                 return expr + ", vec2(0.0), vec2(0.0))";
             case TextureType::TextureCube:
                 return expr + ", vec3(0.0), vec3(0.0))";
+            default:
+                UNREACHABLE();
+                break;
             }
-            UNREACHABLE();
         }
 
         for (const auto& variant : extras) {
@@ -1912,7 +1921,7 @@ private:
     Expression Comparison(Operation operation) {
         static_assert(!unordered || type == Type::Float);
 
-        const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
+        Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
 
         if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
             // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
@@ -1952,10 +1961,6 @@ private:
         return {fmt::format("({} != 0)", carry), Type::Bool};
     }
 
-    Expression LogicalFIsNan(Operation operation) {
-        return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
-    }
-
     Expression LogicalAssign(Operation operation) {
         const Node& dest = operation[0];
         const Node& src = operation[1];
@@ -2771,15 +2776,6 @@ private:
         return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
     }
 
-    bool IsRenderTargetEnabled(u32 render_target) const {
-        for (u32 component = 0; component < 4; ++component) {
-            if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
     const Device& device;
     const ShaderIR& ir;
     const Registry& registry;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 2dcc2b0eb..166ee34e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -73,7 +73,7 @@ ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
 
 ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
 
-bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
+bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
     if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
         return false;
     }
@@ -144,7 +144,7 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
     return true;
 }
 
-bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
+bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
     if (file.WriteObject(static_cast<u32>(type)) != 1 ||
         file.WriteObject(static_cast<u32>(code.size())) != 1 ||
         file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
@@ -206,28 +206,32 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
                flat_bindless_samplers.size();
 }
 
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
 
 ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
 
+void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { // LoadTransferable() bails out on 0
+    title_id = title_id_;
+}
+
 std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
     // Skip games without title id
-    const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+    const bool has_title_id = title_id != 0;
     if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
-        return {};
+        return std::nullopt;
     }
 
-    FileUtil::IOFile file(GetTransferablePath(), "rb");
+    Common::FS::IOFile file(GetTransferablePath(), "rb");
     if (!file.IsOpen()) {
         LOG_INFO(Render_OpenGL, "No transferable shader cache found");
         is_usable = true;
-        return {};
+        return std::nullopt;
     }
 
     u32 version{};
     if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
         LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
-        return {};
+        return std::nullopt;
     }
 
     if (version < NativeVersion) {
@@ -235,12 +239,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
         file.Close();
         InvalidateTransferable();
         is_usable = true;
-        return {};
+        return std::nullopt;
     }
     if (version > NativeVersion) {
         LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
                                    "of the emulator, skipping");
-        return {};
+        return std::nullopt;
     }
 
     // Version is valid, load the shaders
@@ -249,7 +253,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
         ShaderDiskCacheEntry& entry = entries.emplace_back();
         if (!entry.Load(file)) {
             LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
-            return {};
+            return std::nullopt;
         }
     }
 
@@ -262,7 +266,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
         return {};
     }
 
-    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+    Common::FS::IOFile file(GetPrecompiledPath(), "rb");
     if (!file.IsOpen()) {
         LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
         return {};
@@ -279,7 +283,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
 }
 
 std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
-    FileUtil::IOFile& file) {
+    Common::FS::IOFile& file) {
     // Read compressed file from disk and decompress to virtual precompiled cache file
     std::vector<u8> compressed(file.GetSize());
     file.ReadBytes(compressed.data(), compressed.size());
@@ -290,12 +294,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
     ShaderCacheVersionHash file_hash{};
     if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
         precompiled_cache_virtual_file_offset = 0;
-        return {};
+        return std::nullopt;
     }
     if (GetShaderCacheVersionHash() != file_hash) {
         LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
         precompiled_cache_virtual_file_offset = 0;
-        return {};
+        return std::nullopt;
     }
 
     std::vector<ShaderDiskCachePrecompiled> entries;
@@ -305,19 +309,20 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
         if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
             !LoadObjectFromPrecompiled(entry.binary_format) ||
             !LoadObjectFromPrecompiled(binary_size)) {
-            return {};
+            return std::nullopt;
         }
 
         entry.binary.resize(binary_size);
         if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
-            return {};
+            return std::nullopt;
         }
     }
-    return entries;
+
+    return std::move(entries);
 }
 
 void ShaderDiskCacheOpenGL::InvalidateTransferable() {
-    if (!FileUtil::Delete(GetTransferablePath())) {
+    if (!Common::FS::Delete(GetTransferablePath())) {
         LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
                   GetTransferablePath());
     }
@@ -328,7 +333,7 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
     // Clear virtaul precompiled cache file
     precompiled_cache_virtual_file.Resize(0);
 
-    if (!FileUtil::Delete(GetPrecompiledPath())) {
+    if (!Common::FS::Delete(GetPrecompiledPath())) {
         LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
     }
 }
@@ -344,7 +349,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
         return;
     }
 
-    FileUtil::IOFile file = AppendTransferableFile();
+    Common::FS::IOFile file = AppendTransferableFile();
     if (!file.IsOpen()) {
         return;
     }
@@ -386,15 +391,15 @@ void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint progra
     }
 }
 
-FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
     if (!EnsureDirectories()) {
         return {};
     }
 
     const auto transferable_path{GetTransferablePath()};
-    const bool existed = FileUtil::Exists(transferable_path);
+    const bool existed = Common::FS::Exists(transferable_path);
 
-    FileUtil::IOFile file(transferable_path, "ab");
+    Common::FS::IOFile file(transferable_path, "ab");
     if (!file.IsOpen()) {
         LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
         return {};
@@ -426,7 +431,7 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
         Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
 
     const auto precompiled_path{GetPrecompiledPath()};
-    FileUtil::IOFile file(precompiled_path, "wb");
+    Common::FS::IOFile file(precompiled_path, "wb");
 
     if (!file.IsOpen()) {
         LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
@@ -440,24 +445,24 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
 
 bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
     const auto CreateDir = [](const std::string& dir) {
-        if (!FileUtil::CreateDir(dir)) {
+        if (!Common::FS::CreateDir(dir)) {
             LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
             return false;
         }
         return true;
     };
 
-    return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
+    return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) &&
            CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
            CreateDir(GetPrecompiledDir());
 }
 
 std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
-    return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+    return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
 }
 
 std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
-    return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+    return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
 }
 
 std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
@@ -469,11 +474,11 @@ std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
 }
 
 std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
-    return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
+    return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl";
 }
 
 std::string ShaderDiskCacheOpenGL::GetTitleID() const {
-    return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+    return fmt::format("{:016X}", title_id);
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index a79cef0e9..aef841c1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -21,11 +21,7 @@
 #include "video_core/engines/shader_type.h"
 #include "video_core/shader/registry.h"
 
-namespace Core {
-class System;
-}
-
-namespace FileUtil {
+namespace Common::FS {
 class IOFile;
 }
 
@@ -38,9 +34,9 @@ struct ShaderDiskCacheEntry {
     ShaderDiskCacheEntry();
     ~ShaderDiskCacheEntry();
 
-    bool Load(FileUtil::IOFile& file);
+    bool Load(Common::FS::IOFile& file);
 
-    bool Save(FileUtil::IOFile& file) const;
+    bool Save(Common::FS::IOFile& file) const;
 
     bool HasProgramA() const {
         return !code.empty() && !code_b.empty();
@@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled {
 
 class ShaderDiskCacheOpenGL {
 public:
-    explicit ShaderDiskCacheOpenGL(Core::System& system);
+    explicit ShaderDiskCacheOpenGL();
     ~ShaderDiskCacheOpenGL();
 
+    /// Binds a title ID for all future operations.
+    void BindTitleID(u64 title_id);
+
     /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
     std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
 
@@ -97,10 +96,10 @@ public:
 private:
     /// Loads the transferable cache. Returns empty on failure.
     std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
-        FileUtil::IOFile& file);
+        Common::FS::IOFile& file);
 
     /// Opens current game's transferable file and write it's header if it doesn't exist
-    FileUtil::IOFile AppendTransferableFile() const;
+    Common::FS::IOFile AppendTransferableFile() const;
 
     /// Save precompiled header to precompiled_cache_in_memory
     void SavePrecompiledHeaderToVirtualPrecompiledCache();
@@ -157,8 +156,6 @@ private:
         return LoadArrayFromPrecompiled(&object, 1);
     }
 
-    Core::System& system;
-
     // Stores whole precompiled cache which will be read from or saved to the precompiled chache
     // file
     FileSys::VectorVfsFile precompiled_cache_virtual_file;
@@ -168,8 +165,11 @@ private:
     // Stored transferable shaders
     std::unordered_set<u64> stored_transferable;
 
+    /// Title ID to operate on
+    u64 title_id = 0;
+
     // The cache has been loaded at boot
-    bool is_usable{};
+    bool is_usable = false;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8e754fa90..691c6c79b 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -11,8 +11,30 @@
 
 namespace OpenGL {
 
-ProgramManager::ProgramManager(const Device& device) {
-    use_assembly_programs = device.UseAssemblyShaders();
+namespace {
+
+void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
+    if (current == old) {
+        return;
+    }
+    if (current == 0) {
+        if (enabled) {
+            enabled = false;
+            glDisable(stage);
+        }
+        return;
+    }
+    if (!enabled) {
+        enabled = true;
+        glEnable(stage);
+    }
+    glBindProgramARB(stage, current);
+}
+
+} // Anonymous namespace
+
+ProgramManager::ProgramManager(const Device& device)
+    : use_assembly_programs{device.UseAssemblyShaders()} {
     if (use_assembly_programs) {
         glEnable(GL_COMPUTE_PROGRAM_NV);
     } else {
@@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {
 }
 
 void ProgramManager::BindGraphicsPipeline() {
-    if (use_assembly_programs) {
-        UpdateAssemblyPrograms();
-    } else {
+    if (!use_assembly_programs) {
         UpdateSourcePrograms();
     }
 }
@@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {
     }
 }
 
-void ProgramManager::UpdateAssemblyPrograms() {
-    const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
-        if (current == old) {
-            return;
-        }
-        if (current == 0) {
-            if (enabled) {
-                enabled = false;
-                glDisable(stage);
-            }
-            return;
-        }
-        if (!enabled) {
-            enabled = true;
-            glEnable(stage);
-        }
-        glBindProgramARB(stage, current);
-    };
+void ProgramManager::UseVertexShader(GLuint program) {
+    if (use_assembly_programs) {
+        BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
+    }
+    current_state.vertex = program;
+}
 
-    update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
-    update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
-                 old_state.geometry);
-    update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
-                 old_state.fragment);
+void ProgramManager::UseGeometryShader(GLuint program) {
+    if (use_assembly_programs) {
+        BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
+    }
+    current_state.geometry = program;
+}
 
-    old_state = current_state;
+void ProgramManager::UseFragmentShader(GLuint program) {
+    if (use_assembly_programs) {
+        BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
+    }
+    current_state.fragment = program;
 }
 
 void ProgramManager::UpdateSourcePrograms() {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 0f03b4f12..950e0dfcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,17 +45,9 @@ public:
     /// Rewinds BindHostPipeline state changes.
     void RestoreGuestPipeline();
 
-    void UseVertexShader(GLuint program) {
-        current_state.vertex = program;
-    }
-
-    void UseGeometryShader(GLuint program) {
-        current_state.geometry = program;
-    }
-
-    void UseFragmentShader(GLuint program) {
-        current_state.fragment = program;
-    }
+    void UseVertexShader(GLuint program);
+    void UseGeometryShader(GLuint program);
+    void UseFragmentShader(GLuint program);
 
 private:
     struct PipelineState {
@@ -64,9 +56,6 @@ private:
         GLuint fragment = 0;
     };
 
-    /// Update NV_gpu_program5 programs.
-    void UpdateAssemblyPrograms();
-
     /// Update GLSL programs.
     void UpdateSourcePrograms();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 9e74eda0d..4bf0d6090 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <string_view>
 #include <vector>
 #include <glad/glad.h>
 #include "common/assert.h"
@@ -11,7 +12,8 @@
 namespace OpenGL::GLShader {
 
 namespace {
-const char* GetStageDebugName(GLenum type) {
+
+std::string_view StageDebugName(GLenum type) {
     switch (type) {
     case GL_VERTEX_SHADER:
         return "vertex";
@@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) {
     UNIMPLEMENTED();
     return "unknown";
 }
+
 } // Anonymous namespace
 
-GLuint LoadShader(const char* source, GLenum type) {
-    const char* debug_type = GetStageDebugName(type);
+GLuint LoadShader(std::string_view source, GLenum type) {
+    const std::string_view debug_type = StageDebugName(type);
     const GLuint shader_id = glCreateShader(type);
-    glShaderSource(shader_id, 1, &source, nullptr);
+
+    const GLchar* source_string = source.data();
+    const GLint source_length = static_cast<GLint>(source.size());
+
+    glShaderSource(shader_id, 1, &source_string, &source_length);
     LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
     glCompileShader(shader_id);
 
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 03b7548c2..1b770532e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) {
  * @param source String of the GLSL shader program
  * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
  */
-GLuint LoadShader(const char* source, GLenum type);
+GLuint LoadShader(std::string_view source, GLenum type);
 
 /**
  * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index d24fad3de..6bcf831f2 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) {
 
 } // Anonymous namespace
 
-StateTracker::StateTracker(Core::System& system) : system{system} {}
-
-void StateTracker::Initialize() {
-    auto& dirty = system.GPU().Maxwell3D().dirty;
+StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
+    auto& dirty = gpu.Maxwell3D().dirty;
     auto& tables = dirty.tables;
     SetupDirtyRenderTargets(tables);
     SetupDirtyColorMasks(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 0f823288e..9d127548f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -13,8 +13,8 @@
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"
 
-namespace Core {
-class System;
+namespace Tegra {
+class GPU;
 }
 
 namespace OpenGL {
@@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
 
 class StateTracker {
 public:
-    explicit StateTracker(Core::System& system);
-
-    void Initialize();
+    explicit StateTracker(Tegra::GPU& gpu);
 
     void BindIndexBuffer(GLuint new_index_buffer) {
         if (index_buffer == new_index_buffer) {
@@ -103,7 +101,6 @@ public:
     }
 
     void NotifyScreenDrawVertexArray() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::VertexFormats] = true;
         flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
         flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
@@ -117,98 +114,81 @@ public:
     }
 
     void NotifyPolygonModes() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::PolygonModes] = true;
         flags[OpenGL::Dirty::PolygonModeFront] = true;
         flags[OpenGL::Dirty::PolygonModeBack] = true;
     }
 
     void NotifyViewport0() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::Viewports] = true;
         flags[OpenGL::Dirty::Viewport0] = true;
     }
 
     void NotifyScissor0() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::Scissors] = true;
         flags[OpenGL::Dirty::Scissor0] = true;
     }
 
     void NotifyColorMask0() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::ColorMasks] = true;
         flags[OpenGL::Dirty::ColorMask0] = true;
     }
 
     void NotifyBlend0() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::BlendStates] = true;
         flags[OpenGL::Dirty::BlendState0] = true;
     }
 
     void NotifyFramebuffer() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[VideoCommon::Dirty::RenderTargets] = true;
     }
 
     void NotifyFrontFace() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::FrontFace] = true;
     }
 
     void NotifyCullTest() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::CullTest] = true;
     }
 
     void NotifyDepthMask() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::DepthMask] = true;
     }
 
     void NotifyDepthTest() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::DepthTest] = true;
     }
 
     void NotifyStencilTest() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::StencilTest] = true;
     }
 
     void NotifyPolygonOffset() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::PolygonOffset] = true;
     }
 
     void NotifyRasterizeEnable() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::RasterizeEnable] = true;
     }
 
     void NotifyFramebufferSRGB() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::FramebufferSRGB] = true;
     }
 
     void NotifyLogicOp() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::LogicOp] = true;
     }
 
     void NotifyClipControl() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::ClipControl] = true;
     }
 
     void NotifyAlphaTest() {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         flags[OpenGL::Dirty::AlphaTest] = true;
     }
 
 private:
-    Core::System& system;
+    Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
 
     GLuint index_buffer = 0;
 };
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 3655ff629..887995cf4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver
     mapped_ptr = static_cast<u8*>(
         glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
 
-    if (device.HasVertexBufferUnifiedMemory()) {
+    if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
         glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
     }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 61505879b..a863ef218 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -41,91 +41,103 @@ struct FormatTuple {
 };
 
 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},             // ABGR8U
-    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                           // ABGR8S
-    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},              // ABGR8UI
-    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},             // B5G6R5U
-    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},       // A2B10G10R10U
-    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},         // A1B5G5R5U
-    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                            // R8U
-    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                  // R8UI
-    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                         // RGBA16F
-    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                      // RGBA16U
-    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                         // RGBA16S
-    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},            // RGBA16UI
-    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
-    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},              // RGBA32UI
-    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                           // DXT1
-    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                           // DXT23
-    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                           // DXT45
-    {GL_COMPRESSED_RED_RGTC1},                                    // DXN1
-    {GL_COMPRESSED_RG_RGTC2},                                     // DXN2UNORM
-    {GL_COMPRESSED_SIGNED_RG_RGTC2},                              // DXN2SNORM
-    {GL_COMPRESSED_RGBA_BPTC_UNORM},                              // BC7U
-    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                      // BC6H_UF16
-    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                        // BC6H_SF16
-    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                            // ASTC_2D_4X4
-    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                        // BGRA8
-    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                              // RGBA32F
-    {GL_RG32F, GL_RG, GL_FLOAT},                                  // RG32F
-    {GL_R32F, GL_RED, GL_FLOAT},                                  // R32F
-    {GL_R16F, GL_RED, GL_HALF_FLOAT},                             // R16F
-    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                          // R16U
-    {GL_R16_SNORM, GL_RED, GL_SHORT},                             // R16S
-    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                // R16UI
-    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                          // R16I
-    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                          // RG16
-    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                             // RG16F
-    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                // RG16UI
-    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                          // RG16I
-    {GL_RG16_SNORM, GL_RG, GL_SHORT},                             // RG16S
-    {GL_RGB32F, GL_RGB, GL_FLOAT},                                // RGB32F
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},      // RGBA8_SRGB
-    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                            // RG8U
-    {GL_RG8_SNORM, GL_RG, GL_BYTE},                               // RG8S
-    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                   // RG8UI
-    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                  // RG32UI
-    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                          // RGBX16F
-    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                  // R32UI
-    {GL_R32I, GL_RED_INTEGER, GL_INT},                            // R32I
-    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                            // ASTC_2D_8X8
-    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                            // ASTC_2D_8X5
-    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                            // ASTC_2D_5X4
-    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                 // BGRA8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // A8B8G8R8_UNORM
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                               // A8B8G8R8_SNORM
+    {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                            // A8B8G8R8_SINT
+    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                  // A8B8G8R8_UINT
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                     // R5G6B5_UNORM
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                 // B5G6R5_UNORM
+    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM
+    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
+    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
+    {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM
+    {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT
+    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                      // R8_UINT
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                             // R16G16B16A16_FLOAT
+    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                          // R16G16B16A16_UNORM
+    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                             // R16G16B16A16_SNORM
+    {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                          // R16G16B16A16_SINT
+    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                // R16G16B16A16_UINT
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},     // B10G11R11_FLOAT
+    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                  // R32G32B32A32_UINT
+    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                               // BC1_RGBA_UNORM
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                               // BC2_UNORM
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                               // BC3_UNORM
+    {GL_COMPRESSED_RED_RGTC1},                                        // BC4_UNORM
+    {GL_COMPRESSED_SIGNED_RED_RGTC1},                                 // BC4_SNORM
+    {GL_COMPRESSED_RG_RGTC2},                                         // BC5_UNORM
+    {GL_COMPRESSED_SIGNED_RG_RGTC2},                                  // BC5_SNORM
+    {GL_COMPRESSED_RGBA_BPTC_UNORM},                                  // BC7_UNORM
+    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT
+    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT
+    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM
+    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM
+    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT
+    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT
+    {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT
+    {GL_RG32I, GL_RG_INTEGER, GL_INT},                                // R32G32_SINT
+    {GL_R32F, GL_RED, GL_FLOAT},                                      // R32_FLOAT
+    {GL_R16F, GL_RED, GL_HALF_FLOAT},                                 // R16_FLOAT
+    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                              // R16_UNORM
+    {GL_R16_SNORM, GL_RED, GL_SHORT},                                 // R16_SNORM
+    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                    // R16_UINT
+    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                              // R16_SINT
+    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                              // R16G16_UNORM
+    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                 // R16G16_FLOAT
+    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                    // R16G16_UINT
+    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                              // R16G16_SINT
+    {GL_RG16_SNORM, GL_RG, GL_SHORT},                                 // R16G16_SNORM
+    {GL_RGB32F, GL_RGB, GL_FLOAT},                                    // R32G32B32_FLOAT
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},          // A8B8G8R8_SRGB
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                // R8G8_UNORM
+    {GL_RG8_SNORM, GL_RG, GL_BYTE},                                   // R8G8_SNORM
+    {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                // R8G8_SINT
+    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                      // R8G8_UINT
+    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                      // R32G32_UINT
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16X16_FLOAT
+    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                      // R32_UINT
+    {GL_R32I, GL_RED_INTEGER, GL_INT},                                // R32_SINT
+    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM
+    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM
+    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM
+    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB
     // Compressed sRGB formats
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},           // DXT1_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},           // DXT23_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},           // DXT45_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},              // BC7U_SRGB
-    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},           // BC1_RGBA_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},           // BC2_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},           // BC3_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},              // BC7_SRGB
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},          // ASTC_2D_4X4_SRGB
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},          // ASTC_2D_8X8_SRGB
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},          // ASTC_2D_8X5_SRGB
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},          // ASTC_2D_5X4_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                  // ASTC_2D_5X5
+    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                  // ASTC_2D_5X5_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},          // ASTC_2D_5X5_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                 // ASTC_2D_10X8
+    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                 // ASTC_2D_10X8_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},         // ASTC_2D_10X8_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                  // ASTC_2D_6X6
+    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                  // ASTC_2D_6X6_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},          // ASTC_2D_6X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                // ASTC_2D_10X10
+    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                // ASTC_2D_10X10_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},        // ASTC_2D_10X10_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                // ASTC_2D_12X12
+    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                // ASTC_2D_12X12_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},        // ASTC_2D_12X12_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                  // ASTC_2D_8X6
+    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                  // ASTC_2D_8X6_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},          // ASTC_2D_8X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                  // ASTC_2D_6X5
+    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                  // ASTC_2D_6X5_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},          // ASTC_2D_6X5_SRGB
-    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},  // E5B9G9R9F
+    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},  // E5B9G9R9_FLOAT
 
     // Depth formats
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},         // Z32F
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},         // D32_FLOAT
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
 
     // DepthStencil formats
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},               // Z24S8
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},               // S8Z24
-    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+     GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
 }};
 
 const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
@@ -178,10 +190,10 @@ GLint GetSwizzleSource(SwizzleSource source) {
 
 GLenum GetComponent(PixelFormat format, bool is_first) {
     switch (format) {
-    case PixelFormat::Z24S8:
-    case PixelFormat::Z32FS8:
+    case PixelFormat::D24_UNORM_S8_UINT:
+    case PixelFormat::D32_FLOAT_S8_UINT:
         return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
-    case PixelFormat::S8Z24:
+    case PixelFormat::S8_UINT_D24_UNORM:
         return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
     default:
         UNREACHABLE();
@@ -391,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() {
     LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
 }
 
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
+void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
     LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
 }
 
@@ -482,9 +494,9 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou
     std::array swizzle{x_source, y_source, z_source, w_source};
 
     switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
-    case PixelFormat::Z24S8:
-    case PixelFormat::Z32FS8:
-    case PixelFormat::S8Z24:
+    case PixelFormat::D24_UNORM_S8_UINT:
+    case PixelFormat::D32_FLOAT_S8_UINT:
+    case PixelFormat::S8_UINT_D24_UNORM:
         UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
         glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
                             GetComponent(format, x_source == SwizzleSource::R));
@@ -520,10 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
     return texture_view;
 }
 
-TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
-                                       VideoCore::RasterizerInterface& rasterizer,
-                                       const Device& device, StateTracker& state_tracker)
-    : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
+TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+                                       Tegra::Engines::Maxwell3D& maxwell3d,
+                                       Tegra::MemoryManager& gpu_memory, const Device& device,
+                                       StateTracker& state_tracker_)
+    : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
+                                                                                 state_tracker_} {
     src_framebuffer.Create();
     dst_framebuffer.Create();
 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index bfc4ddf5d..7787134fc 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -90,7 +90,7 @@ public:
                       Tegra::Texture::SwizzleSource z_source,
                       Tegra::Texture::SwizzleSource w_source);
 
-    void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+    void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
 
     void MarkAsModified(u64 tick) {
         surface.MarkAsModified(true, tick);
@@ -129,8 +129,10 @@ private:
 
 class TextureCacheOpenGL final : public TextureCacheBase {
 public:
-    explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                                const Device& device, StateTracker& state_tracker);
+    explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+                                Tegra::Engines::Maxwell3D& maxwell3d,
+                                Tegra::MemoryManager& gpu_memory, const Device& device,
+                                StateTracker& state_tracker);
     ~TextureCacheOpenGL();
 
 protected:
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index fe9bd4b5a..a8be2aa37 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -47,6 +47,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
             return GL_UNSIGNED_INT;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return GL_UNSIGNED_INT_2_10_10_10_REV;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::SignedNorm:
@@ -70,6 +72,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
             return GL_INT;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return GL_INT_2_10_10_10_REV;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::Float:
@@ -84,6 +88,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
         case Maxwell::VertexAttribute::Size::Size_32_32_32:
         case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
             return GL_FLOAT;
+        default:
+            break;
         }
         break;
     }
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e66cdc083..2ccca1993 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,6 +21,8 @@
 #include "core/perf_stats.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
+#include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_vert.h"
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -30,60 +32,6 @@ namespace OpenGL {
 
 namespace {
 
-constexpr std::size_t SWAP_CHAIN_SIZE = 3;
-
-struct Frame {
-    u32 width{};                      /// Width of the frame (to detect resize)
-    u32 height{};                     /// Height of the frame
-    bool color_reloaded{};            /// Texture attachment was recreated (ie: resized)
-    OpenGL::OGLRenderbuffer color{};  /// Buffer shared between the render/present FBO
-    OpenGL::OGLFramebuffer render{};  /// FBO created on the render thread
-    OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
-    GLsync render_fence{};            /// Fence created on the render thread
-    GLsync present_fence{};           /// Fence created on the presentation thread
-    bool is_srgb{};                   /// Framebuffer is sRGB or RGB
-};
-
-constexpr char VERTEX_SHADER[] = R"(
-#version 430 core
-
-out gl_PerVertex {
-    vec4 gl_Position;
-};
-
-layout (location = 0) in vec2 vert_position;
-layout (location = 1) in vec2 vert_tex_coord;
-layout (location = 0) out vec2 frag_tex_coord;
-
-// This is a truncated 3x3 matrix for 2D transformations:
-// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
-// The third column performs translation.
-// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
-// implicitly be [0, 0, 1]
-layout (location = 0) uniform mat3x2 modelview_matrix;
-
-void main() {
-    // Multiply input position by the rotscale part of the matrix and then manually translate by
-    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
-    // to `vec3(vert_position.xy, 1.0)`
-    gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
-    frag_tex_coord = vert_tex_coord;
-}
-)";
-
-constexpr char FRAGMENT_SHADER[] = R"(
-#version 430 core
-
-layout (location = 0) in vec2 frag_tex_coord;
-layout (location = 0) out vec4 color;
-
-layout (binding = 0) uniform sampler2D color_texture;
-
-void main() {
-    color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
-}
-)";
-
 constexpr GLint PositionLocation = 0;
 constexpr GLint TexCoordLocation = 1;
 constexpr GLint ModelViewMatrixLocation = 0;
@@ -96,24 +44,6 @@ struct ScreenRectVertex {
     std::array<GLfloat, 2> tex_coord;
 };
 
-/// Returns true if any debug tool is attached
-bool HasDebugTool() {
-    const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
-    if (nsight) {
-        return true;
-    }
-
-    GLint num_extensions;
-    glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
-    for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
-        const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
-        if (!std::strcmp(name, "GL_EXT_debug_tool")) {
-            return true;
-        }
-    }
-    return false;
-}
-
 /**
  * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
  * corner and (width, height) on the lower-bottom.
@@ -197,132 +127,15 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
 
 } // Anonymous namespace
 
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
-    std::mutex swap_chain_lock;
-    std::condition_variable present_cv;
-    std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
-    std::queue<Frame*> free_queue;
-    std::deque<Frame*> present_queue;
-    Frame* previous_frame{};
-
-    FrameMailbox() {
-        for (auto& frame : swap_chain) {
-            free_queue.push(&frame);
-        }
-    }
-
-    ~FrameMailbox() {
-        // lock the mutex and clear out the present and free_queues and notify any people who are
-        // blocked to prevent deadlock on shutdown
-        std::scoped_lock lock{swap_chain_lock};
-        std::queue<Frame*>().swap(free_queue);
-        present_queue.clear();
-        present_cv.notify_all();
-    }
-
-    void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
-        frame->present.Release();
-        frame->present.Create();
-        GLint previous_draw_fbo{};
-        glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
-        glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
-        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
-                                  frame->color.handle);
-        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
-            LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
-        }
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
-        frame->color_reloaded = false;
-    }
-
-    void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
-        // Recreate the color texture attachment
-        frame->color.Release();
-        frame->color.Create();
-        const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
-        glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
-
-        // Recreate the FBO for the render target
-        frame->render.Release();
-        frame->render.Create();
-        glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
-        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
-                                  frame->color.handle);
-        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
-            LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
-        }
-
-        frame->width = width;
-        frame->height = height;
-        frame->color_reloaded = true;
-    }
-
-    Frame* GetRenderFrame() {
-        std::unique_lock lock{swap_chain_lock};
-
-        // If theres no free frames, we will reuse the oldest render frame
-        if (free_queue.empty()) {
-            auto frame = present_queue.back();
-            present_queue.pop_back();
-            return frame;
-        }
-
-        Frame* frame = free_queue.front();
-        free_queue.pop();
-        return frame;
-    }
-
-    void ReleaseRenderFrame(Frame* frame) {
-        std::unique_lock lock{swap_chain_lock};
-        present_queue.push_front(frame);
-        present_cv.notify_one();
-    }
-
-    Frame* TryGetPresentFrame(int timeout_ms) {
-        std::unique_lock lock{swap_chain_lock};
-        // wait for new entries in the present_queue
-        present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
-                            [&] { return !present_queue.empty(); });
-        if (present_queue.empty()) {
-            // timed out waiting for a frame to draw so return the previous frame
-            return previous_frame;
-        }
-
-        // free the previous frame and add it back to the free queue
-        if (previous_frame) {
-            free_queue.push(previous_frame);
-        }
-
-        // the newest entries are pushed to the front of the queue
-        Frame* frame = present_queue.front();
-        present_queue.pop_front();
-        // remove all old entries from the present queue and move them back to the free_queue
-        for (auto f : present_queue) {
-            free_queue.push(f);
-        }
-        present_queue.clear();
-        previous_frame = frame;
-        return frame;
-    }
-};
-
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
-                               Core::Frontend::GraphicsContext& context)
-    : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
-      program_manager{device}, has_debug_tool{HasDebugTool()} {}
+RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+                               Core::Frontend::EmuWindow& emu_window_,
+                               Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+                               std::unique_ptr<Core::Frontend::GraphicsContext> context)
+    : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
+      emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
 
 RendererOpenGL::~RendererOpenGL() = default;
 
-MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
-MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
-
 void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     if (!framebuffer) {
         return;
@@ -331,79 +144,34 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     PrepareRendertarget(framebuffer);
     RenderScreenshot();
 
-    Frame* frame;
-    {
-        MICROPROFILE_SCOPE(OpenGL_WaitPresent);
-
-        frame = frame_mailbox->GetRenderFrame();
-
-        // Clean up sync objects before drawing
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+    DrawScreen(emu_window.GetFramebufferLayout());
 
-        // INTEL driver workaround. We can't delete the previous render sync object until we are
-        // sure that the presentation is done
-        if (frame->present_fence) {
-            glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
-        }
-
-        // delete the draw fence if the frame wasn't presented
-        if (frame->render_fence) {
-            glDeleteSync(frame->render_fence);
-            frame->render_fence = 0;
-        }
-
-        // wait for the presentation to be done
-        if (frame->present_fence) {
-            glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
-            glDeleteSync(frame->present_fence);
-            frame->present_fence = 0;
-        }
-    }
+    ++m_current_frame;
 
-    {
-        MICROPROFILE_SCOPE(OpenGL_RenderFrame);
-        const auto& layout = render_window.GetFramebufferLayout();
-
-        // Recreate the frame if the size of the window has changed
-        if (layout.width != frame->width || layout.height != frame->height ||
-            screen_info.display_srgb != frame->is_srgb) {
-            LOG_DEBUG(Render_OpenGL, "Reloading render frame");
-            frame->is_srgb = screen_info.display_srgb;
-            frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
-        }
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
-        DrawScreen(layout);
-        // Create a fence for the frontend to wait on and swap this frame to OffTex
-        frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-        glFlush();
-        frame_mailbox->ReleaseRenderFrame(frame);
-        m_current_frame++;
-        rasterizer->TickFrame();
-    }
+    rasterizer->TickFrame();
 
     render_window.PollEvents();
-    if (has_debug_tool) {
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
-        Present(0);
-        context.SwapBuffers();
-    }
+    context->SwapBuffers();
 }
 
 void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
-    if (framebuffer) {
-        // If framebuffer is provided, reload it from memory to a texture
-        if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
-            screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
-            screen_info.texture.pixel_format != framebuffer->pixel_format ||
-            gl_framebuffer_data.empty()) {
-            // Reallocate texture if the framebuffer size has changed.
-            // This is expected to not happen very often and hence should not be a
-            // performance problem.
-            ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
-        }
-
-        // Load the framebuffer from memory, draw it to the screen, and swap buffers
-        LoadFBToScreenInfo(*framebuffer);
+    if (!framebuffer) {
+        return;
+    }
+    // If framebuffer is provided, reload it from memory to a texture
+    if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+        screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+        screen_info.texture.pixel_format != framebuffer->pixel_format ||
+        gl_framebuffer_data.empty()) {
+        // Reallocate texture if the framebuffer size has changed.
+        // This is expected to not happen very often and hence should not be a
+        // performance problem.
+        ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
     }
+
+    // Load the framebuffer from memory, draw it to the screen, and swap buffers
+    LoadFBToScreenInfo(*framebuffer);
 }
 
 void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -423,7 +191,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
         VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
     const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
     const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
-    u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)};
+    u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
     rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
 
     // TODO(Rodrigo): Read this from HLE
@@ -453,17 +221,15 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
 }
 
 void RendererOpenGL::InitOpenGLObjects() {
-    frame_mailbox = std::make_unique<FrameMailbox>();
-
     glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
                  Settings::values.bg_blue.GetValue(), 0.0f);
 
     // Create shader programs
     OGLShader vertex_shader;
-    vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+    vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
 
     OGLShader fragment_shader;
-    fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+    fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
 
     vertex_program.Create(true, false, vertex_shader.handle);
     fragment_program.Create(true, false, fragment_shader.handle);
@@ -508,18 +274,18 @@ void RendererOpenGL::AddTelemetryFields() {
     LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
     LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
 
-    auto& telemetry_session = system.TelemetrySession();
-    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
-    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
-    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+    constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
+    telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
+    telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
+    telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
 }
 
 void RendererOpenGL::CreateRasterizer() {
     if (rasterizer) {
         return;
     }
-    rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
-                                                    program_manager, state_tracker);
+    rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
+                                                    screen_info, program_manager, state_tracker);
 }
 
 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -535,12 +301,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
 
     GLint internal_format;
     switch (framebuffer.pixel_format) {
-    case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+    case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
         internal_format = GL_RGBA8;
         texture.gl_format = GL_RGBA;
         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
         break;
-    case Tegra::FramebufferConfig::PixelFormat::RGB565:
+    case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
         internal_format = GL_RGB565;
         texture.gl_format = GL_RGB;
         texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
@@ -682,51 +448,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     program_manager.RestoreGuestPipeline();
 }
 
-bool RendererOpenGL::TryPresent(int timeout_ms) {
-    if (has_debug_tool) {
-        LOG_DEBUG(Render_OpenGL,
-                  "Skipping presentation because we are presenting on the main context");
-        return false;
-    }
-    return Present(timeout_ms);
-}
-
-bool RendererOpenGL::Present(int timeout_ms) {
-    const auto& layout = render_window.GetFramebufferLayout();
-    auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
-    if (!frame) {
-        LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
-        return false;
-    }
-
-    // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
-    // readback since we won't be doing any blending
-    glClear(GL_COLOR_BUFFER_BIT);
-
-    // Recreate the presentation FBO if the color attachment was changed
-    if (frame->color_reloaded) {
-        LOG_DEBUG(Render_OpenGL, "Reloading present frame");
-        frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
-    }
-    glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
-    // INTEL workaround.
-    // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
-    // it on the emulation thread without too much penalty
-    // glDeleteSync(frame.render_sync);
-    // frame.render_sync = 0;
-
-    glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
-    glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
-                      GL_COLOR_BUFFER_BIT, GL_LINEAR);
-
-    // Insert fence for the main thread to block on
-    frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-    glFlush();
-
-    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
-    return true;
-}
-
 void RendererOpenGL::RenderScreenshot() {
     if (!renderer_settings.screenshot_requested) {
         return;
@@ -741,7 +462,7 @@ void RendererOpenGL::RenderScreenshot() {
     screenshot_framebuffer.Create();
     glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
 
-    Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
+    const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
 
     GLuint renderbuffer;
     glGenRenderbuffers(1, &renderbuffer);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 8b18d32e6..9ef181f95 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -16,16 +16,25 @@
 
 namespace Core {
 class System;
-}
+class TelemetrySession;
+} // namespace Core
 
 namespace Core::Frontend {
 class EmuWindow;
 }
 
+namespace Core::Memory {
+class Memory;
+}
+
 namespace Layout {
 struct FramebufferLayout;
 }
 
+namespace Tegra {
+class GPU;
+}
+
 namespace OpenGL {
 
 /// Structure used for storing information about the textures for the Switch screen
@@ -46,24 +55,17 @@ struct ScreenInfo {
     TextureInfo texture;
 };
 
-struct PresentationTexture {
-    u32 width = 0;
-    u32 height = 0;
-    OGLTexture texture;
-};
-
-class FrameMailbox;
-
 class RendererOpenGL final : public VideoCore::RendererBase {
 public:
-    explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
-                            Core::Frontend::GraphicsContext& context);
+    explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
+                            Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+                            Tegra::GPU& gpu,
+                            std::unique_ptr<Core::Frontend::GraphicsContext> context);
     ~RendererOpenGL() override;
 
     bool Init() override;
     void ShutDown() override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    bool TryPresent(int timeout_ms) override;
 
 private:
     /// Initializes the OpenGL state and creates persistent objects.
@@ -91,14 +93,13 @@ private:
 
     void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
 
-    bool Present(int timeout_ms);
-
+    Core::TelemetrySession& telemetry_session;
     Core::Frontend::EmuWindow& emu_window;
-    Core::System& system;
-    Core::Frontend::GraphicsContext& context;
-    const Device device;
+    Core::Memory::Memory& cpu_memory;
+    Tegra::GPU& gpu;
 
-    StateTracker state_tracker{system};
+    const Device device;
+    StateTracker state_tracker{gpu};
 
     // OpenGL object IDs
     OGLBuffer vertex_buffer;
@@ -120,13 +121,8 @@ private:
     std::vector<u8> gl_framebuffer_data;
 
     /// Used for transforming the framebuffer orientation
-    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
+    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
     Common::Rectangle<int> framebuffer_crop_rect;
-
-    /// Frame presentation mailbox
-    std::unique_ptr<FrameMailbox> frame_mailbox;
-
-    bool has_debug_tool = false;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index d1f0ea932..81a39a3b8 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -40,7 +40,6 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
 } // Anonymous namespace
 
 void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
-    const auto& clip = regs.view_volume_clip_control;
     const std::array enabled_lut = {regs.polygon_offset_point_enable,
                                     regs.polygon_offset_line_enable,
                                     regs.polygon_offset_fill_enable};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index d7f1ae89f..d22de1d81 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -78,9 +78,10 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
     case Tegra::Texture::WrapMode::MirrorOnceBorder:
         UNIMPLEMENTED();
         return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+        return {};
     }
-    UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
-    return {};
 }
 
 VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -117,90 +118,101 @@ struct FormatTuple {
     VkFormat format; ///< Vulkan format
     int usage = 0;   ///< Describes image format usage
 } constexpr tex_format_tuples[] = {
-    {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage},    // ABGR8U
-    {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage},    // ABGR8S
-    {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage},     // ABGR8UI
-    {VK_FORMAT_B5G6R5_UNORM_PACK16},                            // B5G6R5U
-    {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U
-    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},              // A1B5G5R5U (flipped with swizzle)
-    {VK_FORMAT_R8_UNORM, Attachable | Storage},                 // R8U
-    {VK_FORMAT_R8_UINT, Attachable | Storage},                  // R8UI
-    {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage},      // RGBA16F
-    {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage},       // RGBA16U
-    {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage},       // RGBA16S
-    {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage},        // RGBA16UI
-    {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage},  // R11FG11FB10F
-    {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage},        // RGBA32UI
-    {VK_FORMAT_BC1_RGBA_UNORM_BLOCK},                           // DXT1
-    {VK_FORMAT_BC2_UNORM_BLOCK},                                // DXT23
-    {VK_FORMAT_BC3_UNORM_BLOCK},                                // DXT45
-    {VK_FORMAT_BC4_UNORM_BLOCK},                                // DXN1
-    {VK_FORMAT_BC5_UNORM_BLOCK},                                // DXN2UNORM
-    {VK_FORMAT_BC5_SNORM_BLOCK},                                // DXN2SNORM
-    {VK_FORMAT_BC7_UNORM_BLOCK},                                // BC7U
-    {VK_FORMAT_BC6H_UFLOAT_BLOCK},                              // BC6H_UF16
-    {VK_FORMAT_BC6H_SFLOAT_BLOCK},                              // BC6H_SF16
-    {VK_FORMAT_ASTC_4x4_UNORM_BLOCK},                           // ASTC_2D_4X4
-    {VK_FORMAT_B8G8R8A8_UNORM, Attachable},                     // BGRA8
-    {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage},      // RGBA32F
-    {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage},            // RG32F
-    {VK_FORMAT_R32_SFLOAT, Attachable | Storage},               // R32F
-    {VK_FORMAT_R16_SFLOAT, Attachable | Storage},               // R16F
-    {VK_FORMAT_R16_UNORM, Attachable | Storage},                // R16U
-    {VK_FORMAT_UNDEFINED},                                      // R16S
-    {VK_FORMAT_R16_UINT, Attachable | Storage},                 // R16UI
-    {VK_FORMAT_UNDEFINED},                                      // R16I
-    {VK_FORMAT_R16G16_UNORM, Attachable | Storage},             // RG16
-    {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage},            // RG16F
-    {VK_FORMAT_UNDEFINED},                                      // RG16UI
-    {VK_FORMAT_UNDEFINED},                                      // RG16I
-    {VK_FORMAT_R16G16_SNORM, Attachable | Storage},             // RG16S
-    {VK_FORMAT_UNDEFINED},                                      // RGB32F
-    {VK_FORMAT_R8G8B8A8_SRGB, Attachable},                      // RGBA8_SRGB
-    {VK_FORMAT_R8G8_UNORM, Attachable | Storage},               // RG8U
-    {VK_FORMAT_R8G8_SNORM, Attachable | Storage},               // RG8S
-    {VK_FORMAT_R8G8_UINT, Attachable | Storage},                // RG8UI
-    {VK_FORMAT_R32G32_UINT, Attachable | Storage},              // RG32UI
-    {VK_FORMAT_UNDEFINED},                                      // RGBX16F
-    {VK_FORMAT_R32_UINT, Attachable | Storage},                 // R32UI
-    {VK_FORMAT_R32_SINT, Attachable | Storage},                 // R32I
-    {VK_FORMAT_ASTC_8x8_UNORM_BLOCK},                           // ASTC_2D_8X8
-    {VK_FORMAT_UNDEFINED},                                      // ASTC_2D_8X5
-    {VK_FORMAT_UNDEFINED},                                      // ASTC_2D_5X4
-    {VK_FORMAT_B8G8R8A8_SRGB, Attachable},                      // BGRA8_SRGB
-    {VK_FORMAT_BC1_RGBA_SRGB_BLOCK},                            // DXT1_SRGB
-    {VK_FORMAT_BC2_SRGB_BLOCK},                                 // DXT23_SRGB
-    {VK_FORMAT_BC3_SRGB_BLOCK},                                 // DXT45_SRGB
-    {VK_FORMAT_BC7_SRGB_BLOCK},                                 // BC7U_SRGB
-    {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable},              // R4G4B4A4U
-    {VK_FORMAT_ASTC_4x4_SRGB_BLOCK},                            // ASTC_2D_4X4_SRGB
-    {VK_FORMAT_ASTC_8x8_SRGB_BLOCK},                            // ASTC_2D_8X8_SRGB
-    {VK_FORMAT_ASTC_8x5_SRGB_BLOCK},                            // ASTC_2D_8X5_SRGB
-    {VK_FORMAT_ASTC_5x4_SRGB_BLOCK},                            // ASTC_2D_5X4_SRGB
-    {VK_FORMAT_ASTC_5x5_UNORM_BLOCK},                           // ASTC_2D_5X5
-    {VK_FORMAT_ASTC_5x5_SRGB_BLOCK},                            // ASTC_2D_5X5_SRGB
-    {VK_FORMAT_ASTC_10x8_UNORM_BLOCK},                          // ASTC_2D_10X8
-    {VK_FORMAT_ASTC_10x8_SRGB_BLOCK},                           // ASTC_2D_10X8_SRGB
-    {VK_FORMAT_ASTC_6x6_UNORM_BLOCK},                           // ASTC_2D_6X6
-    {VK_FORMAT_ASTC_6x6_SRGB_BLOCK},                            // ASTC_2D_6X6_SRGB
-    {VK_FORMAT_ASTC_10x10_UNORM_BLOCK},                         // ASTC_2D_10X10
-    {VK_FORMAT_ASTC_10x10_SRGB_BLOCK},                          // ASTC_2D_10X10_SRGB
-    {VK_FORMAT_ASTC_12x12_UNORM_BLOCK},                         // ASTC_2D_12X12
-    {VK_FORMAT_ASTC_12x12_SRGB_BLOCK},                          // ASTC_2D_12X12_SRGB
-    {VK_FORMAT_ASTC_8x6_UNORM_BLOCK},                           // ASTC_2D_8X6
-    {VK_FORMAT_ASTC_8x6_SRGB_BLOCK},                            // ASTC_2D_8X6_SRGB
-    {VK_FORMAT_ASTC_6x5_UNORM_BLOCK},                           // ASTC_2D_6X5
-    {VK_FORMAT_ASTC_6x5_SRGB_BLOCK},                            // ASTC_2D_6X5_SRGB
-    {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32},                         // E5B9G9R9F
+    {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage},    // A8B8G8R8_UNORM
+    {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage},    // A8B8G8R8_SNORM
+    {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage},     // A8B8G8R8_SINT
+    {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage},     // A8B8G8R8_UINT
+    {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable},                // R5G6B5_UNORM
+    {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable},                // B5G6R5_UNORM
+    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},              // A1R5G5B5_UNORM
+    {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
+    {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage},  // A2B10G10R10_UINT
+    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},         // A1B5G5R5_UNORM (flipped with swizzle)
+    {VK_FORMAT_R8_UNORM, Attachable | Storage},            // R8_UNORM
+    {VK_FORMAT_R8_SNORM, Attachable | Storage},            // R8_SNORM
+    {VK_FORMAT_R8_SINT, Attachable | Storage},             // R8_SINT
+    {VK_FORMAT_R8_UINT, Attachable | Storage},             // R8_UINT
+    {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT
+    {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage},  // R16G16B16A16_UNORM
+    {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage},  // R16G16B16A16_SNORM
+    {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage},   // R16G16B16A16_SINT
+    {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage},   // R16G16B16A16_UINT
+    {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT
+    {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage},       // R32G32B32A32_UINT
+    {VK_FORMAT_BC1_RGBA_UNORM_BLOCK},                          // BC1_RGBA_UNORM
+    {VK_FORMAT_BC2_UNORM_BLOCK},                               // BC2_UNORM
+    {VK_FORMAT_BC3_UNORM_BLOCK},                               // BC3_UNORM
+    {VK_FORMAT_BC4_UNORM_BLOCK},                               // BC4_UNORM
+    {VK_FORMAT_BC4_SNORM_BLOCK},                               // BC4_SNORM
+    {VK_FORMAT_BC5_UNORM_BLOCK},                               // BC5_UNORM
+    {VK_FORMAT_BC5_SNORM_BLOCK},                               // BC5_SNORM
+    {VK_FORMAT_BC7_UNORM_BLOCK},                               // BC7_UNORM
+    {VK_FORMAT_BC6H_UFLOAT_BLOCK},                             // BC6H_UFLOAT
+    {VK_FORMAT_BC6H_SFLOAT_BLOCK},                             // BC6H_SFLOAT
+    {VK_FORMAT_ASTC_4x4_UNORM_BLOCK},                          // ASTC_2D_4X4_UNORM
+    {VK_FORMAT_B8G8R8A8_UNORM, Attachable},                    // B8G8R8A8_UNORM
+    {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage},     // R32G32B32A32_FLOAT
+    {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage},       // R32G32B32A32_SINT
+    {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage},           // R32G32_FLOAT
+    {VK_FORMAT_R32G32_SINT, Attachable | Storage},             // R32G32_SINT
+    {VK_FORMAT_R32_SFLOAT, Attachable | Storage},              // R32_FLOAT
+    {VK_FORMAT_R16_SFLOAT, Attachable | Storage},              // R16_FLOAT
+    {VK_FORMAT_R16_UNORM, Attachable | Storage},               // R16_UNORM
+    {VK_FORMAT_UNDEFINED},                                     // R16_SNORM
+    {VK_FORMAT_R16_UINT, Attachable | Storage},                // R16_UINT
+    {VK_FORMAT_UNDEFINED},                                     // R16_SINT
+    {VK_FORMAT_R16G16_UNORM, Attachable | Storage},            // R16G16_UNORM
+    {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage},           // R16G16_FLOAT
+    {VK_FORMAT_UNDEFINED},                                     // R16G16_UINT
+    {VK_FORMAT_UNDEFINED},                                     // R16G16_SINT
+    {VK_FORMAT_R16G16_SNORM, Attachable | Storage},            // R16G16_SNORM
+    {VK_FORMAT_UNDEFINED},                                     // R32G32B32_FLOAT
+    {VK_FORMAT_R8G8B8A8_SRGB, Attachable},                     // A8B8G8R8_SRGB
+    {VK_FORMAT_R8G8_UNORM, Attachable | Storage},              // R8G8_UNORM
+    {VK_FORMAT_R8G8_SNORM, Attachable | Storage},              // R8G8_SNORM
+    {VK_FORMAT_R8G8_SINT, Attachable | Storage},               // R8G8_SINT
+    {VK_FORMAT_R8G8_UINT, Attachable | Storage},               // R8G8_UINT
+    {VK_FORMAT_R32G32_UINT, Attachable | Storage},             // R32G32_UINT
+    {VK_FORMAT_UNDEFINED},                                     // R16G16B16X16_FLOAT
+    {VK_FORMAT_R32_UINT, Attachable | Storage},                // R32_UINT
+    {VK_FORMAT_R32_SINT, Attachable | Storage},                // R32_SINT
+    {VK_FORMAT_ASTC_8x8_UNORM_BLOCK},                          // ASTC_2D_8X8_UNORM
+    {VK_FORMAT_UNDEFINED},                                     // ASTC_2D_8X5_UNORM
+    {VK_FORMAT_UNDEFINED},                                     // ASTC_2D_5X4_UNORM
+    {VK_FORMAT_B8G8R8A8_SRGB, Attachable},                     // B8G8R8A8_SRGB
+    {VK_FORMAT_BC1_RGBA_SRGB_BLOCK},                           // BC1_RGBA_SRGB
+    {VK_FORMAT_BC2_SRGB_BLOCK},                                // BC2_SRGB
+    {VK_FORMAT_BC3_SRGB_BLOCK},                                // BC3_SRGB
+    {VK_FORMAT_BC7_SRGB_BLOCK},                                // BC7_SRGB
+    {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable},             // A4B4G4R4_UNORM
+    {VK_FORMAT_ASTC_4x4_SRGB_BLOCK},                           // ASTC_2D_4X4_SRGB
+    {VK_FORMAT_ASTC_8x8_SRGB_BLOCK},                           // ASTC_2D_8X8_SRGB
+    {VK_FORMAT_ASTC_8x5_SRGB_BLOCK},                           // ASTC_2D_8X5_SRGB
+    {VK_FORMAT_ASTC_5x4_SRGB_BLOCK},                           // ASTC_2D_5X4_SRGB
+    {VK_FORMAT_ASTC_5x5_UNORM_BLOCK},                          // ASTC_2D_5X5_UNORM
+    {VK_FORMAT_ASTC_5x5_SRGB_BLOCK},                           // ASTC_2D_5X5_SRGB
+    {VK_FORMAT_ASTC_10x8_UNORM_BLOCK},                         // ASTC_2D_10X8_UNORM
+    {VK_FORMAT_ASTC_10x8_SRGB_BLOCK},                          // ASTC_2D_10X8_SRGB
+    {VK_FORMAT_ASTC_6x6_UNORM_BLOCK},                          // ASTC_2D_6X6_UNORM
+    {VK_FORMAT_ASTC_6x6_SRGB_BLOCK},                           // ASTC_2D_6X6_SRGB
+    {VK_FORMAT_ASTC_10x10_UNORM_BLOCK},                        // ASTC_2D_10X10_UNORM
+    {VK_FORMAT_ASTC_10x10_SRGB_BLOCK},                         // ASTC_2D_10X10_SRGB
+    {VK_FORMAT_ASTC_12x12_UNORM_BLOCK},                        // ASTC_2D_12X12_UNORM
+    {VK_FORMAT_ASTC_12x12_SRGB_BLOCK},                         // ASTC_2D_12X12_SRGB
+    {VK_FORMAT_ASTC_8x6_UNORM_BLOCK},                          // ASTC_2D_8X6_UNORM
+    {VK_FORMAT_ASTC_8x6_SRGB_BLOCK},                           // ASTC_2D_8X6_SRGB
+    {VK_FORMAT_ASTC_6x5_UNORM_BLOCK},                          // ASTC_2D_6X5_UNORM
+    {VK_FORMAT_ASTC_6x5_SRGB_BLOCK},                           // ASTC_2D_6X5_SRGB
+    {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32},                        // E5B9G9R9_FLOAT
 
     // Depth formats
-    {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F
-    {VK_FORMAT_D16_UNORM, Attachable},  // Z16
+    {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
+    {VK_FORMAT_D16_UNORM, Attachable},  // D16_UNORM
 
     // DepthStencil formats
-    {VK_FORMAT_D24_UNORM_S8_UINT, Attachable},  // Z24S8
-    {VK_FORMAT_D24_UNORM_S8_UINT, Attachable},  // S8Z24 (emulated)
-    {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8
+    {VK_FORMAT_D24_UNORM_S8_UINT, Attachable},  // D24_UNORM_S8_UINT
+    {VK_FORMAT_D24_UNORM_S8_UINT, Attachable},  // S8_UINT_D24_UNORM (emulated)
+    {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT
 };
 static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);
 
@@ -221,7 +233,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
         return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
     }
 
-    // Use ABGR8 on hardware that doesn't support ASTC natively
+    // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
     if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
         tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
                            ? VK_FORMAT_A8B8G8R8_SRGB_PACK32
@@ -287,9 +299,10 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
     case Maxwell::PrimitiveTopology::Patches:
         return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+        return {};
     }
-    UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
-    return {};
 }
 
 VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
@@ -314,6 +327,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R16G16B16A16_UNORM;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::SignedNorm:
@@ -336,6 +351,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R16G16B16A16_SNORM;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::UnsignedScaled:
@@ -358,6 +375,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R16G16B16A16_USCALED;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::SignedScaled:
@@ -380,6 +399,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R16G16B16A16_SSCALED;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -410,6 +431,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R32G32B32A32_UINT;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return VK_FORMAT_A2B10G10R10_UINT_PACK32;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::SignedInt:
@@ -440,6 +463,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R32G32B32A32_SINT;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return VK_FORMAT_A2B10G10R10_SINT_PACK32;
+        default:
+            break;
         }
         break;
     case Maxwell::VertexAttribute::Type::Float:
@@ -460,6 +485,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
             return VK_FORMAT_R32G32B32_SFLOAT;
         case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
             return VK_FORMAT_R32G32B32A32_SFLOAT;
+        default:
+            break;
         }
         break;
     }
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
index 435c8c1b8..5b01020ec 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
@@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() {
         return false;
     }
 
-    dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
+    dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
 
-    (void)FileUtil::DeleteDirRecursively(dump_dir);
-    if (!FileUtil::CreateDir(dump_dir)) {
+    (void)Common::FS::DeleteDirRecursively(dump_dir);
+    if (!Common::FS::CreateDir(dump_dir)) {
         LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
         return false;
     }
@@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
         return;
     }
 
-    FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
+    Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
     if (!file.IsOpen()) {
         LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
         return;
@@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
     }();
 
     std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
-    if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
+    if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
         LOG_ERROR(Render_Vulkan, "Failed to write dump file");
         return;
     }
     const std::string_view json_view(json.data(), json.size());
-    if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
+    if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
         LOG_ERROR(Render_Vulkan, "Failed to write JSON");
         return;
     }
@@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_
 
     const std::string path =
         fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
-    FileUtil::IOFile file(path, "wb");
+    Common::FS::IOFile file(path, "wb");
     if (!file.IsOpen()) {
         LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
         return;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2258479f5..715182b3b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -25,9 +25,9 @@
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -56,7 +56,7 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                        VkDebugUtilsMessageTypeFlagsEXT type,
                        const VkDebugUtilsMessengerCallbackDataEXT* data,
                        [[maybe_unused]] void* user_data) {
-    const char* message{data->pMessage};
+    const char* const message{data->pMessage};
 
     if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
         LOG_CRITICAL(Render_Vulkan, "{}", message);
@@ -78,7 +78,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
     if (!libvulkan_env || !library.Open(libvulkan_env)) {
         // Use the libvulkan.dylib from the application bundle.
         const std::string filename =
-            FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+            Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
         library.Open(filename.c_str());
     }
 #else
@@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
     if (!library.Open(filename.c_str())) {
         // Android devices may not have libvulkan.so.1, only libvulkan.so.
         filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
-        library.Open(filename.c_str());
+        (void)library.Open(filename.c_str());
     }
 #endif
     return library;
@@ -237,8 +237,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
 
 } // Anonymous namespace
 
-RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
-    : RendererBase(window), system{system} {}
+RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
+                               Core::Frontend::EmuWindow& emu_window,
+                               Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+                               std::unique_ptr<Core::Frontend::GraphicsContext> context)
+    : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_},
+      cpu_memory{cpu_memory_}, gpu{gpu_} {}
 
 RendererVulkan::~RendererVulkan() {
     ShutDown();
@@ -265,11 +269,11 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
         scheduler->WaitWorker();
 
         swapchain->AcquireNextImage();
-        const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
+        const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
 
-        scheduler->Flush(false, render_semaphore);
+        scheduler->Flush(render_semaphore);
 
-        if (swapchain->Present(render_semaphore, fence)) {
+        if (swapchain->Present(render_semaphore)) {
             blit_screen->Recreate();
         }
 
@@ -279,11 +283,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     render_window.PollEvents();
 }
 
-bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
-    // TODO (bunnei): ImplementMe
-    return true;
-}
-
 bool RendererVulkan::Init() {
     library = OpenVulkanLibrary();
     instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
@@ -296,23 +295,21 @@ bool RendererVulkan::Init() {
 
     memory_manager = std::make_unique<VKMemoryManager>(*device);
 
-    resource_manager = std::make_unique<VKResourceManager>(*device);
+    state_tracker = std::make_unique<StateTracker>(gpu);
+
+    scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
 
     const auto& framebuffer = render_window.GetFramebufferLayout();
-    swapchain = std::make_unique<VKSwapchain>(*surface, *device);
+    swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
     swapchain->Create(framebuffer.width, framebuffer.height, false);
 
-    state_tracker = std::make_unique<StateTracker>(system);
-
-    scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
-
-    rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
-                                                    *resource_manager, *memory_manager,
-                                                    *state_tracker, *scheduler);
+    rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
+                                                    cpu_memory, screen_info, *device,
+                                                    *memory_manager, *state_tracker, *scheduler);
 
-    blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
-                                                 *resource_manager, *memory_manager, *swapchain,
-                                                 *scheduler, screen_info);
+    blit_screen =
+        std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
+                                       *memory_manager, *swapchain, *scheduler, screen_info);
 
     return true;
 }
@@ -330,7 +327,6 @@ void RendererVulkan::ShutDown() {
     scheduler.reset();
     swapchain.reset();
     memory_manager.reset();
-    resource_manager.reset();
     device.reset();
 }
 
@@ -438,8 +434,7 @@ void RendererVulkan::Report() const {
     LOG_INFO(Render_Vulkan, "Device: {}", model_name);
     LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
 
-    auto& telemetry_session = system.TelemetrySession();
-    constexpr auto field = Telemetry::FieldType::UserSystem;
+    static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
     telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
     telemetry_session.AddField(field, "GPU_Model", model_name);
     telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 522b5bff8..49a4141ec 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -14,7 +14,15 @@
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Core {
-class System;
+class TelemetrySession;
+}
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace Tegra {
+class GPU;
 }
 
 namespace Vulkan {
@@ -22,9 +30,7 @@ namespace Vulkan {
 class StateTracker;
 class VKBlitScreen;
 class VKDevice;
-class VKFence;
 class VKMemoryManager;
-class VKResourceManager;
 class VKSwapchain;
 class VKScheduler;
 class VKImage;
@@ -38,13 +44,15 @@ struct VKScreenInfo {
 
 class RendererVulkan final : public VideoCore::RendererBase {
 public:
-    explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+    explicit RendererVulkan(Core::TelemetrySession& telemetry_session,
+                            Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+                            Tegra::GPU& gpu,
+                            std::unique_ptr<Core::Frontend::GraphicsContext> context);
     ~RendererVulkan() override;
 
     bool Init() override;
     void ShutDown() override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    bool TryPresent(int timeout_ms) override;
 
     static std::vector<std::string> EnumerateDevices();
 
@@ -57,7 +65,9 @@ private:
 
     void Report() const;
 
-    Core::System& system;
+    Core::TelemetrySession& telemetry_session;
+    Core::Memory::Memory& cpu_memory;
+    Tegra::GPU& gpu;
 
     Common::DynamicLibrary library;
     vk::InstanceDispatch dld;
@@ -69,11 +79,10 @@ private:
 
     vk::DebugCallback debug_callback;
     std::unique_ptr<VKDevice> device;
-    std::unique_ptr<VKSwapchain> swapchain;
     std::unique_ptr<VKMemoryManager> memory_manager;
-    std::unique_ptr<VKResourceManager> resource_manager;
     std::unique_ptr<StateTracker> state_tracker;
     std::unique_ptr<VKScheduler> scheduler;
+    std::unique_ptr<VKSwapchain> swapchain;
     std::unique_ptr<VKBlitScreen> blit_screen;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 866813465..b5b60309e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -12,11 +12,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
-
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
 #include "core/memory.h"
-
 #include "video_core/gpu.h"
 #include "video_core/morton.h"
 #include "video_core/rasterizer_interface.h"
@@ -24,8 +22,8 @@
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -187,9 +185,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
 
 VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
     switch (framebuffer.pixel_format) {
-    case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+    case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
         return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
-    case Tegra::FramebufferConfig::PixelFormat::RGB565:
+    case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
         return VK_FORMAT_R5G6B5_UNORM_PACK16;
     default:
         UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
@@ -210,17 +208,15 @@ struct VKBlitScreen::BufferData {
     // Unaligned image data goes here
 };
 
-VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
-                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
-                           VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
-                           VKSwapchain& swapchain, VKScheduler& scheduler,
-                           const VKScreenInfo& screen_info)
-    : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
-      resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
-      scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
-    watches.resize(image_count);
-    std::generate(watches.begin(), watches.end(),
-                  []() { return std::make_unique<VKFenceWatch>(); });
+VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
+                           Core::Frontend::EmuWindow& render_window_,
+                           VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
+                           VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
+                           VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
+    : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
+      device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_},
+      scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+    resource_ticks.resize(image_count);
 
     CreateStaticResources();
     CreateDynamicResources();
@@ -232,15 +228,16 @@ void VKBlitScreen::Recreate() {
     CreateDynamicResources();
 }
 
-std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
-                                                     bool use_accelerated) {
+VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
     RefreshResources(framebuffer);
 
     // Finish any pending renderpass
     scheduler.RequestOutsideRenderPassOperationContext();
 
     const std::size_t image_index = swapchain.GetImageIndex();
-    watches[image_index]->Watch(scheduler.GetFence());
+
+    scheduler.Wait(resource_ticks[image_index]);
+    resource_ticks[image_index] = scheduler.CurrentTick();
 
     VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
 
@@ -259,7 +256,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
         const auto pixel_format =
             VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
         const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
-        const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
+        const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
         rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
 
         // TODO(Rodrigo): Read this from HLE
@@ -343,7 +340,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
         cmdbuf.EndRenderPass();
     });
 
-    return {scheduler.GetFence(), *semaphores[image_index]};
+    return *semaphores[image_index];
 }
 
 void VKBlitScreen::CreateStaticResources() {
@@ -696,6 +693,7 @@ void VKBlitScreen::CreateFramebuffers() {
         .flags = 0,
         .renderPass = *renderpass,
         .attachmentCount = 1,
+        .pAttachments = nullptr,
         .width = size.width,
         .height = size.height,
         .layers = 1,
@@ -710,7 +708,7 @@ void VKBlitScreen::CreateFramebuffers() {
 
 void VKBlitScreen::ReleaseRawImages() {
     for (std::size_t i = 0; i < raw_images.size(); ++i) {
-        watches[i]->Wait();
+        scheduler.Wait(resource_ticks.at(i));
     }
     raw_images.clear();
     raw_buffer_commits.clear();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 243640fab..8f2839214 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,16 +5,18 @@
 #pragma once
 
 #include <memory>
-#include <tuple>
 
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Core {
 class System;
 }
 
+namespace Core::Memory {
+class Memory;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -30,26 +32,26 @@ class RasterizerInterface;
 namespace Vulkan {
 
 struct ScreenInfo;
+
 class RasterizerVulkan;
 class VKDevice;
-class VKFence;
 class VKImage;
 class VKScheduler;
 class VKSwapchain;
 
 class VKBlitScreen final {
 public:
-    explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
+    explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
+                          Core::Frontend::EmuWindow& render_window,
                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
-                          VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
-                          VKSwapchain& swapchain, VKScheduler& scheduler,
-                          const VKScreenInfo& screen_info);
+                          VKMemoryManager& memory_manager, VKSwapchain& swapchain,
+                          VKScheduler& scheduler, const VKScreenInfo& screen_info);
     ~VKBlitScreen();
 
     void Recreate();
 
-    std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
-                                           bool use_accelerated);
+    [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
+                                   bool use_accelerated);
 
 private:
     struct BufferData;
@@ -81,11 +83,10 @@ private:
     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
                           std::size_t image_index) const;
 
-    Core::System& system;
+    Core::Memory::Memory& cpu_memory;
     Core::Frontend::EmuWindow& render_window;
     VideoCore::RasterizerInterface& rasterizer;
     const VKDevice& device;
-    VKResourceManager& resource_manager;
     VKMemoryManager& memory_manager;
     VKSwapchain& swapchain;
     VKScheduler& scheduler;
@@ -106,7 +107,7 @@ private:
     vk::Buffer buffer;
     VKMemoryCommit buffer_commit;
 
-    std::vector<std::unique_ptr<VKFenceWatch>> watches;
+    std::vector<u64> resource_ticks;
 
     std::vector<vk::Semaphore> semaphores;
     std::vector<std::unique_ptr<VKImage>> raw_images;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 2be38d419..d9d3da9ea 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
 
 Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
                VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
-    : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
-    VkBufferCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.size = static_cast<VkDeviceSize>(size);
-    ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
-    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    ci.queueFamilyIndexCount = 0;
-    ci.pQueueFamilyIndices = nullptr;
+    : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
+    const VkBufferCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = static_cast<VkDeviceSize>(size),
+        .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    };
 
     buffer.handle = device.GetLogical().CreateBuffer(ci);
     buffer.commit = memory_manager.Commit(buffer.handle, false);
@@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
     scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
         cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
 
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-        barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = handle;
-        barrier.offset = offset;
-        barrier.size = size;
+        const VkBufferMemoryBarrier barrier{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = handle,
+            .offset = offset,
+            .size = size,
+        };
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
                                barrier, {});
     });
@@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
 
     const VkBuffer handle = Handle();
     scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = handle;
-        barrier.offset = offset;
-        barrier.size = size;
+        const VkBufferMemoryBarrier barrier{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = handle,
+            .offset = offset,
+            .size = size,
+        };
 
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                                    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
@@ -142,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
     });
 }
 
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                             const VKDevice& device, VKMemoryManager& memory_manager,
-                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
-    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
-                                                                 CreateStreamBuffer(device,
-                                                                                    scheduler)},
-      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
-                                                                                staging_pool} {}
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+                             Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+                             const VKDevice& device_, VKMemoryManager& memory_manager_,
+                             VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
+    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory,
+                                                                 CreateStreamBuffer(device_,
+                                                                                    scheduler_)},
+      device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
+                                                                                   staging_pool_} {}
 
 VKBufferCache::~VKBufferCache() = default;
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 991ee451c..7fb5ceedf 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -13,10 +13,6 @@
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
-namespace Core {
-class System;
-}
-
 namespace Vulkan {
 
 class VKDevice;
@@ -53,7 +49,8 @@ private:
 
 class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
 public:
-    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+                           Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
                            const VKDevice& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
     ~VKBufferCache();
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
new file mode 100644
index 000000000..6339f4fe0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstddef>
+
+#include "video_core/renderer_vulkan/vk_command_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+
+struct CommandPool::Pool {
+    vk::CommandPool handle;
+    vk::CommandBuffers cmdbufs;
+};
+
+CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device)
+    : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {}
+
+CommandPool::~CommandPool() = default;
+
+void CommandPool::Allocate(size_t begin, size_t end) {
+    // Command buffers are going to be committed, recorded, executed every single usage cycle.
+    // They are also going to be reset when committed.
+    Pool& pool = pools.emplace_back();
+    pool.handle = device.GetLogical().CreateCommandPool({
+        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .pNext = nullptr,
+        .flags =
+            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex = device.GetGraphicsFamily(),
+    });
+    pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
+}
+
+VkCommandBuffer CommandPool::Commit() {
+    const size_t index = CommitResource();
+    const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
+    const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
+    return pools[pool_index].cmdbufs[sub_index];
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
new file mode 100644
index 000000000..b9cb3fb5d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -0,0 +1,34 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+class VKDevice;
+
+class CommandPool final : public ResourcePool {
+public:
+    explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device);
+    ~CommandPool() override;
+
+    void Allocate(size_t begin, size_t end) override;
+
+    VkCommandBuffer Commit();
+
+private:
+    struct Pool;
+
+    const VKDevice& device;
+    std::vector<Pool> pools;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index da71e710c..9637c6059 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -112,35 +112,36 @@ constexpr u8 quad_array[] = {
     0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
     0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
     0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
-    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
 
 VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
-    VkDescriptorSetLayoutBinding binding;
-    binding.binding = 0;
-    binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-    binding.descriptorCount = 1;
-    binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
-    binding.pImmutableSamplers = nullptr;
-    return binding;
+    return {
+        .binding = 0,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+        .descriptorCount = 1,
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .pImmutableSamplers = nullptr,
+    };
 }
 
 VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
-    VkDescriptorUpdateTemplateEntryKHR entry;
-    entry.dstBinding = 0;
-    entry.dstArrayElement = 0;
-    entry.descriptorCount = 1;
-    entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-    entry.offset = 0;
-    entry.stride = sizeof(DescriptorUpdateEntry);
-    return entry;
+    return {
+        .dstBinding = 0,
+        .dstArrayElement = 0,
+        .descriptorCount = 1,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+        .offset = 0,
+        .stride = sizeof(DescriptorUpdateEntry),
+    };
 }
 
 VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
-    VkPushConstantRange range;
-    range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
-    range.offset = 0;
-    range.size = static_cast<u32>(size);
-    return range;
+    return {
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .offset = 0,
+        .size = static_cast<u32>(size),
+    };
 }
 
 // Uint8 SPIR-V module. Generated from the "shaders/" directory.
@@ -218,7 +219,8 @@ constexpr u8 uint8_pass[] = {
     0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
     0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
     0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
-    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
 
 // Quad indexed SPIR-V module. Generated from the "shaders/" directory.
 constexpr u8 QUAD_INDEXED_SPV[] = {
@@ -341,32 +343,37 @@ constexpr u8 QUAD_INDEXED_SPV[] = {
     0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
     0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
     0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
-    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
 
 std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
-    std::array<VkDescriptorSetLayoutBinding, 2> bindings;
-    bindings[0].binding = 0;
-    bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-    bindings[0].descriptorCount = 1;
-    bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
-    bindings[0].pImmutableSamplers = nullptr;
-    bindings[1].binding = 1;
-    bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-    bindings[1].descriptorCount = 1;
-    bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
-    bindings[1].pImmutableSamplers = nullptr;
-    return bindings;
+    return {{
+        {
+            .binding = 0,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+    }};
 }
 
 VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
-    VkDescriptorUpdateTemplateEntryKHR entry;
-    entry.dstBinding = 0;
-    entry.dstArrayElement = 0;
-    entry.descriptorCount = 2;
-    entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-    entry.offset = 0;
-    entry.stride = sizeof(DescriptorUpdateEntry);
-    return entry;
+    return {
+        .dstBinding = 0,
+        .dstArrayElement = 0,
+        .descriptorCount = 2,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+        .offset = 0,
+        .stride = sizeof(DescriptorUpdateEntry),
+    };
 }
 
 } // Anonymous namespace
@@ -376,37 +383,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
                              vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
                              vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
                              const u8* code) {
-    VkDescriptorSetLayoutCreateInfo descriptor_layout_ci;
-    descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
-    descriptor_layout_ci.pNext = nullptr;
-    descriptor_layout_ci.flags = 0;
-    descriptor_layout_ci.bindingCount = bindings.size();
-    descriptor_layout_ci.pBindings = bindings.data();
-    descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci);
-
-    VkPipelineLayoutCreateInfo pipeline_layout_ci;
-    pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
-    pipeline_layout_ci.pNext = nullptr;
-    pipeline_layout_ci.flags = 0;
-    pipeline_layout_ci.setLayoutCount = 1;
-    pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address();
-    pipeline_layout_ci.pushConstantRangeCount = push_constants.size();
-    pipeline_layout_ci.pPushConstantRanges = push_constants.data();
-    layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci);
+    descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = bindings.size(),
+        .pBindings = bindings.data(),
+    });
+
+    layout = device.GetLogical().CreatePipelineLayout({
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .setLayoutCount = 1,
+        .pSetLayouts = descriptor_set_layout.address(),
+        .pushConstantRangeCount = push_constants.size(),
+        .pPushConstantRanges = push_constants.data(),
+    });
 
     if (!templates.empty()) {
-        VkDescriptorUpdateTemplateCreateInfoKHR template_ci;
-        template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
-        template_ci.pNext = nullptr;
-        template_ci.flags = 0;
-        template_ci.descriptorUpdateEntryCount = templates.size();
-        template_ci.pDescriptorUpdateEntries = templates.data();
-        template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
-        template_ci.descriptorSetLayout = *descriptor_set_layout;
-        template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
-        template_ci.pipelineLayout = *layout;
-        template_ci.set = 0;
-        descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci);
+        descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+            .pNext = nullptr,
+            .flags = 0,
+            .descriptorUpdateEntryCount = templates.size(),
+            .pDescriptorUpdateEntries = templates.data(),
+            .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+            .descriptorSetLayout = *descriptor_set_layout,
+            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+            .pipelineLayout = *layout,
+            .set = 0,
+        });
 
         descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
     }
@@ -414,42 +421,42 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
     auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
     std::memcpy(code_copy.get(), code, code_size);
 
-    VkShaderModuleCreateInfo module_ci;
-    module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
-    module_ci.pNext = nullptr;
-    module_ci.flags = 0;
-    module_ci.codeSize = code_size;
-    module_ci.pCode = code_copy.get();
-    module = device.GetLogical().CreateShaderModule(module_ci);
-
-    VkComputePipelineCreateInfo pipeline_ci;
-    pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
-    pipeline_ci.pNext = nullptr;
-    pipeline_ci.flags = 0;
-    pipeline_ci.layout = *layout;
-    pipeline_ci.basePipelineHandle = nullptr;
-    pipeline_ci.basePipelineIndex = 0;
-
-    VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage;
-    stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
-    stage_ci.pNext = nullptr;
-    stage_ci.flags = 0;
-    stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
-    stage_ci.module = *module;
-    stage_ci.pName = "main";
-    stage_ci.pSpecializationInfo = nullptr;
-
-    pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci);
+    module = device.GetLogical().CreateShaderModule({
+        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .codeSize = code_size,
+        .pCode = code_copy.get(),
+    });
+
+    pipeline = device.GetLogical().CreateComputePipeline({
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stage =
+            {
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                .module = *module,
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            },
+        .layout = *layout,
+        .basePipelineHandle = nullptr,
+        .basePipelineIndex = 0,
+    });
 }
 
 VKComputePass::~VKComputePass() = default;
 
-VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
-                                                   VKFence& fence) {
+VkDescriptorSet VKComputePass::CommitDescriptorSet(
+    VKUpdateDescriptorQueue& update_descriptor_queue) {
     if (!descriptor_template) {
         return nullptr;
     }
-    const auto set = descriptor_allocator->Commit(fence);
+    const VkDescriptorSet set = descriptor_allocator->Commit();
     update_descriptor_queue.Send(*descriptor_template, set);
     return set;
 }
@@ -473,7 +480,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
-    const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
 
@@ -516,13 +523,13 @@ Uint8Pass::~Uint8Pass() = default;
 
 std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
                                              u64 src_offset) {
-    const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
+    const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
     auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
     update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
-    const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
@@ -585,7 +592,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
     update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
-    const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 230b526bc..acc94f27e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -15,7 +15,6 @@
 namespace Vulkan {
 
 class VKDevice;
-class VKFence;
 class VKScheduler;
 class VKStagingBufferPool;
 class VKUpdateDescriptorQueue;
@@ -30,8 +29,7 @@ public:
     ~VKComputePass();
 
 protected:
-    VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
-                                        VKFence& fence);
+    VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
 
     vk::DescriptorUpdateTemplateKHR descriptor_template;
     vk::PipelineLayout layout;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 281bf9ac3..9be72dc9b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -32,7 +32,7 @@ VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
     if (!descriptor_template) {
         return {};
     }
-    const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+    const VkDescriptorSet set = descriptor_allocator.Commit();
     update_descriptor_queue.Send(*descriptor_template, set);
     return set;
 }
@@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
     const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
         // TODO(Rodrigo): Maybe make individual bindings here?
         for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
-            VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
-            entry.binding = binding++;
-            entry.descriptorType = descriptor_type;
-            entry.descriptorCount = 1;
-            entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
-            entry.pImmutableSamplers = nullptr;
+            bindings.push_back({
+                .binding = binding++,
+                .descriptorType = descriptor_type,
+                .descriptorCount = 1,
+                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+                .pImmutableSamplers = nullptr,
+            });
         }
     };
     add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
@@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
     add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
     add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
 
-    VkDescriptorSetLayoutCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.bindingCount = static_cast<u32>(bindings.size());
-    ci.pBindings = bindings.data();
-    return device.GetLogical().CreateDescriptorSetLayout(ci);
+    return device.GetLogical().CreateDescriptorSetLayout({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = static_cast<u32>(bindings.size()),
+        .pBindings = bindings.data(),
+    });
 }
 
 vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
-    VkPipelineLayoutCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.setLayoutCount = 1;
-    ci.pSetLayouts = descriptor_set_layout.address();
-    ci.pushConstantRangeCount = 0;
-    ci.pPushConstantRanges = nullptr;
-    return device.GetLogical().CreatePipelineLayout(ci);
+    return device.GetLogical().CreatePipelineLayout({
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .setLayoutCount = 1,
+        .pSetLayouts = descriptor_set_layout.address(),
+        .pushConstantRangeCount = 0,
+        .pPushConstantRanges = nullptr,
+    });
 }
 
 vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
@@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
         return {};
     }
 
-    VkDescriptorUpdateTemplateCreateInfoKHR ci;
-    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
-    ci.pDescriptorUpdateEntries = template_entries.data();
-    ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
-    ci.descriptorSetLayout = *descriptor_set_layout;
-    ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
-    ci.pipelineLayout = *layout;
-    ci.set = DESCRIPTOR_SET;
-    return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
+    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+        .pNext = nullptr,
+        .flags = 0,
+        .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
+        .pDescriptorUpdateEntries = template_entries.data(),
+        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+        .descriptorSetLayout = *descriptor_set_layout,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .pipelineLayout = *layout,
+        .set = DESCRIPTOR_SET,
+    });
 }
 
 vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
     device.SaveShader(code);
 
-    VkShaderModuleCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.codeSize = code.size() * sizeof(u32);
-    ci.pCode = code.data();
-    return device.GetLogical().CreateShaderModule(ci);
+    return device.GetLogical().CreateShaderModule({
+        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .codeSize = code.size() * sizeof(u32),
+        .pCode = code.data(),
+    });
 }
 
 vk::Pipeline VKComputePipeline::CreatePipeline() const {
-    VkComputePipelineCreateInfo ci;
-    VkPipelineShaderStageCreateInfo& stage_ci = ci.stage;
-    stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
-    stage_ci.pNext = nullptr;
-    stage_ci.flags = 0;
-    stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
-    stage_ci.module = *shader_module;
-    stage_ci.pName = "main";
-    stage_ci.pSpecializationInfo = nullptr;
-
-    VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
-    subgroup_size_ci.sType =
-        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
-    subgroup_size_ci.pNext = nullptr;
-    subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+
+    VkComputePipelineCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stage =
+            {
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                .module = *shader_module,
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            },
+        .layout = *layout,
+        .basePipelineHandle = nullptr,
+        .basePipelineIndex = 0,
+    };
+
+    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .requiredSubgroupSize = GuestWarpSize,
+    };
 
     if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
-        stage_ci.pNext = &subgroup_size_ci;
+        ci.stage.pNext = &subgroup_size_ci;
     }
 
-    ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.layout = *layout;
-    ci.basePipelineHandle = nullptr;
-    ci.basePipelineIndex = 0;
     return device.GetLogical().CreateComputePipeline(ci);
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 9259b618d..f38e089d5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -7,7 +7,8 @@
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
@@ -15,14 +16,15 @@ namespace Vulkan {
 // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
 constexpr std::size_t SETS_GROW_RATE = 0x20;
 
-DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
-                                         VkDescriptorSetLayout layout)
-    : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
+DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
+                                         VkDescriptorSetLayout layout_)
+    : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
+      descriptor_pool{descriptor_pool_}, layout{layout_} {}
 
 DescriptorAllocator::~DescriptorAllocator() = default;
 
-VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
-    const std::size_t index = CommitResource(fence);
+VkDescriptorSet DescriptorAllocator::Commit() {
+    const std::size_t index = CommitResource();
     return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
 }
 
@@ -30,8 +32,9 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
     descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
 }
 
-VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
-    : device{device}, active_pool{AllocateNewPool()} {}
+VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler)
+    : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
+                                                                             AllocateNewPool()} {}
 
 VKDescriptorPool::~VKDescriptorPool() = default;
 
@@ -43,27 +46,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
         {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
         {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
         {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
-        {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
-
-    VkDescriptorPoolCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
-    ci.maxSets = num_sets;
-    ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes));
-    ci.pPoolSizes = std::data(pool_sizes);
+        {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
+    };
+
+    const VkDescriptorPoolCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+        .maxSets = num_sets,
+        .poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
+        .pPoolSizes = std::data(pool_sizes),
+    };
     return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
 }
 
 vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
                                                          std::size_t count) {
     const std::vector layout_copies(count, layout);
-    VkDescriptorSetAllocateInfo ai;
-    ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
-    ai.pNext = nullptr;
-    ai.descriptorPool = **active_pool;
-    ai.descriptorSetCount = static_cast<u32>(count);
-    ai.pSetLayouts = layout_copies.data();
+    VkDescriptorSetAllocateInfo ai{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .descriptorPool = **active_pool,
+        .descriptorSetCount = static_cast<u32>(count),
+        .pSetLayouts = layout_copies.data(),
+    };
 
     vk::DescriptorSets sets = active_pool->Allocate(ai);
     if (!sets.IsOutOfPoolMemory()) {
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 9efa66bef..544f32a20 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -6,21 +6,24 @@
 
 #include <vector>
 
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
+class VKDevice;
 class VKDescriptorPool;
+class VKScheduler;
 
-class DescriptorAllocator final : public VKFencedPool {
+class DescriptorAllocator final : public ResourcePool {
 public:
     explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
     ~DescriptorAllocator() override;
 
+    DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
     DescriptorAllocator(const DescriptorAllocator&) = delete;
 
-    VkDescriptorSet Commit(VKFence& fence);
+    VkDescriptorSet Commit();
 
 protected:
     void Allocate(std::size_t begin, std::size_t end) override;
@@ -36,15 +39,19 @@ class VKDescriptorPool final {
     friend DescriptorAllocator;
 
 public:
-    explicit VKDescriptorPool(const VKDevice& device);
+    explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler);
     ~VKDescriptorPool();
 
+    VKDescriptorPool(const VKDescriptorPool&) = delete;
+    VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
+
 private:
     vk::DescriptorPool* AllocateNewPool();
 
     vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
 
     const VKDevice& device;
+    MasterSemaphore& master_semaphore;
 
     std::vector<vk::DescriptorPool> pools;
     vk::DescriptorPool* active_pool;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 9226e591c..3d8d3213d 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -42,6 +42,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
     VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
     VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
     VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
+    VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
     VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
     VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@@ -84,14 +85,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
         VK_FORMAT_A8B8G8R8_UNORM_PACK32,
         VK_FORMAT_A8B8G8R8_UINT_PACK32,
         VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+        VK_FORMAT_A8B8G8R8_SINT_PACK32,
         VK_FORMAT_A8B8G8R8_SRGB_PACK32,
         VK_FORMAT_B5G6R5_UNORM_PACK16,
         VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+        VK_FORMAT_A2B10G10R10_UINT_PACK32,
         VK_FORMAT_A1R5G5B5_UNORM_PACK16,
         VK_FORMAT_R32G32B32A32_SFLOAT,
+        VK_FORMAT_R32G32B32A32_SINT,
         VK_FORMAT_R32G32B32A32_UINT,
         VK_FORMAT_R32G32_SFLOAT,
+        VK_FORMAT_R32G32_SINT,
         VK_FORMAT_R32G32_UINT,
+        VK_FORMAT_R16G16B16A16_SINT,
         VK_FORMAT_R16G16B16A16_UINT,
         VK_FORMAT_R16G16B16A16_SNORM,
         VK_FORMAT_R16G16B16A16_UNORM,
@@ -103,8 +109,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
         VK_FORMAT_R8G8B8A8_SRGB,
         VK_FORMAT_R8G8_UNORM,
         VK_FORMAT_R8G8_SNORM,
+        VK_FORMAT_R8G8_SINT,
         VK_FORMAT_R8G8_UINT,
         VK_FORMAT_R8_UNORM,
+        VK_FORMAT_R8_SNORM,
+        VK_FORMAT_R8_SINT,
         VK_FORMAT_R8_UINT,
         VK_FORMAT_B10G11R11_UFLOAT_PACK32,
         VK_FORMAT_R32_SFLOAT,
@@ -124,6 +133,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
         VK_FORMAT_BC2_UNORM_BLOCK,
         VK_FORMAT_BC3_UNORM_BLOCK,
         VK_FORMAT_BC4_UNORM_BLOCK,
+        VK_FORMAT_BC4_SNORM_BLOCK,
         VK_FORMAT_BC5_UNORM_BLOCK,
         VK_FORMAT_BC5_SNORM_BLOCK,
         VK_FORMAT_BC7_UNORM_BLOCK,
@@ -241,6 +251,13 @@ bool VKDevice::Create() {
         .inheritedQueries = false,
     };
 
+    VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
+        .pNext = nullptr,
+        .timelineSemaphore = true,
+    };
+    SetNext(next, timeline_semaphore);
+
     VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
         .pNext = nullptr,
@@ -373,6 +390,8 @@ bool VKDevice::Create() {
 
     graphics_queue = logical.GetQueue(graphics_family);
     present_queue = logical.GetQueue(present_family);
+
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
     return true;
 }
 
@@ -757,14 +776,15 @@ std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const
     queue_cis.reserve(unique_queue_families.size());
 
     for (const u32 queue_family : unique_queue_families) {
-        queue_cis.push_back({
+        auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{
             .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
             .pNext = nullptr,
             .flags = 0,
             .queueFamilyIndex = queue_family,
             .queueCount = 1,
-            .pQueuePriorities = &QUEUE_PRIORITY,
+            .pQueuePriorities = nullptr,
         });
+        ci.pQueuePriorities = &QUEUE_PRIORITY;
     }
 
     return queue_cis;
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index ae5c21baa..26a233db1 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,6 +122,11 @@ public:
         return properties.limits.maxPushConstantsSize;
     }
 
+    /// Returns the maximum size for shared memory.
+    u32 GetMaxComputeSharedMemorySize() const {
+        return properties.limits.maxComputeSharedMemorySize;
+    }
+
     /// Returns true if ASTC is natively supported.
     bool IsOptimalAstcSupported() const {
         return is_optimal_astc_supported;
@@ -197,6 +202,11 @@ public:
         return reported_extensions;
     }
 
+    /// Returns true if the setting for async shader compilation is enabled.
+    bool UseAsynchronousShaders() const {
+        return use_asynchronous_shaders;
+    }
+
     /// Checks if the physical device is suitable.
     static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
 
@@ -247,6 +257,9 @@ private:
     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
 
+    // Asynchronous Graphics Pipeline setting
+    bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
+
     // Telemetry parameters
     std::string vendor_name;                      ///< Device's driver name.
     std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a02be5487..5babbdd0b 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -30,7 +30,7 @@ void InnerFence::Queue() {
     ASSERT(!event);
 
     event = device.GetLogical().CreateEvent();
-    ticks = scheduler.Ticks();
+    ticks = scheduler.CurrentTick();
 
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
@@ -52,7 +52,7 @@ void InnerFence::Wait() {
     }
     ASSERT(event);
 
-    if (ticks >= scheduler.Ticks()) {
+    if (ticks >= scheduler.CurrentTick()) {
         scheduler.Flush();
     }
     while (!IsEventSignalled()) {
@@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const {
     }
 }
 
-VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                               const VKDevice& device, VKScheduler& scheduler,
-                               VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
-                               VKQueryCache& query_cache)
-    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
-      device{device}, scheduler{scheduler} {}
+VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+                               Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+                               VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+                               const VKDevice& device_, VKScheduler& scheduler_)
+    : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache),
+      device{device_}, scheduler{scheduler_} {}
 
 Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
     return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 043fe7947..1547d6d30 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -55,10 +55,10 @@ using GenericFenceManager =
 
 class VKFenceManager final : public GenericFenceManager {
 public:
-    explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                            const VKDevice& device, VKScheduler& scheduler,
-                            VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
-                            VKQueryCache& query_cache);
+    explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+                            Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+                            VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+                            const VKDevice& device, VKScheduler& scheduler);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index aaf930b90..a4b9e7ef5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
                                        const GraphicsPipelineCacheKey& key,
                                        vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                        const SPIRVProgram& program)
-    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+    : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
       descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
       descriptor_allocator{descriptor_pool, *descriptor_set_layout},
       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                         program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
-                                                                             key.renderpass_params,
-                                                                             program)} {}
+      renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
+      pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
 
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 
@@ -94,7 +93,7 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
     if (!descriptor_template) {
         return {};
     }
-    const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+    const VkDescriptorSet set = descriptor_allocator.Commit();
     update_descriptor_queue.Send(*descriptor_template, set);
     return set;
 }
@@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
 
 vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
                                                 const SPIRVProgram& program) const {
-    const auto& state = fixed_state;
+    const auto& state = cache_key.fixed_state;
     const auto& viewport_swizzles = state.viewport_swizzles;
 
     FixedPipelineState::DynamicState dynamic;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a1d699a6c..58aa35efd 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey;
+struct GraphicsPipelineCacheKey {
+    RenderPassParams renderpass_params;
+    u32 padding;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    FixedPipelineState fixed_state;
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    std::size_t Size() const noexcept {
+        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 
 class VKDescriptorPool;
 class VKDevice;
@@ -54,6 +74,10 @@ public:
         return renderpass;
     }
 
+    GraphicsPipelineCacheKey GetCacheKey() const {
+        return cache_key;
+    }
+
 private:
     vk::DescriptorSetLayout CreateDescriptorSetLayout(
         vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -70,7 +94,7 @@ private:
 
     const VKDevice& device;
     VKScheduler& scheduler;
-    const FixedPipelineState fixed_state;
+    const GraphicsPipelineCacheKey cache_key;
     const u64 hash;
 
     vk::DescriptorSetLayout descriptor_set_layout;
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
index 9bceb3861..1c418ea17 100644
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ b/src/video_core/renderer_vulkan/vk_image.cpp
@@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num
 
 void VKImage::CreatePresentView() {
     // Image type has to be 2D to be presented.
-    VkImageViewCreateInfo image_view_ci;
-    image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-    image_view_ci.pNext = nullptr;
-    image_view_ci.flags = 0;
-    image_view_ci.image = *image;
-    image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
-    image_view_ci.format = format;
-    image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
-                                VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
-    image_view_ci.subresourceRange.aspectMask = aspect_mask;
-    image_view_ci.subresourceRange.baseMipLevel = 0;
-    image_view_ci.subresourceRange.levelCount = 1;
-    image_view_ci.subresourceRange.baseArrayLayer = 0;
-    image_view_ci.subresourceRange.layerCount = 1;
-    present_view = device.GetLogical().CreateImageView(image_view_ci);
+    present_view = device.GetLogical().CreateImageView({
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .image = *image,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = format,
+        .components =
+            {
+                .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+            },
+        .subresourceRange =
+            {
+                .aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = 1,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            },
+    });
 }
 
 VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
new file mode 100644
index 000000000..ae26e558d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -0,0 +1,56 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <chrono>
+
+#include "core/settings.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+using namespace std::chrono_literals;
+
+MasterSemaphore::MasterSemaphore(const VKDevice& device) {
+    static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
+        .pNext = nullptr,
+        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
+        .initialValue = 0,
+    };
+    static constexpr VkSemaphoreCreateInfo semaphore_ci{
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        .pNext = &semaphore_type_ci,
+        .flags = 0,
+    };
+    semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
+
+    if (!Settings::values.renderer_debug) {
+        return;
+    }
+    // Validation layers have a bug where they fail to track resource usage when using timeline
+    // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To work around this issue,
+    // have a separate thread waiting for each timeline semaphore value.
+    debug_thread = std::thread([this] {
+        u64 counter = 0;
+        while (!shutdown) {
+            if (semaphore.Wait(counter, 10'000'000)) {
+                ++counter;
+            }
+        }
+    });
+}
+
+MasterSemaphore::~MasterSemaphore() {
+    shutdown = true;
+
+    // The debug thread only exists when renderer_debug is enabled
+    if (debug_thread.joinable()) {
+        debug_thread.join();
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
new file mode 100644
index 000000000..0e93706d7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <thread>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+class MasterSemaphore {
+public:
+    explicit MasterSemaphore(const VKDevice& device);
+    ~MasterSemaphore();
+
+    /// Returns the current logical tick.
+    [[nodiscard]] u64 CurrentTick() const noexcept {
+        return current_tick;
+    }
+
+    /// Returns the timeline semaphore handle.
+    [[nodiscard]] VkSemaphore Handle() const noexcept {
+        return *semaphore;
+    }
+
+    /// Returns true when a tick has been hit by the GPU.
+    [[nodiscard]] bool IsFree(u64 tick) {
+        return gpu_tick >= tick;
+    }
+
+    /// Advance to the next logical tick.
+    void NextTick() noexcept {
+        ++current_tick;
+    }
+
+    /// Refresh the known GPU tick
+    void Refresh() {
+        gpu_tick = semaphore.GetCounter();
+    }
+
+    /// Waits for a tick to be hit on the GPU
+    void Wait(u64 tick) {
+        // No need to wait if the GPU is ahead of the tick
+        if (IsFree(tick)) {
+            return;
+        }
+        // Update the GPU tick and try again
+        Refresh();
+        if (IsFree(tick)) {
+            return;
+        }
+        // If none of the above is hit, fall back to a regular wait
+        semaphore.Wait(tick);
+    }
+
+private:
+    vk::Semaphore semaphore;           ///< Timeline semaphore.
+    std::atomic<u64> gpu_tick{0};      ///< Current known GPU tick.
+    std::atomic<u64> current_tick{1};  ///< Current logical tick.
+    std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
+    std::thread debug_thread;          ///< Debug thread to work around validation layer bugs.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index b4c650a63..24c8960ac 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t
     }();
 
     // Try to allocate found type.
-    VkMemoryAllocateInfo memory_ai;
-    memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-    memory_ai.pNext = nullptr;
-    memory_ai.allocationSize = size;
-    memory_ai.memoryTypeIndex = type;
-
-    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai);
+    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .allocationSize = size,
+        .memoryTypeIndex = type,
+    });
     if (!memory) {
         LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
         return false;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3da835324..5c038f4bc 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -28,6 +28,7 @@
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
 
 namespace Vulkan {
 
@@ -88,12 +89,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi
             // Combined image samplers can be arrayed.
             count = container[i].size;
         }
-        VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
-        entry.binding = binding++;
-        entry.descriptorType = descriptor_type;
-        entry.descriptorCount = count;
-        entry.stageFlags = stage_flags;
-        entry.pImmutableSamplers = nullptr;
+        bindings.push_back({
+            .binding = binding++,
+            .descriptorType = descriptor_type,
+            .descriptorCount = count,
+            .stageFlags = stage_flags,
+            .pImmutableSamplers = nullptr,
+        });
     }
 }
 
@@ -133,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
-Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-               VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
-    : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
-      registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
-                                                           compiler_settings, registry},
-      entries{GenerateShaderEntries(shader_ir)} {}
+Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage,
+               GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_,
+               u32 main_offset)
+    : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine),
+      shader_ir(program_code, main_offset, compiler_settings, registry),
+      entries(GenerateShaderEntries(shader_ir)) {}
 
 Shader::~Shader() = default;
 
-Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
-                                                              Tegra::Engines::ShaderType stage) {
-    if (stage == ShaderType::Compute) {
-        return system.GPU().KeplerCompute();
-    } else {
-        return system.GPU().Maxwell3D();
-    }
-}
-
-VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
-                                 const VKDevice& device, VKScheduler& scheduler,
-                                 VKDescriptorPool& descriptor_pool,
-                                 VKUpdateDescriptorQueue& update_descriptor_queue,
-                                 VKRenderPassCache& renderpass_cache)
-    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
-      scheduler{scheduler}, descriptor_pool{descriptor_pool},
-      update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
+VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_,
+                                 Tegra::Engines::Maxwell3D& maxwell3d_,
+                                 Tegra::Engines::KeplerCompute& kepler_compute_,
+                                 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
+                                 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                 VKRenderPassCache& renderpass_cache_)
+    : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_},
+      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
+      scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+      update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {}
 
 VKPipelineCache::~VKPipelineCache() = default;
 
 std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
-    const auto& gpu = system.GPU().Maxwell3D();
-
     std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
+
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
 
         // Skip stages that are not enabled
-        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
             continue;
         }
 
-        auto& memory_manager{system.GPU().MemoryManager()};
-        const GPUVAddr program_addr{GetShaderAddress(system, program)};
-        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+        const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         ASSERT(cpu_addr);
 
         Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
         if (!result) {
-            const auto host_ptr{memory_manager.GetPointer(program_addr)};
+            const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
 
             // No shader found - create a new one
-            constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
+            static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
             const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
-            ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+            ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
             const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
-            auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
-                                                   stage_offset);
+            auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
+                                                   std::move(code), stage_offset);
             result = shader.get();
 
             if (cpu_addr) {
@@ -204,24 +198,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     return last_shaders = shaders;
 }
 
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
+    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
     if (last_graphics_pipeline && last_graphics_key == key) {
-        return *last_graphics_pipeline;
+        return last_graphics_pipeline;
     }
     last_graphics_key = key;
 
+    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
+        std::unique_lock lock{pipeline_cache}; // EmplacePipeline takes the same mutex.
+        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+        if (is_cache_miss) {
+            gpu.ShaderNotify().MarkSharderBuilding();
+            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+            const auto [program, bindings] = DecompileShaders(key.fixed_state);
+            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
+                                            update_descriptor_queue, renderpass_cache, bindings,
+                                            program, key);
+        }
+        last_graphics_pipeline = pair->second.get(); // May be null while the entry is unfilled.
+        return last_graphics_pipeline;
+    }
+
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
     auto& entry = pair->second;
     if (is_cache_miss) {
+        gpu.ShaderNotify().MarkSharderBuilding();
         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-        const auto [program, bindings] = DecompileShaders(key);
+        const auto [program, bindings] = DecompileShaders(key.fixed_state);
         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                      update_descriptor_queue, renderpass_cache, key,
                                                      bindings, program);
+        gpu.ShaderNotify().MarkShaderComplete();
     }
-    return *(last_graphics_pipeline = entry.get());
+    last_graphics_pipeline = entry.get();
+    return last_graphics_pipeline;
 }
 
 VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
@@ -234,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     }
     LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
 
-    auto& memory_manager = system.GPU().MemoryManager();
-    const auto program_addr = key.shader;
+    const GPUVAddr gpu_addr = key.shader;
 
-    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     ASSERT(cpu_addr);
 
     Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
     if (!shader) {
         // No shader found - create a new one
-        const auto host_ptr = memory_manager.GetPointer(program_addr);
+        const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
 
-        ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+        ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
         const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
-        auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
-                                                    std::move(code), KERNEL_MAIN_OFFSET);
+        auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
+                                                    *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
         shader = shader_info.get();
 
         if (cpu_addr) {
@@ -259,10 +271,15 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
         }
     }
 
-    Specialization specialization;
-    specialization.workgroup_size = key.workgroup_size;
-    specialization.shared_memory_size = key.shared_memory_size;
-
+    const Specialization specialization{
+        .base_binding = 0,
+        .workgroup_size = key.workgroup_size,
+        .shared_memory_size = key.shared_memory_size,
+        .point_size = std::nullopt,
+        .enabled_attributes = {},
+        .attribute_types = {},
+        .ndc_minus_one_to_one = false,
+    };
     const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
                                              shader->GetRegistry(), specialization),
                                    shader->GetEntries()};
@@ -271,6 +288,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     return *entry;
 }
 
+void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+    gpu.ShaderNotify().MarkShaderComplete();
+    std::unique_lock lock{pipeline_cache}; // Serializes access to graphics_cache.
+    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); // Key must already exist.
+}
+
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
     bool finished = false;
     const auto Finish = [&] {
@@ -306,11 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
 }
 
 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
-    const auto& fixed_state = key.fixed_state;
-    auto& memory_manager = system.GPU().MemoryManager();
-    const auto& gpu = system.GPU().Maxwell3D();
-
+VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
     Specialization specialization;
     if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
         device.IsExtExtendedDynamicStateSupported()) {
@@ -333,12 +352,12 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
         const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
 
         // Skip stages that are not enabled
-        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
             continue;
         }
 
-        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
-        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+        const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
 
         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
@@ -370,13 +389,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
     if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
         for (u32 i = 0; i < count; ++i) {
             const u32 num_samplers = container[i].size;
-            VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
-            entry.dstBinding = binding;
-            entry.dstArrayElement = 0;
-            entry.descriptorCount = num_samplers;
-            entry.descriptorType = descriptor_type;
-            entry.offset = offset;
-            entry.stride = entry_size;
+            template_entries.push_back({
+                .dstBinding = binding,
+                .dstArrayElement = 0,
+                .descriptorCount = num_samplers,
+                .descriptorType = descriptor_type,
+                .offset = offset,
+                .stride = entry_size,
+            });
 
             ++binding;
             offset += num_samplers * entry_size;
@@ -389,22 +409,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
         // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
         // Note: Fixed in driver Windows 443.24, Linux 440.66.15
         for (u32 i = 0; i < count; ++i) {
-            VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
-            entry.dstBinding = binding + i;
-            entry.dstArrayElement = 0;
-            entry.descriptorCount = 1;
-            entry.descriptorType = descriptor_type;
-            entry.offset = static_cast<std::size_t>(offset + i * entry_size);
-            entry.stride = entry_size;
+            template_entries.push_back({
+                .dstBinding = binding + i,
+                .dstArrayElement = 0,
+                .descriptorCount = 1,
+                .descriptorType = descriptor_type,
+                .offset = static_cast<std::size_t>(offset + i * entry_size),
+                .stride = entry_size,
+            });
         }
     } else if (count > 0) {
-        VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
-        entry.dstBinding = binding;
-        entry.dstArrayElement = 0;
-        entry.descriptorCount = count;
-        entry.descriptorType = descriptor_type;
-        entry.offset = offset;
-        entry.stride = entry_size;
+        template_entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = count,
+            .descriptorType = descriptor_type,
+            .offset = offset,
+            .stride = entry_size,
+        });
     }
     offset += count * entry_size;
     binding += count;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0a3fe65fb..e558e6658 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -22,6 +22,7 @@
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
@@ -37,34 +38,11 @@ class RasterizerVulkan;
 class VKComputePipeline;
 class VKDescriptorPool;
 class VKDevice;
-class VKFence;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey {
-    RenderPassParams renderpass_params;
-    u32 padding;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
-    FixedPipelineState fixed_state;
-
-    std::size_t Hash() const noexcept;
-
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    std::size_t Size() const noexcept {
-        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
-    }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
 struct ComputePipelineCacheKey {
     GPUVAddr shader;
     u32 shared_memory_size;
@@ -106,7 +84,8 @@ namespace Vulkan {
 
 class Shader {
 public:
-    explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+    explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine,
+                    Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr,
                     VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
     ~Shader();
 
@@ -118,22 +97,19 @@ public:
         return shader_ir;
     }
 
-    const VideoCommon::Shader::Registry& GetRegistry() const {
-        return registry;
-    }
-
     const VideoCommon::Shader::ShaderIR& GetIR() const {
         return shader_ir;
     }
 
+    const VideoCommon::Shader::Registry& GetRegistry() const {
+        return registry;
+    }
+
     const ShaderEntries& GetEntries() const {
         return entries;
     }
 
 private:
-    static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
-                                                                 Tegra::Engines::ShaderType stage);
-
     GPUVAddr gpu_addr{};
     VideoCommon::Shader::ProgramCode program_code;
     VideoCommon::Shader::Registry registry;
@@ -143,27 +119,36 @@ private:
 
 class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
 public:
-    explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
-                             const VKDevice& device, VKScheduler& scheduler,
-                             VKDescriptorPool& descriptor_pool,
+    explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
+                             Tegra::Engines::Maxwell3D& maxwell3d,
+                             Tegra::Engines::KeplerCompute& kepler_compute,
+                             Tegra::MemoryManager& gpu_memory, const VKDevice& device,
+                             VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue,
                              VKRenderPassCache& renderpass_cache);
     ~VKPipelineCache() override;
 
     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+                                            VideoCommon::Shader::AsyncShaders& async_shaders);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
+    void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
 private:
     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
-        const GraphicsPipelineCacheKey& key);
+        const FixedPipelineState& fixed_state);
+
+    Tegra::GPU& gpu;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
 
-    Core::System& system;
     const VKDevice& device;
     VKScheduler& scheduler;
     VKDescriptorPool& descriptor_pool;
@@ -178,6 +163,7 @@ private:
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
 
+    std::mutex pipeline_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
         graphics_cache;
     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index bc91c48cc..ee2d871e3 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -9,35 +9,33 @@
 
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
+using VideoCore::QueryType;
+
 namespace {
 
 constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
 
-constexpr VkQueryType GetTarget(VideoCore::QueryType type) {
+constexpr VkQueryType GetTarget(QueryType type) {
     return QUERY_TARGETS[static_cast<std::size_t>(type)];
 }
 
 } // Anonymous namespace
 
-QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
+QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_)
+    : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
 
 QueryPool::~QueryPool() = default;
 
-void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
-    device = &device_;
-    type = type_;
-}
-
-std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
+std::pair<VkQueryPool, u32> QueryPool::Commit() {
     std::size_t index;
     do {
-        index = CommitResource(fence);
+        index = CommitResource();
     } while (usage[index]);
     usage[index] = true;
 
@@ -47,14 +45,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
 void QueryPool::Allocate(std::size_t begin, std::size_t end) {
     usage.resize(end);
 
-    VkQueryPoolCreateInfo query_pool_ci;
-    query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
-    query_pool_ci.pNext = nullptr;
-    query_pool_ci.flags = 0;
-    query_pool_ci.queryType = GetTarget(type);
-    query_pool_ci.queryCount = static_cast<u32>(end - begin);
-    query_pool_ci.pipelineStatistics = 0;
-    pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci));
+    pools.push_back(device.GetLogical().CreateQueryPool({
+        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .queryType = GetTarget(type),
+        .queryCount = static_cast<u32>(end - begin),
+        .pipelineStatistics = 0,
+    }));
 }
 
 void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
@@ -68,30 +66,39 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
     usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
 }
 
-VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+                           Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
                            const VKDevice& device, VKScheduler& scheduler)
-    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
-                                  QueryPool>{system, rasterizer},
-      device{device}, scheduler{scheduler} {
-    for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
-        query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
+    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream,
+                                  HostCounter>{rasterizer, maxwell3d, gpu_memory},
+      device{device}, scheduler{scheduler}, query_pools{
+                                                QueryPool{device, scheduler,
+                                                          QueryType::SamplesPassed},
+                                            } {}
+
+VKQueryCache::~VKQueryCache() {
+    // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
+    // destructor is called. The query cache should be redesigned to have a proper ownership model
+    // instead of using shared pointers.
+    for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
+        auto& stream = Stream(static_cast<QueryType>(query_type));
+        stream.Update(false);
+        stream.Reset();
     }
 }
 
-VKQueryCache::~VKQueryCache() = default;
-
-std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
-    return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
+std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) {
+    return query_pools[static_cast<std::size_t>(type)].Commit();
 }
 
-void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) {
+void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
     query_pools[static_cast<std::size_t>(type)].Reserve(query);
 }
 
 HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
-                         VideoCore::QueryType type)
+                         QueryType type)
     : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
-      type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
+      type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} {
     const vk::Device* logical = &cache.Device().GetLogical();
     cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
         logical->ResetQueryPoolEXT(query.first, query.second, 1);
@@ -109,7 +116,7 @@ void HostCounter::EndQuery() {
 }
 
 u64 HostCounter::BlockingQuery() const {
-    if (ticks >= cache.Scheduler().Ticks()) {
+    if (tick >= cache.Scheduler().CurrentTick()) {
         cache.Scheduler().Flush();
     }
     u64 data;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 40119e6d3..2e57fb75d 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -11,7 +11,7 @@
 
 #include "common/common_types.h"
 #include "video_core/query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace VideoCore {
@@ -28,14 +28,12 @@ class VKScheduler;
 
 using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
 
-class QueryPool final : public VKFencedPool {
+class QueryPool final : public ResourcePool {
 public:
-    explicit QueryPool();
+    explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type);
     ~QueryPool() override;
 
-    void Initialize(const VKDevice& device, VideoCore::QueryType type);
-
-    std::pair<VkQueryPool, u32> Commit(VKFence& fence);
+    std::pair<VkQueryPool, u32> Commit();
 
     void Reserve(std::pair<VkQueryPool, u32> query);
 
@@ -45,18 +43,18 @@ protected:
 private:
     static constexpr std::size_t GROW_STEP = 512;
 
-    const VKDevice* device = nullptr;
-    VideoCore::QueryType type = {};
+    const VKDevice& device;
+    const VideoCore::QueryType type;
 
     std::vector<vk::QueryPool> pools;
     std::vector<bool> usage;
 };
 
 class VKQueryCache final
-    : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
-                                         QueryPool> {
+    : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+    explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+                          Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
                           const VKDevice& device, VKScheduler& scheduler);
     ~VKQueryCache();
 
@@ -75,6 +73,7 @@ public:
 private:
     const VKDevice& device;
     VKScheduler& scheduler;
+    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
 };
 
 class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
@@ -91,7 +90,7 @@ private:
     VKQueryCache& cache;
     const VideoCore::QueryType type;
     const std::pair<VkQueryPool, u32> query;
-    const u64 ticks;
+    const u64 tick;
 };
 
 class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7625871c2..f3c2483c8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
@@ -30,7 +31,6 @@
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_sampler_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -64,20 +64,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
     const auto& src = regs.viewport_transform[index];
     const float width = src.scale_x * 2.0f;
     const float height = src.scale_y * 2.0f;
+    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
 
-    VkViewport viewport;
-    viewport.x = src.translate_x - src.scale_x;
-    viewport.y = src.translate_y - src.scale_y;
-    viewport.width = width != 0.0f ? width : 1.0f;
-    viewport.height = height != 0.0f ? height : 1.0f;
+    VkViewport viewport{
+        .x = src.translate_x - src.scale_x,
+        .y = src.translate_y - src.scale_y,
+        .width = width != 0.0f ? width : 1.0f,
+        .height = height != 0.0f ? height : 1.0f,
+        .minDepth = src.translate_z - src.scale_z * reduce_z,
+        .maxDepth = src.translate_z + src.scale_z,
+    };
 
-    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
-    viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
-    viewport.maxDepth = src.translate_z + src.scale_z;
     if (!device.IsExtDepthRangeUnrestrictedSupported()) {
         viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
         viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
     }
+
     return viewport;
 }
 
@@ -378,28 +380,32 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
     }
 }
 
-RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
-                                   VKScreenInfo& screen_info, const VKDevice& device,
-                                   VKResourceManager& resource_manager,
-                                   VKMemoryManager& memory_manager, StateTracker& state_tracker,
-                                   VKScheduler& scheduler)
-    : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
-      screen_info{screen_info}, device{device}, resource_manager{resource_manager},
-      memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
-      staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
-      update_descriptor_queue(device, scheduler), renderpass_cache(device),
+RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+                                   Tegra::MemoryManager& gpu_memory_,
+                                   Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_,
+                                   const VKDevice& device_, VKMemoryManager& memory_manager_,
+                                   StateTracker& state_tracker_, VKScheduler& scheduler_)
+    : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_),
+      maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_),
+      device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_),
+      scheduler(scheduler_), staging_pool(device, memory_manager, scheduler),
+      descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler),
+      renderpass_cache(device),
       quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
       quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
-      texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
-                    staging_pool),
-      pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
-                     renderpass_cache),
-      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
-      sampler_cache(device),
-      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
-      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+      texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool),
+      pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+                     descriptor_pool, update_descriptor_queue, renderpass_cache),
+      buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool),
+      sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler),
+      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
+                    scheduler),
+      wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) {
     scheduler.SetQueryCache(query_cache);
+    if (device.UseAsynchronousShaders()) {
+        async_shaders.AllocateWorkers();
+    }
 }
 
 RasterizerVulkan::~RasterizerVulkan() = default;
@@ -407,13 +413,13 @@ RasterizerVulkan::~RasterizerVulkan() = default;
 void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     MICROPROFILE_SCOPE(Vulkan_Drawing);
 
+    SCOPE_EXIT({ gpu.TickWork(); });
     FlushWork();
 
     query_cache.UpdateCounters();
 
-    const auto& gpu = system.GPU().Maxwell3D();
     GraphicsPipelineCacheKey key;
-    key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
+    key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
 
     buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
 
@@ -437,10 +443,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
-    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
+        // Async graphics pipeline was not ready.
+        return;
+    }
+
+    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 
-    const auto renderpass = pipeline.GetRenderPass();
+    const auto renderpass = pipeline->GetRenderPass();
     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
     scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 
@@ -450,8 +461,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback();
 
-    const auto pipeline_layout = pipeline.GetLayout();
-    const auto descriptor_set = pipeline.CommitDescriptorSet();
+    const auto pipeline_layout = pipeline->GetLayout();
+    const auto descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
@@ -461,15 +472,12 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
-
-    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
     MICROPROFILE_SCOPE(Vulkan_Clearing);
 
-    const auto& gpu = system.GPU().Maxwell3D();
-    if (!system.GPU().Maxwell3D().ShouldExecute()) {
+    if (!maxwell3d.ShouldExecute()) {
         return;
     }
 
@@ -478,7 +486,7 @@ void RasterizerVulkan::Clear() {
 
     query_cache.UpdateCounters();
 
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
     const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
                            regs.clear_buffers.A;
     const bool use_depth = regs.clear_buffers.Z;
@@ -508,10 +516,11 @@ void RasterizerVulkan::Clear() {
 
         const u32 color_attachment = regs.clear_buffers.RT;
         scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
-            VkClearAttachment attachment;
-            attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-            attachment.colorAttachment = color_attachment;
-            attachment.clearValue = clear_value;
+            const VkClearAttachment attachment{
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .colorAttachment = color_attachment,
+                .clearValue = clear_value,
+            };
             cmdbuf.ClearAttachments(attachment, clear_rect);
         });
     }
@@ -529,10 +538,6 @@ void RasterizerVulkan::Clear() {
 
     scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
                       clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
-        VkClearValue clear_value;
-        clear_value.depthStencil.depth = clear_depth;
-        clear_value.depthStencil.stencil = clear_stencil;
-
         VkClearAttachment attachment;
         attachment.aspectMask = aspect_flags;
         attachment.colorAttachment = 0;
@@ -550,14 +555,17 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
 
     query_cache.UpdateCounters();
 
-    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    ComputePipelineCacheKey key;
-    key.shader = code_addr;
-    key.shared_memory_size = launch_desc.shared_alloc;
-    key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
-                          launch_desc.block_dim_z};
-
-    auto& pipeline = pipeline_cache.GetComputePipeline(key);
+    const auto& launch_desc = kepler_compute.launch_description;
+    auto& pipeline = pipeline_cache.GetComputePipeline({
+        .shader = code_addr,
+        .shared_memory_size = launch_desc.shared_alloc,
+        .workgroup_size =
+            {
+                launch_desc.block_dim_x,
+                launch_desc.block_dim_y,
+                launch_desc.block_dim_z,
+            },
+    });
 
     // Compute dispatches can't be executed inside a renderpass
     scheduler.RequestOutsideRenderPassOperationContext();
@@ -643,16 +651,14 @@ void RasterizerVulkan::SyncGuestHost() {
 }
 
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
-    auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
-        gpu.MemoryManager().Write<u32>(addr, value);
+        gpu_memory.Write<u32>(addr, value);
         return;
     }
     fence_manager.SignalSemaphore(addr, value);
 }
 
 void RasterizerVulkan::SignalSyncPoint(u32 value) {
-    auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
         gpu.IncrementSyncPoint(value);
         return;
@@ -661,7 +667,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
 }
 
 void RasterizerVulkan::ReleaseFences() {
-    auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
         return;
     }
@@ -739,10 +744,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     return true;
 }
 
-void RasterizerVulkan::SetupDirtyFlags() {
-    state_tracker.Initialize();
-}
-
 void RasterizerVulkan::FlushWork() {
     static constexpr u32 DRAWS_TO_DISPATCH = 4096;
 
@@ -766,10 +767,9 @@ void RasterizerVulkan::FlushWork() {
 
 RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
     MICROPROFILE_SCOPE(Vulkan_RenderTargets);
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    auto& dirty = maxwell3d.dirty.flags;
-    auto& regs = maxwell3d.regs;
 
+    const auto& regs = maxwell3d.regs;
+    auto& dirty = maxwell3d.dirty.flags;
     const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
     dirty[VideoCommon::Dirty::RenderTargets] = false;
 
@@ -813,8 +813,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
 
 std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
     VkRenderPass renderpass) {
-    FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
-                            std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+    FramebufferCacheKey key{
+        .renderpass = renderpass,
+        .width = std::numeric_limits<u32>::max(),
+        .height = std::numeric_limits<u32>::max(),
+        .layers = std::numeric_limits<u32>::max(),
+        .views = {},
+    };
 
     const auto try_push = [&key](const View& view) {
         if (!view) {
@@ -827,7 +832,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
         return true;
     };
 
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
     for (std::size_t index = 0; index < num_attachments; ++index) {
         if (try_push(color_attachments[index])) {
@@ -841,17 +846,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
     const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
     auto& framebuffer = fbentry->second;
     if (is_cache_miss) {
-        VkFramebufferCreateInfo framebuffer_ci;
-        framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
-        framebuffer_ci.pNext = nullptr;
-        framebuffer_ci.flags = 0;
-        framebuffer_ci.renderPass = key.renderpass;
-        framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size());
-        framebuffer_ci.pAttachments = key.views.data();
-        framebuffer_ci.width = key.width;
-        framebuffer_ci.height = key.height;
-        framebuffer_ci.layers = key.layers;
-        framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci);
+        framebuffer = device.GetLogical().CreateFramebuffer({
+            .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .renderPass = key.renderpass,
+            .attachmentCount = static_cast<u32>(key.views.size()),
+            .pAttachments = key.views.data(),
+            .width = key.width,
+            .height = key.height,
+            .layers = key.layers,
+        });
     }
 
     return {*framebuffer, VkExtent2D{key.width, key.height}};
@@ -863,13 +868,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
                                                                  bool is_instanced) {
     MICROPROFILE_SCOPE(Vulkan_Geometry);
 
-    const auto& gpu = system.GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
+    const auto& regs = maxwell3d.regs;
 
     SetupVertexArrays(buffer_bindings);
 
     const u32 base_instance = regs.vb_base_instance;
-    const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
+    const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
     const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
     const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
 
@@ -930,7 +934,7 @@ void RasterizerVulkan::SetupImageTransitions(
 }
 
 void RasterizerVulkan::UpdateDynamicStates() {
-    auto& regs = system.GPU().Maxwell3D().regs;
+    auto& regs = maxwell3d.regs;
     UpdateViewportsState(regs);
     UpdateScissorsState(regs);
     UpdateDepthBias(regs);
@@ -951,7 +955,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
 }
 
 void RasterizerVulkan::BeginTransformFeedback() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.tfb_enabled == 0) {
         return;
     }
@@ -983,7 +987,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
 }
 
 void RasterizerVulkan::EndTransformFeedback() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     if (regs.tfb_enabled == 0) {
         return;
     }
@@ -996,7 +1000,7 @@ void RasterizerVulkan::EndTransformFeedback() {
 }
 
 void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
 
     for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
         const auto& vertex_array = regs.vertex_array[index];
@@ -1022,7 +1026,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
     if (params.num_vertices == 0) {
         return;
     }
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     switch (regs.draw.topology) {
     case Maxwell::PrimitiveTopology::Quads: {
         if (!params.is_indexed) {
@@ -1070,8 +1074,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
 
 void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
     MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
-    const auto& gpu = system.GPU().Maxwell3D();
-    const auto& shader_stage = gpu.state.shader_stages[stage];
+    const auto& shader_stage = maxwell3d.state.shader_stages[stage];
     for (const auto& entry : entries.const_buffers) {
         SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
     }
@@ -1079,8 +1082,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
 
 void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
     MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
-    auto& gpu{system.GPU()};
-    const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
+    const auto& cbufs{maxwell3d.state.shader_stages[stage]};
 
     for (const auto& entry : entries.global_buffers) {
         const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
@@ -1090,19 +1092,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
 
 void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
     MICROPROFILE_SCOPE(Vulkan_Textures);
-    const auto& gpu = system.GPU().Maxwell3D();
     for (const auto& entry : entries.uniform_texels) {
-        const auto image = GetTextureInfo(gpu, entry, stage).tic;
+        const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
         SetupUniformTexels(image, entry);
     }
 }
 
 void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
     MICROPROFILE_SCOPE(Vulkan_Textures);
-    const auto& gpu = system.GPU().Maxwell3D();
     for (const auto& entry : entries.samplers) {
         for (std::size_t i = 0; i < entry.size; ++i) {
-            const auto texture = GetTextureInfo(gpu, entry, stage, i);
+            const auto texture = GetTextureInfo(maxwell3d, entry, stage, i);
             SetupTexture(texture, entry);
         }
     }
@@ -1110,25 +1110,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
 
 void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
     MICROPROFILE_SCOPE(Vulkan_Textures);
-    const auto& gpu = system.GPU().Maxwell3D();
     for (const auto& entry : entries.storage_texels) {
-        const auto image = GetTextureInfo(gpu, entry, stage).tic;
+        const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
         SetupStorageTexel(image, entry);
     }
 }
 
 void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
     MICROPROFILE_SCOPE(Vulkan_Images);
-    const auto& gpu = system.GPU().Maxwell3D();
     for (const auto& entry : entries.images) {
-        const auto tic = GetTextureInfo(gpu, entry, stage).tic;
+        const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic;
         SetupImage(tic, entry);
     }
 }
 
 void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
     MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
-    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    const auto& launch_desc = kepler_compute.launch_description;
     for (const auto& entry : entries.const_buffers) {
         const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
         const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -1142,7 +1140,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
 
 void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
     MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
-    const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
+    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
     for (const auto& entry : entries.global_buffers) {
         const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
         SetupGlobalBuffer(entry, addr);
@@ -1151,19 +1149,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
 
 void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
     MICROPROFILE_SCOPE(Vulkan_Textures);
-    const auto& gpu = system.GPU().KeplerCompute();
     for (const auto& entry : entries.uniform_texels) {
-        const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+        const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
         SetupUniformTexels(image, entry);
     }
 }
 
 void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
     MICROPROFILE_SCOPE(Vulkan_Textures);
-    const auto& gpu = system.GPU().KeplerCompute();
     for (const auto& entry : entries.samplers) {
         for (std::size_t i = 0; i < entry.size; ++i) {
-            const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
+            const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i);
             SetupTexture(texture, entry);
         }
     }
@@ -1171,18 +1167,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
 
 void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
     MICROPROFILE_SCOPE(Vulkan_Textures);
-    const auto& gpu = system.GPU().KeplerCompute();
     for (const auto& entry : entries.storage_texels) {
-        const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+        const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
         SetupStorageTexel(image, entry);
     }
 }
 
 void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
     MICROPROFILE_SCOPE(Vulkan_Images);
-    const auto& gpu = system.GPU().KeplerCompute();
     for (const auto& entry : entries.images) {
-        const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+        const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
         SetupImage(tic, entry);
     }
 }
@@ -1206,9 +1200,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
 }
 
 void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
-    auto& memory_manager{system.GPU().MemoryManager()};
-    const auto actual_addr = memory_manager.Read<u64>(address);
-    const auto size = memory_manager.Read<u32>(address + 8);
+    const u64 actual_addr = gpu_memory.Read<u64>(address);
+    const u32 size = gpu_memory.Read<u32>(address + 8);
 
     if (size == 0) {
         // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
@@ -1426,10 +1419,10 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
 }
 
 void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
-    if (!state_tracker.TouchPrimitiveTopology()) {
+    const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
+    if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) {
         return;
     }
-    const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
     scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
         cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
     });
@@ -1491,7 +1484,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
 }
 
 std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
 
     std::size_t size = 0;
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -1506,9 +1499,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
 }
 
 std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    return static_cast<std::size_t>(regs.index_array.count) *
-           static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
 }
 
 std::size_t RasterizerVulkan::CalculateConstBufferSize(
@@ -1523,7 +1515,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
 }
 
 RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& regs = maxwell3d.regs;
     const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
 
     RenderPassParams params;
@@ -1553,17 +1545,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() {
         return *default_buffer;
     }
 
-    VkBufferCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.size = DEFAULT_BUFFER_SIZE;
-    ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
-               VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
-    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    ci.queueFamilyIndexCount = 0;
-    ci.pQueueFamilyIndices = nullptr;
-    default_buffer = device.GetLogical().CreateBuffer(ci);
+    default_buffer = device.GetLogical().CreateBuffer({
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = DEFAULT_BUFFER_SIZE,
+        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
     default_buffer_commit = memory_manager.Commit(default_buffer, false);
 
     scheduler.RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 923178b0b..b47c8fc13 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -25,13 +25,13 @@
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_sampler_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 
 namespace Core {
 class System;
@@ -105,10 +105,11 @@ struct ImageView {
 
 class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
 public:
-    explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
+    explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+                              Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
                               VKScreenInfo& screen_info, const VKDevice& device,
-                              VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
-                              StateTracker& state_tracker, VKScheduler& scheduler);
+                              VKMemoryManager& memory_manager, StateTracker& state_tracker,
+                              VKScheduler& scheduler);
     ~RasterizerVulkan() override;
 
     void Draw(bool is_indexed, bool is_instanced) override;
@@ -134,7 +135,14 @@ public:
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
-    void SetupDirtyFlags() override;
+
+    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+        return async_shaders;
+    }
+
+    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+        return async_shaders;
+    }
 
     /// Maximum supported size that a constbuffer can have in bytes.
     static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -270,11 +278,13 @@ private:
 
     VkBuffer DefaultBuffer();
 
-    Core::System& system;
-    Core::Frontend::EmuWindow& render_window;
+    Tegra::GPU& gpu;
+    Tegra::MemoryManager& gpu_memory;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+
     VKScreenInfo& screen_info;
     const VKDevice& device;
-    VKResourceManager& resource_manager;
     VKMemoryManager& memory_manager;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
@@ -291,12 +301,13 @@ private:
     VKPipelineCache pipeline_cache;
     VKBufferCache buffer_cache;
     VKSamplerCache sampler_cache;
-    VKFenceManager fence_manager;
     VKQueryCache query_cache;
+    VKFenceManager fence_manager;
 
     vk::Buffer default_buffer;
     VKMemoryCommit default_buffer_commit;
     vk::Event wfi_event;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
     View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
index 3f71d005e..80284cf92 100644
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
 
 vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
     using namespace VideoCore::Surface;
+    const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
+
     std::vector<VkAttachmentDescription> descriptors;
+    descriptors.reserve(num_attachments);
+
     std::vector<VkAttachmentReference> color_references;
+    color_references.reserve(num_attachments);
 
-    const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
     for (std::size_t rt = 0; rt < num_attachments; ++rt) {
         const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
         const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
@@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
         const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
                                                ? VK_IMAGE_LAYOUT_GENERAL
                                                : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-        VkAttachmentDescription& descriptor = descriptors.emplace_back();
-        descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
-        descriptor.format = format.format;
-        descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
-        descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-        descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
-        descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-        descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-        descriptor.initialLayout = color_layout;
-        descriptor.finalLayout = color_layout;
-
-        VkAttachmentReference& reference = color_references.emplace_back();
-        reference.attachment = static_cast<u32>(rt);
-        reference.layout = color_layout;
+        descriptors.push_back({
+            .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+            .format = format.format,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+            .initialLayout = color_layout,
+            .finalLayout = color_layout,
+        });
+
+        color_references.push_back({
+            .attachment = static_cast<u32>(rt),
+            .layout = color_layout,
+        });
     }
 
     VkAttachmentReference zeta_attachment_ref;
@@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
         const VkImageLayout zeta_layout = params.zeta_texception != 0
                                               ? VK_IMAGE_LAYOUT_GENERAL
                                               : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-        VkAttachmentDescription& descriptor = descriptors.emplace_back();
-        descriptor.flags = 0;
-        descriptor.format = format.format;
-        descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
-        descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-        descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
-        descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-        descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
-        descriptor.initialLayout = zeta_layout;
-        descriptor.finalLayout = zeta_layout;
-
-        zeta_attachment_ref.attachment = static_cast<u32>(num_attachments);
-        zeta_attachment_ref.layout = zeta_layout;
+        descriptors.push_back({
+            .flags = 0,
+            .format = format.format,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .initialLayout = zeta_layout,
+            .finalLayout = zeta_layout,
+        });
+
+        zeta_attachment_ref = {
+            .attachment = static_cast<u32>(num_attachments),
+            .layout = zeta_layout,
+        };
     }
 
-    VkSubpassDescription subpass_description;
-    subpass_description.flags = 0;
-    subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
-    subpass_description.inputAttachmentCount = 0;
-    subpass_description.pInputAttachments = nullptr;
-    subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
-    subpass_description.pColorAttachments = color_references.data();
-    subpass_description.pResolveAttachments = nullptr;
-    subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr;
-    subpass_description.preserveAttachmentCount = 0;
-    subpass_description.pPreserveAttachments = nullptr;
+    const VkSubpassDescription subpass_description{
+        .flags = 0,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .inputAttachmentCount = 0,
+        .pInputAttachments = nullptr,
+        .colorAttachmentCount = static_cast<u32>(color_references.size()),
+        .pColorAttachments = color_references.data(),
+        .pResolveAttachments = nullptr,
+        .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
+        .preserveAttachmentCount = 0,
+        .pPreserveAttachments = nullptr,
+    };
 
     VkAccessFlags access = 0;
     VkPipelineStageFlags stage = 0;
@@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
         stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
     }
 
-    VkSubpassDependency subpass_dependency;
-    subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
-    subpass_dependency.dstSubpass = 0;
-    subpass_dependency.srcStageMask = stage;
-    subpass_dependency.dstStageMask = stage;
-    subpass_dependency.srcAccessMask = 0;
-    subpass_dependency.dstAccessMask = access;
-    subpass_dependency.dependencyFlags = 0;
-
-    VkRenderPassCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.attachmentCount = static_cast<u32>(descriptors.size());
-    ci.pAttachments = descriptors.data();
-    ci.subpassCount = 1;
-    ci.pSubpasses = &subpass_description;
-    ci.dependencyCount = 1;
-    ci.pDependencies = &subpass_dependency;
-    return device.GetLogical().CreateRenderPass(ci);
+    const VkSubpassDependency subpass_dependency{
+        .srcSubpass = VK_SUBPASS_EXTERNAL,
+        .dstSubpass = 0,
+        .srcStageMask = stage,
+        .dstStageMask = stage,
+        .srcAccessMask = 0,
+        .dstAccessMask = access,
+        .dependencyFlags = 0,
+    };
+
+    return device.GetLogical().CreateRenderPass({
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .attachmentCount = static_cast<u32>(descriptors.size()),
+        .pAttachments = descriptors.data(),
+        .subpassCount = 1,
+        .pSubpasses = &subpass_description,
+        .dependencyCount = 1,
+        .pDependencies = &subpass_dependency,
+    });
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
deleted file mode 100644
index dc06f545a..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ /dev/null
@@ -1,312 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <optional>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-// TODO(Rodrigo): Fine tune these numbers.
-constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
-constexpr std::size_t FENCES_GROW_STEP = 0x40;
-
-VkFenceCreateInfo BuildFenceCreateInfo() {
-    VkFenceCreateInfo fence_ci;
-    fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-    fence_ci.pNext = nullptr;
-    fence_ci.flags = 0;
-    return fence_ci;
-}
-
-} // Anonymous namespace
-
-class CommandBufferPool final : public VKFencedPool {
-public:
-    CommandBufferPool(const VKDevice& device)
-        : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
-
-    void Allocate(std::size_t begin, std::size_t end) override {
-        // Command buffers are going to be commited, recorded, executed every single usage cycle.
-        // They are also going to be reseted when commited.
-        VkCommandPoolCreateInfo command_pool_ci;
-        command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-        command_pool_ci.pNext = nullptr;
-        command_pool_ci.flags =
-            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
-        command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily();
-
-        Pool& pool = pools.emplace_back();
-        pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci);
-        pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
-    }
-
-    VkCommandBuffer Commit(VKFence& fence) {
-        const std::size_t index = CommitResource(fence);
-        const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
-        const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
-        return pools[pool_index].cmdbufs[sub_index];
-    }
-
-private:
-    struct Pool {
-        vk::CommandPool handle;
-        vk::CommandBuffers cmdbufs;
-    };
-
-    const VKDevice& device;
-    std::vector<Pool> pools;
-};
-
-VKResource::VKResource() = default;
-
-VKResource::~VKResource() = default;
-
-VKFence::VKFence(const VKDevice& device)
-    : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {}
-
-VKFence::~VKFence() = default;
-
-void VKFence::Wait() {
-    switch (const VkResult result = handle.Wait()) {
-    case VK_SUCCESS:
-        return;
-    case VK_ERROR_DEVICE_LOST:
-        device.ReportLoss();
-        [[fallthrough]];
-    default:
-        throw vk::Exception(result);
-    }
-}
-
-void VKFence::Release() {
-    ASSERT(is_owned);
-    is_owned = false;
-}
-
-void VKFence::Commit() {
-    is_owned = true;
-    is_used = true;
-}
-
-bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
-    if (!is_used) {
-        // If a fence is not used it's always free.
-        return true;
-    }
-    if (is_owned && !owner_wait) {
-        // The fence is still being owned (Release has not been called) and ownership wait has
-        // not been asked.
-        return false;
-    }
-
-    if (gpu_wait) {
-        // Wait for the fence if it has been requested.
-        (void)handle.Wait();
-    } else {
-        if (handle.GetStatus() != VK_SUCCESS) {
-            // Vulkan fence is not ready, not much it can do here
-            return false;
-        }
-    }
-
-    // Broadcast resources their free state.
-    for (auto* resource : protected_resources) {
-        resource->OnFenceRemoval(this);
-    }
-    protected_resources.clear();
-
-    // Prepare fence for reusage.
-    handle.Reset();
-    is_used = false;
-    return true;
-}
-
-void VKFence::Protect(VKResource* resource) {
-    protected_resources.push_back(resource);
-}
-
-void VKFence::Unprotect(VKResource* resource) {
-    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
-    ASSERT(it != protected_resources.end());
-
-    resource->OnFenceRemoval(this);
-    protected_resources.erase(it);
-}
-
-void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
-    std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource,
-                 new_resource);
-}
-
-VKFenceWatch::VKFenceWatch() = default;
-
-VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
-    Watch(initial_fence);
-}
-
-VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept {
-    fence = std::exchange(rhs.fence, nullptr);
-    if (fence) {
-        fence->RedirectProtection(&rhs, this);
-    }
-}
-
-VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
-    fence = std::exchange(rhs.fence, nullptr);
-    if (fence) {
-        fence->RedirectProtection(&rhs, this);
-    }
-    return *this;
-}
-
-VKFenceWatch::~VKFenceWatch() {
-    if (fence) {
-        fence->Unprotect(this);
-    }
-}
-
-void VKFenceWatch::Wait() {
-    if (fence == nullptr) {
-        return;
-    }
-    fence->Wait();
-    fence->Unprotect(this);
-}
-
-void VKFenceWatch::Watch(VKFence& new_fence) {
-    Wait();
-    fence = &new_fence;
-    fence->Protect(this);
-}
-
-bool VKFenceWatch::TryWatch(VKFence& new_fence) {
-    if (fence) {
-        return false;
-    }
-    fence = &new_fence;
-    fence->Protect(this);
-    return true;
-}
-
-void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
-    ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
-    fence = nullptr;
-}
-
-VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
-
-VKFencedPool::~VKFencedPool() = default;
-
-std::size_t VKFencedPool::CommitResource(VKFence& fence) {
-    const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
-        for (std::size_t iterator = begin; iterator < end; ++iterator) {
-            if (watches[iterator]->TryWatch(fence)) {
-                // The resource is now being watched, a free resource was successfully found.
-                return iterator;
-            }
-        }
-        return {};
-    };
-    // Try to find a free resource from the hinted position to the end.
-    auto found = Search(free_iterator, watches.size());
-    if (!found) {
-        // Search from beginning to the hinted position.
-        found = Search(0, free_iterator);
-        if (!found) {
-            // Both searches failed, the pool is full; handle it.
-            const std::size_t free_resource = ManageOverflow();
-
-            // Watch will wait for the resource to be free.
-            watches[free_resource]->Watch(fence);
-            found = free_resource;
-        }
-    }
-    // Free iterator is hinted to the resource after the one that's been commited.
-    free_iterator = (*found + 1) % watches.size();
-    return *found;
-}
-
-std::size_t VKFencedPool::ManageOverflow() {
-    const std::size_t old_capacity = watches.size();
-    Grow();
-
-    // The last entry is guaranted to be free, since it's the first element of the freshly
-    // allocated resources.
-    return old_capacity;
-}
-
-void VKFencedPool::Grow() {
-    const std::size_t old_capacity = watches.size();
-    watches.resize(old_capacity + grow_step);
-    std::generate(watches.begin() + old_capacity, watches.end(),
-                  []() { return std::make_unique<VKFenceWatch>(); });
-    Allocate(old_capacity, old_capacity + grow_step);
-}
-
-VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
-    GrowFences(FENCES_GROW_STEP);
-    command_buffer_pool = std::make_unique<CommandBufferPool>(device);
-}
-
-VKResourceManager::~VKResourceManager() = default;
-
-VKFence& VKResourceManager::CommitFence() {
-    const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
-        const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
-        const auto hinted = fences.begin() + fences_iterator;
-
-        auto it = std::find_if(hinted, fences.end(), Tick);
-        if (it == fences.end()) {
-            it = std::find_if(fences.begin(), hinted, Tick);
-            if (it == hinted) {
-                return nullptr;
-            }
-        }
-        fences_iterator = std::distance(fences.begin(), it) + 1;
-        if (fences_iterator >= fences.size())
-            fences_iterator = 0;
-
-        auto& fence = *it;
-        fence->Commit();
-        return fence.get();
-    };
-
-    VKFence* found_fence = StepFences(false, false);
-    if (!found_fence) {
-        // Try again, this time waiting.
-        found_fence = StepFences(true, false);
-
-        if (!found_fence) {
-            // Allocate new fences and try again.
-            LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
-                     fences.size() + FENCES_GROW_STEP);
-
-            GrowFences(FENCES_GROW_STEP);
-            found_fence = StepFences(true, false);
-            ASSERT(found_fence != nullptr);
-        }
-    }
-    return *found_fence;
-}
-
-VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
-    return command_buffer_pool->Commit(fence);
-}
-
-void VKResourceManager::GrowFences(std::size_t new_fences_count) {
-    const std::size_t previous_size = fences.size();
-    fences.resize(previous_size + new_fences_count);
-
-    std::generate(fences.begin() + previous_size, fences.end(),
-                  [this] { return std::make_unique<VKFence>(device); });
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
deleted file mode 100644
index f683d2276..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <memory>
-#include <vector>
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-class VKDevice;
-class VKFence;
-class VKResourceManager;
-
-class CommandBufferPool;
-
-/// Interface for a Vulkan resource
-class VKResource {
-public:
-    explicit VKResource();
-    virtual ~VKResource();
-
-    /**
-     * Signals the object that an owning fence has been signaled.
-     * @param signaling_fence Fence that signals its usage end.
-     */
-    virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
-};
-
-/**
- * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
- * They must be commited from the resource manager. Their usage flow is: commit the fence from the
- * resource manager, protect resources with it and use them, send the fence to an execution queue
- * and Wait for it if needed and then call Release. Used resources will automatically be signaled
- * when they are free to be reused.
- * @brief Protects resources for concurrent usage and signals its release.
- */
-class VKFence {
-    friend class VKResourceManager;
-
-public:
-    explicit VKFence(const VKDevice& device);
-    ~VKFence();
-
-    /**
-     * Waits for the fence to be signaled.
-     * @warning You must have ownership of the fence and it has to be previously sent to a queue to
-     * call this function.
-     */
-    void Wait();
-
-    /**
-     * Releases ownership of the fence. Pass after it has been sent to an execution queue.
-     * Unmanaged usage of the fence after the call will result in undefined behavior because it may
-     * be being used for something else.
-     */
-    void Release();
-
-    /// Protects a resource with this fence.
-    void Protect(VKResource* resource);
-
-    /// Removes protection for a resource.
-    void Unprotect(VKResource* resource);
-
-    /// Redirects one protected resource to a new address.
-    void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
-
-    /// Retreives the fence.
-    operator VkFence() const {
-        return *handle;
-    }
-
-private:
-    /// Take ownership of the fence.
-    void Commit();
-
-    /**
-     * Updates the fence status.
-     * @warning Waiting for the owner might soft lock the execution.
-     * @param gpu_wait Wait for the fence to be signaled by the driver.
-     * @param owner_wait Wait for the owner to signal its freedom.
-     * @returns True if the fence is free. Waiting for gpu and owner will always return true.
-     */
-    bool Tick(bool gpu_wait, bool owner_wait);
-
-    const VKDevice& device;                       ///< Device handler
-    vk::Fence handle;                             ///< Vulkan fence
-    std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
-    bool is_owned = false; ///< The fence has been commited but not released yet.
-    bool is_used = false;  ///< The fence has been commited but it has not been checked to be free.
-};
-
-/**
- * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
- * resources without having to inherit VKResource from their handlers.
- */
-class VKFenceWatch final : public VKResource {
-public:
-    explicit VKFenceWatch();
-    VKFenceWatch(VKFence& initial_fence);
-    VKFenceWatch(VKFenceWatch&&) noexcept;
-    VKFenceWatch(const VKFenceWatch&) = delete;
-    ~VKFenceWatch() override;
-
-    VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
-
-    /// Waits for the fence to be released.
-    void Wait();
-
-    /**
-     * Waits for a previous fence and watches a new one.
-     * @param new_fence New fence to wait to.
-     */
-    void Watch(VKFence& new_fence);
-
-    /**
-     * Checks if it's currently being watched and starts watching it if it's available.
-     * @returns True if a watch has started, false if it's being watched.
-     */
-    bool TryWatch(VKFence& new_fence);
-
-    void OnFenceRemoval(VKFence* signaling_fence) override;
-
-    /**
-     * Do not use it paired with Watch. Use TryWatch instead.
-     * Returns true when the watch is free.
-     */
-    bool IsUsed() const {
-        return fence != nullptr;
-    }
-
-private:
-    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
-};
-
-/**
- * Handles a pool of resources protected by fences. Manages resource overflow allocating more
- * resources.
- */
-class VKFencedPool {
-public:
-    explicit VKFencedPool(std::size_t grow_step);
-    virtual ~VKFencedPool();
-
-protected:
-    /**
-     * Commits a free resource and protects it with a fence. It may allocate new resources.
-     * @param fence Fence that protects the commited resource.
-     * @returns Index of the resource commited.
-     */
-    std::size_t CommitResource(VKFence& fence);
-
-    /// Called when a chunk of resources have to be allocated.
-    virtual void Allocate(std::size_t begin, std::size_t end) = 0;
-
-private:
-    /// Manages pool overflow allocating new resources.
-    std::size_t ManageOverflow();
-
-    /// Allocates a new page of resources.
-    void Grow();
-
-    std::size_t grow_step = 0;     ///< Number of new resources created after an overflow
-    std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
-    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
-};
-
-/**
- * The resource manager handles all resources that can be protected with a fence avoiding
- * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
- */
-class VKResourceManager final {
-public:
-    explicit VKResourceManager(const VKDevice& device);
-    ~VKResourceManager();
-
-    /// Commits a fence. It has to be sent to a queue and released.
-    VKFence& CommitFence();
-
-    /// Commits an unused command buffer and protects it with a fence.
-    VkCommandBuffer CommitCommandBuffer(VKFence& fence);
-
-private:
-    /// Allocates new fences.
-    void GrowFences(std::size_t new_fences_count);
-
-    const VKDevice& device;          ///< Device handler.
-    std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
-    std::vector<std::unique_ptr<VKFence>> fences;           ///< Pool of fences.
-    std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
new file mode 100644
index 000000000..ee274ac59
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -0,0 +1,63 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+
+namespace Vulkan {
+
+ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
+    : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
+
+ResourcePool::~ResourcePool() = default;
+
+size_t ResourcePool::CommitResource() {
+    // Refresh semaphore to query updated results
+    master_semaphore.Refresh();
+
+    const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> {
+        for (size_t iterator = begin; iterator < end; ++iterator) {
+            if (master_semaphore.IsFree(ticks[iterator])) {
+                ticks[iterator] = master_semaphore.CurrentTick();
+                return iterator;
+            }
+        }
+        return {};
+    };
+    // Try to find a free resource from the hinted position to the end.
+    auto found = search(free_iterator, ticks.size());
+    if (!found) {
+        // Search from beginning to the hinted position.
+        found = search(0, free_iterator);
+        if (!found) {
+            // Both searches failed, the pool is full; handle it.
+            const size_t free_resource = ManageOverflow();
+
+            ticks[free_resource] = master_semaphore.CurrentTick();
+            found = free_resource;
+        }
+    }
+    // Free iterator is hinted to the resource after the one that's been committed.
+    free_iterator = (*found + 1) % ticks.size();
+    return *found;
+}
+
+size_t ResourcePool::ManageOverflow() {
+    const size_t old_capacity = ticks.size();
+    Grow();
+
+    // The returned entry is guaranteed to be free, since it's the first element of the freshly
+    // allocated resources.
+    return old_capacity;
+}
+
+void ResourcePool::Grow() {
+    const size_t old_capacity = ticks.size();
+    ticks.resize(old_capacity + grow_step);
+    Allocate(old_capacity, old_capacity + grow_step);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
new file mode 100644
index 000000000..a018c7ec2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -0,0 +1,43 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+
+/**
+ * Handles a pool of resources protected by master semaphore ticks. Manages resource overflow by
+ * allocating more resources.
+ */
+class ResourcePool {
+public:
+    explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
+    virtual ~ResourcePool();
+
+protected:
+    size_t CommitResource();
+
+    /// Called when a chunk of resources has to be allocated.
+    virtual void Allocate(size_t begin, size_t end) = 0;
+
+private:
+    /// Manages pool overflow allocating new resources.
+    size_t ManageOverflow();
+
+    /// Allocates a new page of resources.
+    void Grow();
+
+    MasterSemaphore& master_semaphore;
+    size_t grow_step = 0;     ///< Number of new resources created after an overflow
+    size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
+    std::vector<u64> ticks;   ///< Ticks for each resource
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 616eacc36..b068888f9 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -44,32 +44,36 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
     const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
     const std::array color = tsc.GetBorderColor();
 
-    VkSamplerCustomBorderColorCreateInfoEXT border;
-    border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
-    border.pNext = nullptr;
-    border.format = VK_FORMAT_UNDEFINED;
+    VkSamplerCustomBorderColorCreateInfoEXT border{
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .customBorderColor = {},
+        .format = VK_FORMAT_UNDEFINED,
+    };
     std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
 
-    VkSamplerCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
-    ci.pNext = arbitrary_borders ? &border : nullptr;
-    ci.flags = 0;
-    ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter);
-    ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter);
-    ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter);
-    ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter);
-    ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter);
-    ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter);
-    ci.mipLodBias = tsc.GetLodBias();
-    ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE;
-    ci.maxAnisotropy = tsc.GetMaxAnisotropy();
-    ci.compareEnable = tsc.depth_compare_enabled;
-    ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
-    ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
-    ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
-    ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
-    ci.unnormalizedCoordinates = VK_FALSE;
-    return device.GetLogical().CreateSampler(ci);
+    return device.GetLogical().CreateSampler({
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .pNext = arbitrary_borders ? &border : nullptr,
+        .flags = 0,
+        .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+        .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+        .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+        .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+        .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+        .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+        .mipLodBias = tsc.GetLodBias(),
+        .anisotropyEnable =
+            static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
+        .maxAnisotropy = tsc.GetMaxAnisotropy(),
+        .compareEnable = tsc.depth_compare_enabled,
+        .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+        .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
+        .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
+        .borderColor =
+            arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
+        .unnormalizedCoordinates = VK_FALSE,
+    });
 }
 
 VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 56524e6f3..1a483dc71 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -10,9 +10,10 @@
 
 #include "common/microprofile.h"
 #include "common/thread.h"
+#include "video_core/renderer_vulkan/vk_command_pool.h"
 #include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/wrapper.h"
@@ -35,10 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
     last = nullptr;
 }
 
-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
-                         StateTracker& state_tracker)
-    : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
-      next_fence{&resource_manager.CommitFence()} {
+VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_)
+    : device{device_}, state_tracker{state_tracker_},
+      master_semaphore{std::make_unique<MasterSemaphore>(device)},
+      command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
     AcquireNewChunk();
     AllocateNewContext();
     worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -50,20 +51,27 @@ VKScheduler::~VKScheduler() {
     worker_thread.join();
 }
 
-void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) {
+u64 VKScheduler::CurrentTick() const noexcept {
+    return master_semaphore->CurrentTick();
+}
+
+bool VKScheduler::IsFree(u64 tick) const noexcept {
+    return master_semaphore->IsFree(tick);
+}
+
+void VKScheduler::Wait(u64 tick) {
+    master_semaphore->Wait(tick);
+}
+
+void VKScheduler::Flush(VkSemaphore semaphore) {
     SubmitExecution(semaphore);
-    if (release_fence) {
-        current_fence->Release();
-    }
     AllocateNewContext();
 }
 
-void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore semaphore) {
+    const u64 presubmit_tick = CurrentTick();
     SubmitExecution(semaphore);
-    current_fence->Wait();
-    if (release_fence) {
-        current_fence->Release();
-    }
+    Wait(presubmit_tick);
     AllocateNewContext();
 }
 
@@ -100,16 +108,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame
     state.framebuffer = framebuffer;
     state.render_area = render_area;
 
-    VkRenderPassBeginInfo renderpass_bi;
-    renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
-    renderpass_bi.pNext = nullptr;
-    renderpass_bi.renderPass = renderpass;
-    renderpass_bi.framebuffer = framebuffer;
-    renderpass_bi.renderArea.offset.x = 0;
-    renderpass_bi.renderArea.offset.y = 0;
-    renderpass_bi.renderArea.extent = render_area;
-    renderpass_bi.clearValueCount = 0;
-    renderpass_bi.pClearValues = nullptr;
+    const VkRenderPassBeginInfo renderpass_bi{
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+        .pNext = nullptr,
+        .renderPass = renderpass,
+        .framebuffer = framebuffer,
+        .renderArea =
+            {
+                .offset = {.x = 0, .y = 0},
+                .extent = render_area,
+            },
+        .clearValueCount = 0,
+        .pClearValues = nullptr,
+    };
 
     Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
         if (end_renderpass) {
@@ -157,17 +168,38 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
 
     current_cmdbuf.End();
 
-    VkSubmitInfo submit_info;
-    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-    submit_info.pNext = nullptr;
-    submit_info.waitSemaphoreCount = 0;
-    submit_info.pWaitSemaphores = nullptr;
-    submit_info.pWaitDstStageMask = nullptr;
-    submit_info.commandBufferCount = 1;
-    submit_info.pCommandBuffers = current_cmdbuf.address();
-    submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
-    submit_info.pSignalSemaphores = &semaphore;
-    switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
+    const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+    const u32 num_signal_semaphores = semaphore ? 2U : 1U;
+
+    const u64 signal_value = master_semaphore->CurrentTick();
+    const u64 wait_value = signal_value - 1;
+    const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+    master_semaphore->NextTick();
+
+    const std::array signal_values{signal_value, u64(0)};
+    const std::array signal_semaphores{timeline_semaphore, semaphore};
+
+    const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+        .pNext = nullptr,
+        .waitSemaphoreValueCount = 1,
+        .pWaitSemaphoreValues = &wait_value,
+        .signalSemaphoreValueCount = num_signal_semaphores,
+        .pSignalSemaphoreValues = signal_values.data(),
+    };
+    const VkSubmitInfo submit_info{
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .pNext = &timeline_si,
+        .waitSemaphoreCount = 1,
+        .pWaitSemaphores = &timeline_semaphore,
+        .pWaitDstStageMask = &wait_stage_mask,
+        .commandBufferCount = 1,
+        .pCommandBuffers = current_cmdbuf.address(),
+        .signalSemaphoreCount = num_signal_semaphores,
+        .pSignalSemaphores = signal_semaphores.data(),
+    };
+    switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
     case VK_SUCCESS:
         break;
     case VK_ERROR_DEVICE_LOST:
@@ -179,21 +211,15 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
 }
 
 void VKScheduler::AllocateNewContext() {
-    ++ticks;
-
-    VkCommandBufferBeginInfo cmdbuf_bi;
-    cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-    cmdbuf_bi.pNext = nullptr;
-    cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-    cmdbuf_bi.pInheritanceInfo = nullptr;
-
     std::unique_lock lock{mutex};
-    current_fence = next_fence;
-    next_fence = &resource_manager.CommitFence();
 
-    current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence),
-                                       device.GetDispatchLoader());
-    current_cmdbuf.Begin(cmdbuf_bi);
+    current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+    current_cmdbuf.Begin({
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .pNext = nullptr,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+        .pInheritanceInfo = nullptr,
+    });
 
     // Enable counters once again. These are disabled when a command buffer is finished.
     if (query_cache) {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 970a65566..7be8a19f0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -16,42 +16,33 @@
 
 namespace Vulkan {
 
+class CommandPool;
+class MasterSemaphore;
 class StateTracker;
 class VKDevice;
-class VKFence;
 class VKQueryCache;
-class VKResourceManager;
-
-class VKFenceView {
-public:
-    VKFenceView() = default;
-    VKFenceView(VKFence* const& fence) : fence{fence} {}
-
-    VKFence* operator->() const noexcept {
-        return fence;
-    }
-
-    operator VKFence&() const noexcept {
-        return *fence;
-    }
-
-private:
-    VKFence* const& fence;
-};
 
 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class VKScheduler {
 public:
-    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
-                         StateTracker& state_tracker);
+    explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker);
     ~VKScheduler();
 
+    /// Returns the current command buffer tick.
+    [[nodiscard]] u64 CurrentTick() const noexcept;
+
+    /// Returns true when the GPU has already signaled the given tick.
+    [[nodiscard]] bool IsFree(u64 tick) const noexcept;
+
+    /// Waits until the GPU has signaled the given tick.
+    void Wait(u64 tick);
+
     /// Sends the current execution context to the GPU.
-    void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr);
+    void Flush(VkSemaphore semaphore = nullptr);
 
     /// Sends the current execution context to the GPU and waits for it to complete.
-    void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr);
+    void Finish(VkSemaphore semaphore = nullptr);
 
     /// Waits for the worker thread to finish executing everything. After this function returns it's
     /// safe to touch worker resources.
@@ -86,14 +77,9 @@ public:
         (void)chunk->Record(command);
     }
 
-    /// Gets a reference to the current fence.
-    VKFenceView GetFence() const {
-        return current_fence;
-    }
-
-    /// Returns the current command buffer tick.
-    u64 Ticks() const {
-        return ticks;
+    /// Returns the master timeline semaphore.
+    [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
+        return *master_semaphore;
     }
 
 private:
@@ -171,6 +157,13 @@ private:
         std::array<u8, 0x8000> data{};
     };
 
+    struct State {
+        VkRenderPass renderpass = nullptr;
+        VkFramebuffer framebuffer = nullptr;
+        VkExtent2D render_area = {0, 0};
+        VkPipeline graphics_pipeline = nullptr;
+    };
+
     void WorkerThread();
 
     void SubmitExecution(VkSemaphore semaphore);
@@ -186,30 +179,23 @@ private:
     void AcquireNewChunk();
 
     const VKDevice& device;
-    VKResourceManager& resource_manager;
     StateTracker& state_tracker;
 
+    std::unique_ptr<MasterSemaphore> master_semaphore;
+    std::unique_ptr<CommandPool> command_pool;
+
     VKQueryCache* query_cache = nullptr;
 
     vk::CommandBuffer current_cmdbuf;
-    VKFence* current_fence = nullptr;
-    VKFence* next_fence = nullptr;
-
-    struct State {
-        VkRenderPass renderpass = nullptr;
-        VkFramebuffer framebuffer = nullptr;
-        VkExtent2D render_area = {0, 0};
-        VkPipeline graphics_pipeline = nullptr;
-    } state;
 
     std::unique_ptr<CommandChunk> chunk;
     std::thread worker_thread;
 
+    State state;
     Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
     Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
     std::mutex mutex;
     std::condition_variable cv;
-    std::atomic<u64> ticks = 0;
     bool quit = false;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 97429cc59..cd7d7a4e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -685,13 +685,19 @@ private:
         }
         t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
 
-        const u32 smem_size = specialization.shared_memory_size;
+        u32 smem_size = specialization.shared_memory_size * 4;
         if (smem_size == 0) {
             // Avoid declaring an empty array.
             return;
         }
-        const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4);
-        const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count));
+        const u32 limit = device.GetMaxComputeSharedMemorySize();
+        if (smem_size > limit) {
+            LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
+                      smem_size, limit);
+            smem_size = limit;
+        }
+
+        const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
         const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
         Name(type_pointer, "SharedMemory");
 
@@ -700,9 +706,9 @@ private:
     }
 
     void DeclareInternalFlags() {
-        constexpr std::array names = {"zero", "sign", "carry", "overflow"};
+        static constexpr std::array names{"zero", "sign", "carry", "overflow"};
+
         for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
-            const auto flag_code = static_cast<InternalFlag>(flag);
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
             internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
         }
@@ -2798,7 +2804,6 @@ private:
     std::map<GlobalMemoryBase, Id> global_buffers;
     std::map<u32, TexelBuffer> uniform_texels;
     std::map<u32, SampledImage> sampled_images;
-    std::map<u32, TexelBuffer> storage_texels;
     std::map<u32, StorageImage> images;
 
     std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index 112df9c71..c1a218d76 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons
     const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
     std::memcpy(data.get(), code_data, code_size);
 
-    VkShaderModuleCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.codeSize = code_size;
-    ci.pCode = data.get();
-    return device.GetLogical().CreateShaderModule(ci);
+    return device.GetLogical().CreateShaderModule({
+        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .codeSize = code_size,
+        .pCode = data.get(),
+    });
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 45c180221..2fd3b7f39 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -10,36 +10,18 @@
 #include "common/bit_util.h"
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
-VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
-                                                  u64 last_epoch)
-    : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
+VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
+    : buffer{std::move(buffer_)} {}
 
-VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
-    buffer = std::move(rhs.buffer);
-    watch = std::move(rhs.watch);
-    last_epoch = rhs.last_epoch;
-}
-
-VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
-
-VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
-    StagingBuffer&& rhs) noexcept {
-    buffer = std::move(rhs.buffer);
-    watch = std::move(rhs.watch);
-    last_epoch = rhs.last_epoch;
-    return *this;
-}
-
-VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
-                                         VKScheduler& scheduler)
-    : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {}
+VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_,
+                                         VKScheduler& scheduler_)
+    : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
 
 VKStagingBufferPool::~VKStagingBufferPool() = default;
 
@@ -51,7 +33,6 @@ VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visib
 }
 
 void VKStagingBufferPool::TickFrame() {
-    ++epoch;
     current_delete_level = (current_delete_level + 1) % NumLevels;
 
     ReleaseCache(true);
@@ -59,11 +40,12 @@ void VKStagingBufferPool::TickFrame() {
 }
 
 VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
-    for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
-        if (entry.watch.TryWatch(scheduler.GetFence())) {
-            entry.last_epoch = epoch;
-            return &*entry.buffer;
+    for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
+        if (!scheduler.IsFree(entry.tick)) {
+            continue;
         }
+        entry.tick = scheduler.CurrentTick();
+        return &*entry.buffer;
     }
     return nullptr;
 }
@@ -71,24 +53,25 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
 VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
     const u32 log2 = Common::Log2Ceil64(size);
 
-    VkBufferCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.size = 1ULL << log2;
-    ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
-               VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-               VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
-    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    ci.queueFamilyIndexCount = 0;
-    ci.pQueueFamilyIndices = nullptr;
-
     auto buffer = std::make_unique<VKBuffer>();
-    buffer->handle = device.GetLogical().CreateBuffer(ci);
+    buffer->handle = device.GetLogical().CreateBuffer({
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = 1ULL << log2,
+        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
     buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
 
-    auto& entries = GetCache(host_visible)[log2].entries;
-    return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
+    std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries;
+    StagingBuffer& entry = entries.emplace_back(std::move(buffer));
+    entry.tick = scheduler.CurrentTick();
+    return *entry.buffer;
 }
 
 VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
@@ -110,9 +93,8 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
     auto& entries = staging.entries;
     const std::size_t old_size = entries.size();
 
-    const auto is_deleteable = [this](const auto& entry) {
-        static constexpr u64 epochs_to_destroy = 180;
-        return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
+    const auto is_deleteable = [this](const StagingBuffer& entry) {
+        return scheduler.IsFree(entry.tick);
     };
     const std::size_t begin_offset = staging.delete_index;
     const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 3c4901437..2dd5049ac 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -10,13 +10,11 @@
 #include "common/common_types.h"
 
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
 class VKDevice;
-class VKFenceWatch;
 class VKScheduler;
 
 struct VKBuffer final {
@@ -36,16 +34,10 @@ public:
 
 private:
     struct StagingBuffer final {
-        explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
-        StagingBuffer(StagingBuffer&& rhs) noexcept;
-        StagingBuffer(const StagingBuffer&) = delete;
-        ~StagingBuffer();
-
-        StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
+        explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer);
 
         std::unique_ptr<VKBuffer> buffer;
-        VKFenceWatch watch;
-        u64 last_epoch = 0;
+        u64 tick = 0;
     };
 
     struct StagingBuffers final {
@@ -73,8 +65,6 @@ private:
     StagingBuffersCache host_staging_buffers;
     StagingBuffersCache device_staging_buffers;
 
-    u64 epoch = 0;
-
     std::size_t current_delete_level = 0;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 9151d9fb1..5d2c4a796 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() {
     flags[DepthWriteEnable] = true;
     flags[DepthCompareOp] = true;
     flags[FrontFace] = true;
-    flags[PrimitiveTopology] = true;
     flags[StencilOp] = true;
     flags[StencilTestEnable] = true;
     return flags;
@@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) {
     table[OFF(screen_y_control)] = FrontFace;
 }
 
-void SetupDirtyPrimitiveTopology(Tables& tables) {
-    tables[0][OFF(draw.topology)] = PrimitiveTopology;
-}
-
 void SetupDirtyStencilOp(Tables& tables) {
     auto& table = tables[0];
     table[OFF(stencil_front_op_fail)] = StencilOp;
@@ -137,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
 
 } // Anonymous namespace
 
-StateTracker::StateTracker(Core::System& system)
-    : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
-
-void StateTracker::Initialize() {
-    auto& dirty = system.GPU().Maxwell3D().dirty;
-    auto& tables = dirty.tables;
+StateTracker::StateTracker(Tegra::GPU& gpu)
+    : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
+    auto& tables = gpu.Maxwell3D().dirty.tables;
     SetupDirtyRenderTargets(tables);
     SetupDirtyViewports(tables);
     SetupDirtyScissors(tables);
@@ -156,13 +148,8 @@ void StateTracker::Initialize() {
     SetupDirtyDepthWriteEnable(tables);
     SetupDirtyDepthCompareOp(tables);
     SetupDirtyFrontFace(tables);
-    SetupDirtyPrimitiveTopology(tables);
     SetupDirtyStencilOp(tables);
     SetupDirtyStencilTestEnable(tables);
 }
 
-void StateTracker::InvalidateCommandBufferState() {
-    system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
-}
-
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 54ca0d6c6..1de789e57 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -32,7 +32,6 @@ enum : u8 {
     DepthWriteEnable,
     DepthCompareOp,
     FrontFace,
-    PrimitiveTopology,
     StencilOp,
     StencilTestEnable,
 
@@ -43,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max());
 } // namespace Dirty
 
 class StateTracker {
-public:
-    explicit StateTracker(Core::System& system);
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-    void Initialize();
+public:
+    explicit StateTracker(Tegra::GPU& gpu);
 
-    void InvalidateCommandBufferState();
+    void InvalidateCommandBufferState() {
+        flags |= invalidation_flags;
+        current_topology = INVALID_TOPOLOGY;
+    }
 
     bool TouchViewports() {
         return Exchange(Dirty::Viewports, false);
@@ -102,10 +104,6 @@ public:
         return Exchange(Dirty::FrontFace, false);
     }
 
-    bool TouchPrimitiveTopology() {
-        return Exchange(Dirty::PrimitiveTopology, false);
-    }
-
     bool TouchStencilOp() {
         return Exchange(Dirty::StencilOp, false);
     }
@@ -114,16 +112,24 @@ public:
         return Exchange(Dirty::StencilTestEnable, false);
     }
 
+    bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
+        const bool has_changed = current_topology != new_topology;
+        current_topology = new_topology;
+        return has_changed;
+    }
+
 private:
+    static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
+
     bool Exchange(std::size_t id, bool new_value) const noexcept {
-        auto& flags = system.GPU().Maxwell3D().dirty.flags;
         const bool is_dirty = flags[id];
         flags[id] = new_value;
         return is_dirty;
     }
 
-    Core::System& system;
+    Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
     Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
+    Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 2d28a6c47..1b59612b9 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -11,7 +11,6 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/renderer_vulkan/wrapper.h"
@@ -57,9 +56,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
 
 } // Anonymous namespace
 
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
                                VkBufferUsageFlags usage)
-    : device{device}, scheduler{scheduler} {
+    : device{device_}, scheduler{scheduler_} {
     CreateBuffers(usage);
     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
     ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
@@ -111,7 +110,7 @@ void VKStreamBuffer::Unmap(u64 size) {
     }
     auto& watch = current_watches[current_watch_cursor++];
     watch.upper_bound = offset;
-    watch.fence.Watch(scheduler.GetFence());
+    watch.tick = scheduler.CurrentTick();
 }
 
 void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
@@ -121,31 +120,29 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
 
     // Substract from the preferred heap size some bytes to avoid getting out of memory.
     const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
-    const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024;
-
-    VkBufferCreateInfo buffer_ci;
-    buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    buffer_ci.pNext = nullptr;
-    buffer_ci.flags = 0;
-    buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
-    buffer_ci.usage = usage;
-    buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    buffer_ci.queueFamilyIndexCount = 0;
-    buffer_ci.pQueueFamilyIndices = nullptr;
-
-    buffer = device.GetLogical().CreateBuffer(buffer_ci);
+    // Following DXVK's example, cap the stream buffer size at half of the preferred heap.
+    const VkDeviceSize allocable_size = heap_size / 2;
+    buffer = device.GetLogical().CreateBuffer({
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
+        .usage = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
 
     const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
     const u32 required_flags = requirements.memoryTypeBits;
     stream_buffer_size = static_cast<u64>(requirements.size);
 
-    VkMemoryAllocateInfo memory_ai;
-    memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-    memory_ai.pNext = nullptr;
-    memory_ai.allocationSize = requirements.size;
-    memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
-
-    memory = device.GetLogical().AllocateMemory(memory_ai);
+    memory = device.GetLogical().AllocateMemory({
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .allocationSize = requirements.size,
+        .memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
+    });
     buffer.BindMemory(*memory, 0);
 }
 
@@ -160,7 +157,7 @@ void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
     while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
         auto& watch = previous_watches[wait_cursor];
         wait_bound = watch.upper_bound;
-        watch.fence.Wait();
+        scheduler.Wait(watch.tick);
         ++wait_cursor;
     }
 }
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 689f0d276..5e15ad78f 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -14,7 +14,6 @@
 namespace Vulkan {
 
 class VKDevice;
-class VKFence;
 class VKFenceWatch;
 class VKScheduler;
 
@@ -44,8 +43,8 @@ public:
     }
 
 private:
-    struct Watch final {
-        VKFenceWatch fence;
+    struct Watch {
+        u64 tick{};
         u64 upper_bound{};
     };
 
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index bffd8f32a..9636a7c65 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -12,7 +12,7 @@
 #include "core/core.h"
 #include "core/frontend/framebuffer_layout.h"
 #include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 
@@ -56,8 +56,8 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
 
 } // Anonymous namespace
 
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device)
-    : surface{surface}, device{device} {}
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_)
+    : surface{surface_}, device{device_}, scheduler{scheduler_} {}
 
 VKSwapchain::~VKSwapchain() = default;
 
@@ -75,35 +75,33 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
     CreateSemaphores();
     CreateImageViews();
 
-    fences.resize(image_count, nullptr);
+    resource_ticks.clear();
+    resource_ticks.resize(image_count);
 }
 
 void VKSwapchain::AcquireNextImage() {
     device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
                                             *present_semaphores[frame_index], {}, &image_index);
 
-    if (auto& fence = fences[image_index]; fence) {
-        fence->Wait();
-        fence->Release();
-        fence = nullptr;
-    }
+    scheduler.Wait(resource_ticks[image_index]);
 }
 
-bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
+bool VKSwapchain::Present(VkSemaphore render_semaphore) {
     const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
     const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
     const auto present_queue{device.GetPresentQueue()};
     bool recreated = false;
 
-    VkPresentInfoKHR present_info;
-    present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
-    present_info.pNext = nullptr;
-    present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U;
-    present_info.pWaitSemaphores = semaphores.data();
-    present_info.swapchainCount = 1;
-    present_info.pSwapchains = swapchain.address();
-    present_info.pImageIndices = &image_index;
-    present_info.pResults = nullptr;
+    const VkPresentInfoKHR present_info{
+        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+        .pNext = nullptr,
+        .waitSemaphoreCount = render_semaphore ? 2U : 1U,
+        .pWaitSemaphores = semaphores.data(),
+        .swapchainCount = 1,
+        .pSwapchains = swapchain.address(),
+        .pImageIndices = &image_index,
+        .pResults = nullptr,
+    };
 
     switch (const VkResult result = present_queue.Present(present_info)) {
     case VK_SUCCESS:
@@ -122,8 +120,7 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
         break;
     }
 
-    ASSERT(fences[image_index] == nullptr);
-    fences[image_index] = &fence;
+    resource_ticks[image_index] = scheduler.CurrentTick();
     frame_index = (frame_index + 1) % static_cast<u32>(image_count);
     return recreated;
 }
@@ -147,24 +144,26 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
         requested_image_count = capabilities.maxImageCount;
     }
 
-    VkSwapchainCreateInfoKHR swapchain_ci;
-    swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
-    swapchain_ci.pNext = nullptr;
-    swapchain_ci.flags = 0;
-    swapchain_ci.surface = surface;
-    swapchain_ci.minImageCount = requested_image_count;
-    swapchain_ci.imageFormat = surface_format.format;
-    swapchain_ci.imageColorSpace = surface_format.colorSpace;
-    swapchain_ci.imageArrayLayers = 1;
-    swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-    swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    swapchain_ci.queueFamilyIndexCount = 0;
-    swapchain_ci.pQueueFamilyIndices = nullptr;
-    swapchain_ci.preTransform = capabilities.currentTransform;
-    swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
-    swapchain_ci.presentMode = present_mode;
-    swapchain_ci.clipped = VK_FALSE;
-    swapchain_ci.oldSwapchain = nullptr;
+    VkSwapchainCreateInfoKHR swapchain_ci{
+        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+        .pNext = nullptr,
+        .flags = 0,
+        .surface = surface,
+        .minImageCount = requested_image_count,
+        .imageFormat = surface_format.format,
+        .imageColorSpace = surface_format.colorSpace,
+        .imageExtent = {},
+        .imageArrayLayers = 1,
+        .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+        .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+        .preTransform = capabilities.currentTransform,
+        .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+        .presentMode = present_mode,
+        .clipped = VK_FALSE,
+        .oldSwapchain = nullptr,
+    };
 
     const u32 graphics_family{device.GetGraphicsFamily()};
     const u32 present_family{device.GetPresentFamily()};
@@ -173,8 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
         swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
         swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
         swapchain_ci.pQueueFamilyIndices = queue_indices.data();
-    } else {
-        swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
     }
 
     // Request the size again to reduce the possibility of a TOCTOU race condition.
@@ -200,20 +197,29 @@ void VKSwapchain::CreateSemaphores() {
 }
 
 void VKSwapchain::CreateImageViews() {
-    VkImageViewCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    // ci.image
-    ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
-    ci.format = image_format;
-    ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
-                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
-    ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-    ci.subresourceRange.baseMipLevel = 0;
-    ci.subresourceRange.levelCount = 1;
-    ci.subresourceRange.baseArrayLayer = 0;
-    ci.subresourceRange.layerCount = 1;
+    VkImageViewCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .image = {},
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = image_format,
+        .components =
+            {
+                .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+            },
+        .subresourceRange =
+            {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .baseMipLevel = 0,
+                .levelCount = 1,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            },
+    };
 
     image_views.resize(image_count);
     for (std::size_t i = 0; i < image_count; i++) {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index a35d61345..6b39befdf 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -16,11 +16,11 @@ struct FramebufferLayout;
 namespace Vulkan {
 
 class VKDevice;
-class VKFence;
+class VKScheduler;
 
 class VKSwapchain {
 public:
-    explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device);
+    explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler);
     ~VKSwapchain();
 
     /// Creates (or recreates) the swapchain with a given size.
@@ -31,7 +31,7 @@ public:
 
     /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be
-    /// recreated. Takes responsability for the ownership of fence.
-    bool Present(VkSemaphore render_semaphore, VKFence& fence);
+    /// recreated. Records the scheduler's current tick for the presented image.
+    bool Present(VkSemaphore render_semaphore);
 
     /// Returns true when the framebuffer layout has changed.
     bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
@@ -74,6 +74,7 @@ private:
 
     const VkSurfaceKHR surface;
     const VKDevice& device;
+    VKScheduler& scheduler;
 
     vk::SwapchainKHR swapchain;
 
@@ -81,7 +82,7 @@ private:
     std::vector<VkImage> images;
     std::vector<vk::ImageView> image_views;
     std::vector<vk::Framebuffer> framebuffers;
-    std::vector<VKFence*> fences;
+    std::vector<u64> resource_ticks;
     std::vector<vk::Semaphore> present_semaphores;
 
     u32 image_index{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index bd93dcf20..f2c8f2ae1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) {
 vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
                         std::size_t host_memory_size) {
     // TODO(Rodrigo): Move texture buffer creation to the buffer cache
-    VkBufferCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.size = static_cast<VkDeviceSize>(host_memory_size);
-    ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
-               VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
-    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    ci.queueFamilyIndexCount = 0;
-    ci.pQueueFamilyIndices = nullptr;
-    return device.GetLogical().CreateBuffer(ci);
+    return device.GetLogical().CreateBuffer({
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = static_cast<VkDeviceSize>(host_memory_size),
+        .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+                 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+                 VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
 }
 
 VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
@@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
                                                     std::size_t host_memory_size) {
     ASSERT(params.IsBuffer());
 
-    VkBufferViewCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.buffer = buffer;
-    ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
-    ci.offset = 0;
-    ci.range = static_cast<VkDeviceSize>(host_memory_size);
-    return ci;
+    return {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .buffer = buffer,
+        .format =
+            MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
+        .offset = 0,
+        .range = static_cast<VkDeviceSize>(host_memory_size),
+    };
 }
 
 VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
@@ -130,23 +132,24 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
     const auto [format, attachable, storage] =
         MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
 
-    VkImageCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.imageType = SurfaceTargetToImage(params.target);
-    ci.format = format;
-    ci.mipLevels = params.num_levels;
-    ci.arrayLayers = static_cast<u32>(params.GetNumLayers());
-    ci.samples = VK_SAMPLE_COUNT_1_BIT;
-    ci.tiling = VK_IMAGE_TILING_OPTIMAL;
-    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    ci.queueFamilyIndexCount = 0;
-    ci.pQueueFamilyIndices = nullptr;
-    ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-
-    ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
-               VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+    VkImageCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .imageType = SurfaceTargetToImage(params.target),
+        .format = format,
+        .extent = {},
+        .mipLevels = params.num_levels,
+        .arrayLayers = static_cast<u32>(params.GetNumLayers()),
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+    };
     if (attachable) {
         ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
                                                : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
@@ -185,12 +188,10 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl
 
 } // Anonymous namespace
 
-CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
-                             VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
                              VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
                              GPUVAddr gpu_addr, const SurfaceParams& params)
-    : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
-      device{device}, resource_manager{resource_manager},
+    : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device},
       memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
     if (params.IsBuffer()) {
         buffer = CreateBuffer(device, params, host_memory_size);
@@ -233,7 +234,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
 void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
     UNIMPLEMENTED_IF(params.IsBuffer());
 
-    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
         LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
     }
 
@@ -321,22 +322,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
 }
 
 VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
-    VkBufferImageCopy copy;
-    copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted);
-    copy.bufferRowLength = 0;
-    copy.bufferImageHeight = 0;
-    copy.imageSubresource.aspectMask = image->GetAspectMask();
-    copy.imageSubresource.mipLevel = level;
-    copy.imageSubresource.baseArrayLayer = 0;
-    copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers());
-    copy.imageOffset.x = 0;
-    copy.imageOffset.y = 0;
-    copy.imageOffset.z = 0;
-    copy.imageExtent.width = params.GetMipWidth(level);
-    copy.imageExtent.height = params.GetMipHeight(level);
-    copy.imageExtent.depth =
-        params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
-    return copy;
+    return {
+        .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted),
+        .bufferRowLength = 0,
+        .bufferImageHeight = 0,
+        .imageSubresource =
+            {
+                .aspectMask = image->GetAspectMask(),
+                .mipLevel = level,
+                .baseArrayLayer = 0,
+                .layerCount = static_cast<u32>(params.GetNumLayers()),
+            },
+        .imageOffset = {.x = 0, .y = 0, .z = 0},
+        .imageExtent =
+            {
+                .width = params.GetMipWidth(level),
+                .height = params.GetMipHeight(level),
+                .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U,
+            },
+    };
 }
 
 VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
@@ -380,7 +384,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
 
     std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
                        MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
-    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
         // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
         std::swap(swizzle[0], swizzle[2]);
     }
@@ -392,11 +396,11 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
         UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
         const bool is_first = x_source == SwizzleSource::R;
         switch (params.pixel_format) {
-        case VideoCore::Surface::PixelFormat::Z24S8:
-        case VideoCore::Surface::PixelFormat::Z32FS8:
+        case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
+        case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
             aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
             break;
-        case VideoCore::Surface::PixelFormat::S8Z24:
+        case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
             aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
             break;
         default:
@@ -416,20 +420,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
         ASSERT(num_slices == params.depth);
     }
 
-    VkImageViewCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.image = surface.GetImageHandle();
-    ci.viewType = image_view_type;
-    ci.format = surface.GetImage().GetFormat();
-    ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
-    ci.subresourceRange.aspectMask = aspect;
-    ci.subresourceRange.baseMipLevel = base_level;
-    ci.subresourceRange.levelCount = num_levels;
-    ci.subresourceRange.baseArrayLayer = base_layer;
-    ci.subresourceRange.layerCount = num_layers;
-    image_view = device.GetLogical().CreateImageView(ci);
+    image_view = device.GetLogical().CreateImageView({
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .image = surface.GetImageHandle(),
+        .viewType = image_view_type,
+        .format = surface.GetImage().GetFormat(),
+        .components =
+            {
+                .r = swizzle[0],
+                .g = swizzle[1],
+                .b = swizzle[2],
+                .a = swizzle[3],
+            },
+        .subresourceRange =
+            {
+                .aspectMask = aspect,
+                .baseMipLevel = base_level,
+                .levelCount = num_levels,
+                .baseArrayLayer = base_layer,
+                .layerCount = num_layers,
+            },
+    });
 
     return last_image_view = *image_view;
 }
@@ -439,17 +452,29 @@ VkImageView CachedSurfaceView::GetAttachment() {
         return *render_target;
     }
 
-    VkImageViewCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.image = surface.GetImageHandle();
-    ci.format = surface.GetImage().GetFormat();
-    ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
-                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
-    ci.subresourceRange.aspectMask = aspect_mask;
-    ci.subresourceRange.baseMipLevel = base_level;
-    ci.subresourceRange.levelCount = num_levels;
+    VkImageViewCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .image = surface.GetImageHandle(),
+        .viewType = VK_IMAGE_VIEW_TYPE_1D,
+        .format = surface.GetImage().GetFormat(),
+        .components =
+            {
+                .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+            },
+        .subresourceRange =
+            {
+                .aspectMask = aspect_mask,
+                .baseMipLevel = base_level,
+                .levelCount = num_levels,
+                .baseArrayLayer = 0,
+                .layerCount = 0,
+            },
+    };
     if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
         ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
         ci.subresourceRange.baseArrayLayer = base_slice;
@@ -463,19 +488,20 @@ VkImageView CachedSurfaceView::GetAttachment() {
     return *render_target;
 }
 
-VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                               const VKDevice& device, VKResourceManager& resource_manager,
-                               VKMemoryManager& memory_manager, VKScheduler& scheduler,
-                               VKStagingBufferPool& staging_pool)
-    : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
-      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
-      staging_pool{staging_pool} {}
+VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+                               Tegra::Engines::Maxwell3D& maxwell3d,
+                               Tegra::MemoryManager& gpu_memory, const VKDevice& device_,
+                               VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
+                               VKStagingBufferPool& staging_pool_)
+    : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()),
+      device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
+                                                                                   staging_pool_} {}
 
 VKTextureCache::~VKTextureCache() = default;
 
 Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
-    return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
-                                           scheduler, staging_pool, gpu_addr, params);
+    return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool,
+                                           gpu_addr, params);
 }
 
 void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -502,24 +528,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 
-    VkImageCopy copy;
-    copy.srcSubresource.aspectMask = src_surface->GetAspectMask();
-    copy.srcSubresource.mipLevel = copy_params.source_level;
-    copy.srcSubresource.baseArrayLayer = copy_params.source_z;
-    copy.srcSubresource.layerCount = num_layers;
-    copy.srcOffset.x = copy_params.source_x;
-    copy.srcOffset.y = copy_params.source_y;
-    copy.srcOffset.z = 0;
-    copy.dstSubresource.aspectMask = dst_surface->GetAspectMask();
-    copy.dstSubresource.mipLevel = copy_params.dest_level;
-    copy.dstSubresource.baseArrayLayer = dst_base_layer;
-    copy.dstSubresource.layerCount = num_layers;
-    copy.dstOffset.x = copy_params.dest_x;
-    copy.dstOffset.y = copy_params.dest_y;
-    copy.dstOffset.z = dst_offset_z;
-    copy.extent.width = copy_params.width;
-    copy.extent.height = copy_params.height;
-    copy.extent.depth = extent_z;
+    const VkImageCopy copy{
+        .srcSubresource =
+            {
+                .aspectMask = src_surface->GetAspectMask(),
+                .mipLevel = copy_params.source_level,
+                .baseArrayLayer = copy_params.source_z,
+                .layerCount = num_layers,
+            },
+        .srcOffset =
+            {
+                .x = static_cast<s32>(copy_params.source_x),
+                .y = static_cast<s32>(copy_params.source_y),
+                .z = 0,
+            },
+        .dstSubresource =
+            {
+                .aspectMask = dst_surface->GetAspectMask(),
+                .mipLevel = copy_params.dest_level,
+                .baseArrayLayer = dst_base_layer,
+                .layerCount = num_layers,
+            },
+        .dstOffset =
+            {
+                .x = static_cast<s32>(copy_params.dest_x),
+                .y = static_cast<s32>(copy_params.dest_y),
+                .z = static_cast<s32>(dst_offset_z),
+            },
+        .extent =
+            {
+                .width = copy_params.width,
+                .height = copy_params.height,
+                .depth = extent_z,
+            },
+    };
 
     const VkImage src_image = src_surface->GetImageHandle();
     const VkImage dst_image = dst_surface->GetImageHandle();
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 807e26c8a..39202feba 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -15,10 +15,6 @@
 #include "video_core/texture_cache/surface_base.h"
 #include "video_core/texture_cache/texture_cache.h"
 
-namespace Core {
-class System;
-}
-
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -27,7 +23,6 @@ namespace Vulkan {
 
 class RasterizerVulkan;
 class VKDevice;
-class VKResourceManager;
 class VKScheduler;
 class VKStagingBufferPool;
 
@@ -45,8 +40,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
     friend CachedSurfaceView;
 
 public:
-    explicit CachedSurface(Core::System& system, const VKDevice& device,
-                           VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+    explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
                            GPUVAddr gpu_addr, const SurfaceParams& params);
     ~CachedSurface();
@@ -101,9 +95,7 @@ private:
 
     VkImageSubresourceRange GetImageSubresourceRange() const;
 
-    Core::System& system;
     const VKDevice& device;
-    VKResourceManager& resource_manager;
     VKMemoryManager& memory_manager;
     VKScheduler& scheduler;
     VKStagingBufferPool& staging_pool;
@@ -201,10 +193,10 @@ private:
 
 class VKTextureCache final : public TextureCacheBase {
 public:
-    explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                            const VKDevice& device, VKResourceManager& resource_manager,
-                            VKMemoryManager& memory_manager, VKScheduler& scheduler,
-                            VKStagingBufferPool& staging_pool);
+    explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+                            Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
+                            const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
     ~VKTextureCache();
 
 private:
@@ -219,7 +211,6 @@ private:
     void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
 
     const VKDevice& device;
-    VKResourceManager& resource_manager;
     VKMemoryManager& memory_manager;
     VKScheduler& scheduler;
     VKStagingBufferPool& staging_pool;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 051298cc8..2598440fb 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -6,6 +6,7 @@
 #include <exception>
 #include <memory>
 #include <optional>
+#include <string_view>
 #include <utility>
 #include <vector>
 
@@ -17,21 +18,42 @@ namespace Vulkan::vk {
 
 namespace {
 
+template <typename Func>
+void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld,
+                         Func&& func) {
+    // GetProperties is queried on every comparison, which calls Vulkan more than needed, but
+    // these queries are supposed to be cheap.
+    std::stable_sort(devices.begin(), devices.end(),
+                     [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) {
+                         return func(vk::PhysicalDevice(lhs, dld).GetProperties(),
+                                     vk::PhysicalDevice(rhs, dld).GetProperties());
+                     });
+}
+
+void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices,
+                                  const InstanceDispatch& dld,
+                                  std::initializer_list<u32> vendor_ids) {
+    for (auto it = vendor_ids.end(); it != vendor_ids.begin();) {
+        --it;
+        SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) {
+            return lhs.vendorID == id && rhs.vendorID != id;
+        });
+    }
+}
+
 void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) {
-    std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) {
-        // This will call Vulkan more than needed, but these calls are cheap.
-        const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties();
-        const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties();
-
-        // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest.
-        const bool preferred =
-            (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
-             rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) ||
-            (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) ||
-            (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) ||
-            (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086);
-        return !preferred;
+    // Sort by name in descending order. This sets a base order where GPUs with higher numbers
+    // appear first (e.g. a GTX 1650 will intentionally be listed before a GTX 1080).
+    SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
+        return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName};
+    });
+    // Prefer discrete over non-discrete
+    SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
+        return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
+               rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
     });
+    // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest.
+    SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086});
 }
 
 template <typename T>
@@ -148,6 +170,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkGetFenceStatus);
     X(vkGetImageMemoryRequirements);
     X(vkGetQueryPoolResults);
+    X(vkGetSemaphoreCounterValueKHR);
     X(vkMapMemory);
     X(vkQueueSubmit);
     X(vkResetFences);
@@ -156,6 +179,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkUpdateDescriptorSetWithTemplateKHR);
     X(vkUpdateDescriptorSets);
     X(vkWaitForFences);
+    X(vkWaitSemaphoresKHR);
 #undef X
 }
 
@@ -262,6 +286,22 @@ const char* ToString(VkResult result) noexcept {
         return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT";
     case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
         return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
+    case VkResult::VK_ERROR_UNKNOWN:
+        return "VK_ERROR_UNKNOWN";
+    case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
+        return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
+    case VkResult::VK_THREAD_IDLE_KHR:
+        return "VK_THREAD_IDLE_KHR";
+    case VkResult::VK_THREAD_DONE_KHR:
+        return "VK_THREAD_DONE_KHR";
+    case VkResult::VK_OPERATION_DEFERRED_KHR:
+        return "VK_OPERATION_DEFERRED_KHR";
+    case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
+        return "VK_OPERATION_NOT_DEFERRED_KHR";
+    case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
+        return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
+    case VkResult::VK_RESULT_MAX_ENUM:
+        return "VK_RESULT_MAX_ENUM";
     }
     return "Unknown";
 }
@@ -377,24 +417,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
 
 Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions,
                           InstanceDispatch& dld) noexcept {
-    VkApplicationInfo application_info;
-    application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
-    application_info.pNext = nullptr;
-    application_info.pApplicationName = "yuzu Emulator";
-    application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
-    application_info.pEngineName = "yuzu Emulator";
-    application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
-    application_info.apiVersion = VK_API_VERSION_1_1;
-
-    VkInstanceCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.pApplicationInfo = &application_info;
-    ci.enabledLayerCount = layers.size();
-    ci.ppEnabledLayerNames = layers.data();
-    ci.enabledExtensionCount = extensions.size();
-    ci.ppEnabledExtensionNames = extensions.data();
+    static constexpr VkApplicationInfo application_info{
+        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+        .pNext = nullptr,
+        .pApplicationName = "yuzu Emulator",
+        .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
+        .pEngineName = "yuzu Emulator",
+        .engineVersion = VK_MAKE_VERSION(0, 1, 0),
+        .apiVersion = VK_API_VERSION_1_1,
+    };
+
+    const VkInstanceCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .pApplicationInfo = &application_info,
+        .enabledLayerCount = layers.size(),
+        .ppEnabledLayerNames = layers.data(),
+        .enabledExtensionCount = extensions.size(),
+        .ppEnabledExtensionNames = extensions.data(),
+    };
 
     VkInstance instance;
     if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
@@ -425,19 +467,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices(
 
 DebugCallback Instance::TryCreateDebugCallback(
     PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
-    VkDebugUtilsMessengerCreateInfoEXT ci;
-    ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
-                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
-                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
-                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
-    ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
-                     VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
-                     VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
-    ci.pfnUserCallback = callback;
-    ci.pUserData = nullptr;
+    const VkDebugUtilsMessengerCreateInfoEXT ci{
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .flags = 0,
+        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
+        .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+                       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+                       VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+        .pfnUserCallback = callback,
+        .pUserData = nullptr,
+    };
 
     VkDebugUtilsMessengerEXT messenger;
     if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
@@ -468,12 +511,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
 }
 
 CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
-    VkCommandBufferAllocateInfo ai;
-    ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-    ai.pNext = nullptr;
-    ai.commandPool = handle;
-    ai.level = level;
-    ai.commandBufferCount = static_cast<u32>(num_buffers);
+    const VkCommandBufferAllocateInfo ai{
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .commandPool = handle,
+        .level = level,
+        .commandBufferCount = static_cast<u32>(num_buffers),
+    };
 
     std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers);
     switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) {
@@ -497,17 +541,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
 Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
                       Span<const char*> enabled_extensions, const void* next,
                       DeviceDispatch& dld) noexcept {
-    VkDeviceCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-    ci.pNext = next;
-    ci.flags = 0;
-    ci.queueCreateInfoCount = queues_ci.size();
-    ci.pQueueCreateInfos = queues_ci.data();
-    ci.enabledLayerCount = 0;
-    ci.ppEnabledLayerNames = nullptr;
-    ci.enabledExtensionCount = enabled_extensions.size();
-    ci.ppEnabledExtensionNames = enabled_extensions.data();
-    ci.pEnabledFeatures = nullptr;
+    const VkDeviceCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+        .pNext = next,
+        .flags = 0,
+        .queueCreateInfoCount = queues_ci.size(),
+        .pQueueCreateInfos = queues_ci.data(),
+        .enabledLayerCount = 0,
+        .ppEnabledLayerNames = nullptr,
+        .enabledExtensionCount = enabled_extensions.size(),
+        .ppEnabledExtensionNames = enabled_extensions.data(),
+        .pEnabledFeatures = nullptr,
+    };
 
     VkDevice device;
     if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
@@ -548,11 +593,15 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
 }
 
 Semaphore Device::CreateSemaphore() const {
-    VkSemaphoreCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
+    static constexpr VkSemaphoreCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+    };
+    return CreateSemaphore(ci);
+}
 
+Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const {
     VkSemaphore object;
     Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
     return Semaphore(object, handle, *dld);
@@ -639,10 +688,12 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
 }
 
 Event Device::CreateEvent() const {
-    VkEventCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
+    static constexpr VkEventCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+    };
+
     VkEvent object;
     Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
     return Event(object, handle, *dld);
@@ -778,7 +829,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
         VK_SUCCESS) {
         return std::nullopt;
     }
-    return properties;
+    return std::move(properties);
 }
 
 std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 71daac9d7..234e01693 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -267,6 +267,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkGetFenceStatus vkGetFenceStatus;
     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
     PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
+    PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR;
     PFN_vkMapMemory vkMapMemory;
     PFN_vkQueueSubmit vkQueueSubmit;
     PFN_vkResetFences vkResetFences;
@@ -275,6 +276,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
     PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
     PFN_vkWaitForFences vkWaitForFences;
+    PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR;
 };
 
 /// Loads instance agnostic function pointers.
@@ -550,7 +552,6 @@ using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
 using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
 using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
 using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
-using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>;
 using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
 using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
 
@@ -582,7 +583,8 @@ public:
     /// Construct a queue handle.
     constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
 
-    VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
+    VkResult Submit(Span<VkSubmitInfo> submit_infos,
+                    VkFence fence = VK_NULL_HANDLE) const noexcept {
         return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
     }
 
@@ -674,6 +676,44 @@ public:
     }
 };
 
+class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
+    using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
+
+public:
+    /// Returns the counter value reported by vkGetSemaphoreCounterValueKHR for this semaphore.
+    [[nodiscard]] u64 GetCounter() const {
+        u64 counter;
+        Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &counter));
+        return counter;
+    }
+
+    /**
+     * Host-side wait on a timeline semaphore; unexpected results are thrown as Exception.
+     *
+     * @param value   Counter value to wait for
+     * @param timeout Nanoseconds to wait before giving up
+     * @return        True when the wait succeeded, false when it timed out
+     */
+    bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const {
+        const VkSemaphoreWaitInfoKHR info{
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
+            .pNext = nullptr,
+            .flags = 0,
+            .semaphoreCount = 1,
+            .pSemaphores = &handle,
+            .pValues = &value,
+        };
+        const VkResult status = dld->vkWaitSemaphoresKHR(owner, &info, timeout);
+        if (status == VK_SUCCESS) {
+            return true;
+        }
+        if (status != VK_TIMEOUT) {
+            throw Exception(status);
+        }
+        return false;
+    }
+};
+
 class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
     using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
 
@@ -694,6 +734,8 @@ public:
 
     Semaphore CreateSemaphore() const;
 
+    Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const;
+
     Fence CreateFence(const VkFenceCreateInfo& ci) const;
 
     DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const;
@@ -756,8 +798,8 @@ public:
     }
 
     VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
-                             void* data, VkDeviceSize stride, VkQueryResultFlags flags) const
-        noexcept {
+                             void* data, VkDeviceSize stride,
+                             VkQueryResultFlags flags) const noexcept {
         return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
                                           flags);
     }
@@ -849,8 +891,8 @@ public:
         dld->vkCmdBindPipeline(handle, bind_point, pipeline);
     }
 
-    void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const
-        noexcept {
+    void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
+                         VkIndexType index_type) const noexcept {
         dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type);
     }
 
@@ -863,8 +905,8 @@ public:
         BindVertexBuffers(binding, 1, &buffer, &offset);
     }
 
-    void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const
-        noexcept {
+    void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex,
+              u32 first_instance) const noexcept {
         dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance);
     }
 
@@ -874,15 +916,15 @@ public:
                               first_instance);
     }
 
-    void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const
-        noexcept {
+    void ClearAttachments(Span<VkClearAttachment> attachments,
+                          Span<VkClearRect> rects) const noexcept {
         dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
                                    rects.data());
     }
 
     void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
-                   VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const
-        noexcept {
+                   VkImageLayout dst_layout, Span<VkImageBlit> regions,
+                   VkFilter filter) const noexcept {
         dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
                             regions.data(), filter);
     }
@@ -907,8 +949,8 @@ public:
                                     regions.data());
     }
 
-    void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const
-        noexcept {
+    void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
+                    Span<VkBufferCopy> regions) const noexcept {
         dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data());
     }
 
@@ -924,8 +966,8 @@ public:
                                     regions.data());
     }
 
-    void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const
-        noexcept {
+    void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size,
+                    u32 data) const noexcept {
         dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data);
     }
 
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index cca13bcde..8e5a22ab3 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -199,55 +199,48 @@ public:
     }
 
     std::optional<u32> GetGotoLabel() const {
-        auto inner = std::get_if<ASTGoto>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTGoto>(&data)) {
             return {inner->label};
         }
-        return {};
+        return std::nullopt;
     }
 
     Expr GetGotoCondition() const {
-        auto inner = std::get_if<ASTGoto>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTGoto>(&data)) {
             return inner->condition;
         }
         return nullptr;
     }
 
     void MarkLabelUnused() {
-        auto inner = std::get_if<ASTLabel>(&data);
-        if (inner) {
+        if (auto* inner = std::get_if<ASTLabel>(&data)) {
             inner->unused = true;
         }
     }
 
     bool IsLabelUnused() const {
-        auto inner = std::get_if<ASTLabel>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTLabel>(&data)) {
             return inner->unused;
         }
         return true;
     }
 
     std::optional<u32> GetLabelIndex() const {
-        auto inner = std::get_if<ASTLabel>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTLabel>(&data)) {
             return {inner->index};
         }
-        return {};
+        return std::nullopt;
     }
 
     Expr GetIfCondition() const {
-        auto inner = std::get_if<ASTIfThen>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
             return inner->condition;
         }
         return nullptr;
     }
 
     void SetGotoCondition(Expr new_condition) {
-        auto inner = std::get_if<ASTGoto>(&data);
-        if (inner) {
+        if (auto* inner = std::get_if<ASTGoto>(&data)) {
             inner->condition = std::move(new_condition);
         }
     }
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..aabd62c5c
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,221 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+
+namespace VideoCommon::Shader {
+
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
+
+AsyncShaders::~AsyncShaders() {
+    KillWorkers();
+}
+
+void AsyncShaders::AllocateWorkers() {
+    // Upper bound on the number of compiler threads
+    constexpr u32 MAX_THREADS = 4;
+    // Budget a quarter of the reported hardware threads for shader compilation
+    const u32 quarter_of_threads = std::thread::hardware_concurrency() / 4;
+    // Clamp the budget to [1, MAX_THREADS] so that at least one worker always exists
+    const u32 num_workers = std::min(std::max(1U, quarter_of_threads), MAX_THREADS);
+
+    // Nothing to do when the pool already has the desired size
+    const bool pool_is_sized = worker_threads.size() == num_workers;
+    if (pool_is_sized) {
+        return;
+    }
+
+    // A pool of a different size exists: tear it down before rebuilding
+    const bool pool_exists = !worker_threads.empty();
+    if (pool_exists) {
+        FreeWorkers();
+    }
+
+    // Give every worker its own shared graphics context and start its thread
+    for (std::size_t index = 0; index < num_workers; ++index) {
+        context_list.push_back(emu_window.CreateSharedContext());
+        worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
+                                    context_list[index].get());
+    }
+}
+
+void AsyncShaders::FreeWorkers() {
+    // Stop all workers: raise the flag under queue_mutex so no waiter misses the wakeup
+    std::unique_lock lock{queue_mutex};
+    is_thread_exiting.store(true);
+    lock.unlock();
+    cv.notify_all();
+    for (auto& thread : worker_threads) {
+        thread.join();
+    }
+    context_list.clear();
+    worker_threads.clear();
+    is_thread_exiting.store(false); // re-arm, or workers spawned later would exit at once
+}
+
+void AsyncShaders::KillWorkers() {
+    // Workers dereference this object (queue, mutexes, condition variable) and the graphics
+    // contexts owned by context_list for as long as they run. Detaching them and clearing
+    // context_list right away would let a still-running worker touch destroyed state, and a
+    // worker asleep on the condition variable would never be woken at all.
+    //
+    // Shut the pool down through FreeWorkers instead: signal, wake, join, then release.
+    // NOTE(review): this blocks until any in-flight compilation finishes; confirm that no
+    // caller relies on KillWorkers returning immediately.
+    FreeWorkers();
+}
+
+bool AsyncShaders::HasWorkQueued() const {
+    return !pending_queue.empty(); // unlocked read; the worker loop holds queue_mutex
+}
+
+bool AsyncShaders::HasCompletedWork() const {
+    std::shared_lock lock{completed_mutex}; // shared (reader) lock on completed_mutex
+    return !finished_work.empty();          // true when at least one result is waiting
+}
+
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+    const auto& maxwell_regs = gpu.Maxwell3D().regs;
+
+    // Guess whether the shader for the current draw may be compiled in the background.
+    // The checks are applied in this order:
+    //  1. Depth is enabled: assume the game is not rendering something that will only be
+    //     used one time, so the shader may be built asynchronously.
+    //  2. At most 6 indices or at most 6 vertices: assume a full screen quad. Such shaders
+    //     are usually only used once for building textures, so they can't be built async.
+    //  3. Everything else may be built asynchronously.
+    // The draw-size test is wrapped in a lambda so it is only evaluated when depth is off,
+    // exactly like the early return it replaces.
+    const auto is_small_draw = [&maxwell_regs] {
+        return maxwell_regs.index_array.count <= 6 || maxwell_regs.vertex_buffer.count <= 6;
+    };
+
+    return maxwell_regs.zeta_enable || !is_small_draw();
+}
+
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+    // Starts out empty; after the swap below it owns everything the workers have finished
+    // and finished_work is left empty for the next batch.
+    std::vector<Result> completed;
+    {
+        std::unique_lock lock{completed_mutex};
+        completed.swap(finished_work);
+    }
+    return completed;
+}
+
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+                                     Tegra::Engines::ShaderType shader_type, u64 uid,
+                                     std::vector<u64> code, std::vector<u64> code_b,
+                                     u32 main_offset, CompilerSettings compiler_settings,
+                                     const Registry& registry, VAddr cpu_addr) {
+    WorkerParams job{
+        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+        .device = &device, // kept as a pointer inside the job
+        .shader_type = shader_type,
+        .uid = uid,
+        .code = std::move(code),
+        .code_b = std::move(code_b),
+        .main_offset = main_offset,
+        .compiler_settings = compiler_settings,
+        .registry = registry, // stored as a copy inside the job
+        .cpu_address = cpu_addr,
+    };
+    { // hold queue_mutex only while touching the queue
+        std::unique_lock lock(queue_mutex);
+        pending_queue.push(std::move(job));
+    }
+    cv.notify_one(); // wake one idle worker
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     Vulkan::VKDescriptorPool& descriptor_pool,
+                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     Vulkan::VKRenderPassCache& renderpass_cache,
+                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
+                                     Vulkan::SPIRVProgram program,
+                                     Vulkan::GraphicsPipelineCacheKey key) {
+    // bindings and program are by-value sink parameters: move them rather than copy again
+    WorkerParams params{
+        .backend = Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .vk_device = &device,
+        .scheduler = &scheduler,
+        .descriptor_pool = &descriptor_pool,
+        .update_descriptor_queue = &update_descriptor_queue,
+        .renderpass_cache = &renderpass_cache,
+        .bindings = std::move(bindings),
+        .program = std::move(program),
+        .key = key,
+    };
+    std::unique_lock lock(queue_mutex); // publish the job, then wake one idle worker
+    pending_queue.push(std::move(params));
+    cv.notify_one();
+}
+
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+    // Worker loop: sleep until a job is queued or an exit is requested, take one job, then
+    // build it with the queue lock released so that other workers can dequeue meanwhile.
+    // `context` is this worker's graphics context, acquired around OpenGL/GLASM builds.
+    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+        std::unique_lock lock{queue_mutex};
+        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
+        if (is_thread_exiting) {
+            return;
+        }
+
+        // The wait predicate ran with queue_mutex held and the lock has been held ever since,
+        // so the queue cannot be empty here and no further emptiness check is needed.
+        WorkerParams work = std::move(pending_queue.front());
+        pending_queue.pop();
+        lock.unlock();
+
+        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+            // Build the shader IR first, then acquire the worker's context for the build
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
+            const auto scope = context->Acquire();
+            auto program =
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
+            Result result{};
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
+
+            // Only the program object matching the job's backend is handed over
+            if (work.backend == Backend::OpenGL) {
+                result.program.opengl = std::move(program->source_program);
+            } else if (work.backend == Backend::GLASM) {
+                result.program.glasm = std::move(program->assembly_program);
+            }
+
+            // Publish the result for GetCompletedWork()
+            {
+                std::unique_lock complete_lock(completed_mutex);
+                finished_work.push_back(std::move(result));
+            }
+        } else if (work.backend == Backend::Vulkan) {
+            // Vulkan pipelines are not returned through finished_work; they are handed to the
+            // pipeline cache directly.
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                *work.vk_device, *work.scheduler, *work.descriptor_pool,
+                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+                work.program);
+
+            work.pp_cache->EmplacePipeline(std::move(pipeline));
+        }
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..7a99e1dc5
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,147 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <condition_variable>
+#include <memory>
+#include <shared_mutex>
+#include <thread>
+
+// This header includes both Vulkan and OpenGL headers, this has to be fixed
+// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
+// Forcefully include glad early and undefine macros
+#include <glad/glad.h>
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Vulkan {
+class VKPipelineCache;
+}
+
+namespace VideoCommon::Shader {
+
+class AsyncShaders {
+public:
+    enum class Backend {
+        OpenGL, // non-assembly OpenGL shaders; result stored in ResultPrograms::opengl
+        GLASM,  // OpenGL assembly shaders; result stored in ResultPrograms::glasm
+        Vulkan, // pipelines are handed to the pipeline cache, not returned through Result
+    };
+
+    struct ResultPrograms {
+        OpenGL::OGLProgram opengl;
+        OpenGL::OGLAssemblyProgram glasm;
+    };
+
+    struct Result {
+        u64 uid;
+        VAddr cpu_address;
+        Backend backend; // tells which member of `program` was filled in
+        ResultPrograms program;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        Tegra::Engines::ShaderType shader_type;
+    };
+
+    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+    ~AsyncShaders();
+
+    /// Start up shader worker threads, each with its own shared graphics context
+    void AllocateWorkers();
+
+    /// Ask all worker threads to exit, wait for them and release their shared contexts
+    void FreeWorkers();
+
+    /// Signal all worker threads to exit and release them; called from the destructor
+    void KillWorkers();
+
+    /// Check to see if any shaders have actually been compiled
+    [[nodiscard]] bool HasCompletedWork() const;
+
+    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build
+    /// every shader async as some shaders are only built and executed once. We try to "guess" which
+    /// shader would be used only once
+    [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
+
+    /// Pulls completed compiled shaders
+    [[nodiscard]] std::vector<Result> GetCompletedWork();
+
+    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+                           CompilerSettings compiler_settings, const Registry& registry,
+                           VAddr cpu_addr); // queues an OpenGL/GLASM build, wakes one worker
+
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+                           Vulkan::VKScheduler& scheduler, // references are kept as pointers
+                           Vulkan::VKDescriptorPool& descriptor_pool,
+                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                           Vulkan::VKRenderPassCache& renderpass_cache,
+                           std::vector<VkDescriptorSetLayoutBinding> bindings,
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+
+private:
+    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); // worker thread body
+
+    /// Check our worker queue to see if we have any work queued already
+    [[nodiscard]] bool HasWorkQueued() const;
+
+    struct WorkerParams {
+        Backend backend; // decides which group of members below is meaningful
+        // For OGL
+        const OpenGL::Device* device;
+        Tegra::Engines::ShaderType shader_type;
+        u64 uid;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        u32 main_offset;
+        CompilerSettings compiler_settings;
+        std::optional<Registry> registry; // left empty for Vulkan jobs
+        VAddr cpu_address;
+
+        // For Vulkan
+        Vulkan::VKPipelineCache* pp_cache;
+        const Vulkan::VKDevice* vk_device;
+        Vulkan::VKScheduler* scheduler;
+        Vulkan::VKDescriptorPool* descriptor_pool;
+        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+        Vulkan::VKRenderPassCache* renderpass_cache;
+        std::vector<VkDescriptorSetLayoutBinding> bindings;
+        Vulkan::SPIRVProgram program;
+        Vulkan::GraphicsPipelineCacheKey key;
+    };
+
+    std::condition_variable cv;                // notified when a job is queued and by FreeWorkers
+    mutable std::mutex queue_mutex;            // held while pending_queue is pushed to or popped
+    mutable std::shared_mutex completed_mutex; // held while finished_work is read or modified
+    std::atomic<bool> is_thread_exiting{};     // set to true to request that workers exit
+    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; // one per worker
+    std::vector<std::thread> worker_threads;
+    std::queue<WorkerParams> pending_queue; // jobs waiting for a worker
+    std::vector<Result> finished_work;      // OpenGL/GLASM results awaiting GetCompletedWork()
+    Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 8d86020f6..4c8971615 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state,
 
 std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
                                     u64 ldc_tracked_register) {
-    return TrackInstruction<u64>(state, pos,
-                                 [ldc_tracked_register](auto instr, const auto& opcode) {
-                                     return opcode.GetId() == OpCode::Id::SHL_IMM &&
-                                            instr.gpr0.Value() == ldc_tracked_register;
-                                 },
-                                 [](auto instr, const auto&) { return instr.gpr8.Value(); });
+    return TrackInstruction<u64>(
+        state, pos,
+        [ldc_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::SHL_IMM &&
+                   instr.gpr0.Value() == ldc_tracked_register;
+        },
+        [](auto instr, const auto&) { return instr.gpr8.Value(); });
 }
 
 std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
                                    u64 shl_tracked_register) {
-    return TrackInstruction<u32>(state, pos,
-                                 [shl_tracked_register](auto instr, const auto& opcode) {
-                                     return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
-                                            instr.gpr0.Value() == shl_tracked_register;
-                                 },
-                                 [](auto instr, const auto&) {
-                                     return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
-                                 });
+    return TrackInstruction<u32>(
+        state, pos,
+        [shl_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+                   instr.gpr0.Value() == shl_tracked_register;
+        },
+        [](auto instr, const auto&) {
+            return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+        });
 }
 
 std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
@@ -545,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) {
     gather_labels(q2.ssy_stack, state.ssy_labels, block);
     gather_labels(q2.pbk_stack, state.pbk_labels, block);
     if (std::holds_alternative<SingleBranch>(*block.branch)) {
-        const auto branch = std::get_if<SingleBranch>(block.branch.get());
+        auto* branch = std::get_if<SingleBranch>(block.branch.get());
         if (!branch->condition.IsUnconditional()) {
             q2.address = block.end + 1;
             state.queries.push_back(q2);
         }
 
-        Query conditional_query{q2};
+        auto& conditional_query = state.queries.emplace_back(q2);
         if (branch->is_sync) {
             if (branch->address == unassigned_branch) {
                 branch->address = conditional_query.ssy_stack.top();
@@ -565,21 +567,21 @@ bool TryQuery(CFGRebuildState& state) {
             conditional_query.pbk_stack.pop();
         }
         conditional_query.address = branch->address;
-        state.queries.push_back(std::move(conditional_query));
         return true;
     }
-    const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+
+    const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
     for (const auto& branch_case : multi_branch->branches) {
-        Query conditional_query{q2};
+        auto& conditional_query = state.queries.emplace_back(q2);
         conditional_query.address = branch_case.address;
-        state.queries.push_back(std::move(conditional_query));
     }
+
     return true;
 }
 
 void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
-    const auto get_expr = ([&](const Condition& cond) -> Expr {
-        Expr result{};
+    const auto get_expr = [](const Condition& cond) -> Expr {
+        Expr result;
         if (cond.cc != ConditionCode::T) {
             result = MakeExpr<ExprCondCode>(cond.cc);
         }
@@ -592,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
             }
             Expr extra = MakeExpr<ExprPredicate>(pred);
             if (negate) {
-                extra = MakeExpr<ExprNot>(extra);
+                extra = MakeExpr<ExprNot>(std::move(extra));
             }
             if (result) {
-                return MakeExpr<ExprAnd>(extra, result);
+                return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
             }
             return extra;
         }
@@ -603,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
             return result;
         }
         return MakeExpr<ExprBoolean>(true);
-    });
+    };
+
     if (std::holds_alternative<SingleBranch>(*branch_info)) {
-        const auto branch = std::get_if<SingleBranch>(branch_info.get());
+        const auto* branch = std::get_if<SingleBranch>(branch_info.get());
         if (branch->address < 0) {
             if (branch->kill) {
                 mm.InsertReturn(get_expr(branch->condition), true);
@@ -617,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
         mm.InsertGoto(get_expr(branch->condition), branch->address);
         return;
     }
-    const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+    const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
     for (const auto& branch_case : multi_branch->branches) {
         mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
                       branch_case.address);
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a276aee44..88103fede 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -53,6 +53,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
         absolute_a = ((instr.value >> 44) & 1) != 0;
         absolute_b = ((instr.value >> 54) & 1) != 0;
         break;
+    default:
+        UNREACHABLE();
+        break;
     }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index a041519b7..73155966f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
         op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
 
-        const Node value = [&]() {
-            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+        const Node value = [&] {
+            Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
             if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
             }
-            const Node shifted = [&]() {
+            const Node shifted = [&] {
                 switch (instr.iadd3.mode) {
                 case Tegra::Shader::IAdd3Mode::RightShift:
                     // TODO(tech4me): According to
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 73880db0e..2a30aab2b 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
     case OpCode::Id::IADD32I: {
         UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
 
-        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+        op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
 
-        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+        Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
 
-        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
+        SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::LOP32I: {
-        if (instr.alu.lop32i.invert_a)
-            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+        if (instr.alu.lop32i.invert_a) {
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
+        }
 
-        if (instr.alu.lop32i.invert_b)
-            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        if (instr.alu.lop32i.invert_b) {
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+        }
 
-        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
-                            PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
+                            std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
+                            instr.op_32.generates_cc != 0);
         break;
     }
     default:
@@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
 void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
                                    Node op_b, PredicateResultMode predicate_mode, Pred predicate,
                                    bool sets_cc) {
-    const Node result = [&]() {
+    Node result = [&] {
         switch (logic_op) {
         case LogicOperation::And:
-            return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+            return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
         case LogicOperation::Or:
-            return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+            return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
         case LogicOperation::Xor:
-            return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+            return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
         case LogicOperation::PassB:
             return op_b;
         default:
@@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
         return;
     case PredicateResultMode::NotZero: {
         // Set the predicate to true if the result is not zero.
-        const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
-        SetPredicate(bb, static_cast<u64>(predicate), compare);
+        Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
+        SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
         break;
     }
     default:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 07778dc3e..618d309d2 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                                std::size_t component) {
     const TextureFormat format{descriptor.format};
     switch (format) {
-    case TextureFormat::R16_G16_B16_A16:
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R32_G32_B32:
-    case TextureFormat::R32_G32:
-    case TextureFormat::R16_G16:
+    case TextureFormat::R16G16B16A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R32G32B32:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
     case TextureFormat::R32:
     case TextureFormat::R16:
     case TextureFormat::R8:
@@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
         break;
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
-    case TextureFormat::BF10GF11RF11:
+    case TextureFormat::B10G11R11:
         if (component == 0) {
             return descriptor.b_type;
         }
@@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
             return descriptor.r_type;
         }
         break;
-    case TextureFormat::G8R24:
-    case TextureFormat::G24R8:
-    case TextureFormat::G8R8:
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
     case TextureFormat::G4R4:
         if (component == 0) {
             return descriptor.g_type;
@@ -119,6 +119,8 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
             return descriptor.r_type;
         }
         break;
+    default:
+        break;
     }
     UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
     return ComponentType::FLOAT;
@@ -137,15 +139,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
 
 u32 GetComponentSize(TextureFormat format, std::size_t component) {
     switch (format) {
-    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32G32B32A32:
         return 32;
-    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R16G16B16A16:
         return 16;
-    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32G32B32:
         return component <= 2 ? 32 : 0;
-    case TextureFormat::R32_G32:
+    case TextureFormat::R32G32:
         return component <= 1 ? 32 : 0;
-    case TextureFormat::R16_G16:
+    case TextureFormat::R16G16:
         return component <= 1 ? 16 : 0;
     case TextureFormat::R32:
         return component == 0 ? 32 : 0;
@@ -192,7 +194,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 6;
         }
         return 0;
-    case TextureFormat::BF10GF11RF11:
+    case TextureFormat::B10G11R11:
         if (component == 1 || component == 2) {
             return 11;
         }
@@ -200,7 +202,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 10;
         }
         return 0;
-    case TextureFormat::G8R24:
+    case TextureFormat::R24G8:
         if (component == 0) {
             return 8;
         }
@@ -208,7 +210,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 24;
         }
         return 0;
-    case TextureFormat::G24R8:
+    case TextureFormat::R8G24:
         if (component == 0) {
             return 8;
         }
@@ -216,13 +218,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 24;
         }
         return 0;
-    case TextureFormat::G8R8:
+    case TextureFormat::R8G8:
         return (component == 0 || component == 1) ? 8 : 0;
     case TextureFormat::G4R4:
         return (component == 0 || component == 1) ? 4 : 0;
+    default:
+        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+        return 0;
     }
-    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
-    return 0;
 }
 
 std::size_t GetImageComponentMask(TextureFormat format) {
@@ -231,25 +234,25 @@ std::size_t GetImageComponentMask(TextureFormat format) {
     constexpr u8 B = 0b0100;
     constexpr u8 A = 0b1000;
     switch (format) {
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R16G16B16A16:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A4B4G4R4:
     case TextureFormat::A5B5G5R1:
     case TextureFormat::A1B5G5R5:
         return std::size_t{R | G | B | A};
-    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32G32B32:
     case TextureFormat::R32_B24G8:
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
-    case TextureFormat::BF10GF11RF11:
+    case TextureFormat::B10G11R11:
         return std::size_t{R | G | B};
-    case TextureFormat::R32_G32:
-    case TextureFormat::R16_G16:
-    case TextureFormat::G8R24:
-    case TextureFormat::G24R8:
-    case TextureFormat::G8R8:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
     case TextureFormat::G4R4:
         return std::size_t{R | G};
     case TextureFormat::R32:
@@ -257,9 +260,10 @@ std::size_t GetImageComponentMask(TextureFormat format) {
     case TextureFormat::R8:
     case TextureFormat::R1:
         return std::size_t{R};
+    default:
+        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+        return std::size_t{R | G | B | A};
     }
-    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
-    return std::size_t{R | G | B | A};
 }
 
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -463,7 +467,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                     return OperationCode::AtomicImageXor;
                 case Tegra::Shader::ImageAtomicOperation::Exch:
                     return OperationCode::AtomicImageExchange;
+                default:
+                    break;
                 }
+                break;
             default:
                 break;
             }
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 63adbc4a3..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::RED: {
-        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+                             static_cast<int>(instr.red.type.Value()));
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
         if (!real_address || !base_address) {
@@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
 
     const auto [base_address, index, offset] =
         TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
-                          { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
-                          "Global memory tracking failed");
+    ASSERT_OR_EXECUTE_MSG(
+        base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+        "Global memory tracking failed");
 
     bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
 
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index c0a8f233f..29a7cfbfe 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
-                return Immediate(0U);
+                return Operation(OperationCode::ThreadId);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 29ebf65ba..4e932a4b6 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -292,33 +292,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             break;
         }
 
-        std::vector<Node> coords;
-
-        // TODO: Add coordinates for different samplers once other texture types are implemented.
-        switch (texture_type) {
-        case TextureType::Texture1D:
-            coords.push_back(GetRegister(instr.gpr8));
-            break;
-        case TextureType::Texture2D:
-            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
-            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+        const u64 base_index = is_array ? 1 : 0;
+        const u64 num_components = [texture_type] {
+            switch (texture_type) {
+            case TextureType::Texture1D:
+                return 1;
+            case TextureType::Texture2D:
+                return 2;
+            case TextureType::TextureCube:
+                return 3;
+            default:
+                UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+                return 2;
+            }
+        }();
+        // TODO: What's the array component used for?
 
-            // Fallback to interpreting as a 2D texture for now
-            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
-            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+        std::vector<Node> coords;
+        coords.reserve(num_components);
+        for (u64 component = 0; component < num_components; ++component) {
+            coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
         }
+
         u32 indexer = 0;
         for (u32 element = 0; element < 2; ++element) {
             if (!instr.tmml.IsComponentEnabled(element)) {
                 continue;
             }
-            auto params = coords;
             MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
-            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
-            SetTemporary(bb, indexer++, value);
+            Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
+            SetTemporary(bb, indexer++, std::move(value));
         }
         for (u32 i = 0; i < indexer; ++i) {
             SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
@@ -763,7 +766,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
 
 Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     const auto texture_type{instr.tld.texture_type};
-    const bool is_array{instr.tld.is_array};
+    const bool is_array{instr.tld.is_array != 0};
     const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
     const std::size_t coord_count{GetCoordCount(texture_type)};
 
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 64ba60ea2..1c0957277 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
-                               Tegra::Shader::VideoType type, u64 byte_height) {
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
+                               u64 byte_height) {
     if (!is_chunk) {
         return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
     }
-    const Node zero = Immediate(0);
 
     switch (type) {
-    case Tegra::Shader::VideoType::Size16_Low:
+    case VideoType::Size16_Low:
         return BitfieldExtract(op, 0, 16);
-    case Tegra::Shader::VideoType::Size16_High:
+    case VideoType::Size16_High:
         return BitfieldExtract(op, 16, 16);
-    case Tegra::Shader::VideoType::Size32:
+    case VideoType::Size32:
         // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
         // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
         UNIMPLEMENTED();
-        return zero;
-    case Tegra::Shader::VideoType::Invalid:
+        return Immediate(0);
+    case VideoType::Invalid:
         UNREACHABLE_MSG("Invalid instruction encoding");
-        return zero;
+        return Immediate(0);
     default:
         UNREACHABLE();
-        return zero;
+        return Immediate(0);
     }
 }
 
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c83dc6615..233b8fa42 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     SetTemporary(bb, 0, product);
     product = GetTemporary(0);
 
-    const Node original_c = op_c;
+    Node original_c = op_c;
     const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
-    op_c = [&]() {
+    op_c = [&] {
         switch (set_mode) {
         case Tegra::Shader::XmadMode::None:
             return original_c;
         case Tegra::Shader::XmadMode::CLo:
-            return BitfieldExtract(original_c, 0, 16);
+            return BitfieldExtract(std::move(original_c), 0, 16);
         case Tegra::Shader::XmadMode::CHi:
-            return BitfieldExtract(original_c, 16, 16);
+            return BitfieldExtract(std::move(original_c), 16, 16);
         case Tegra::Shader::XmadMode::CBcc: {
-            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
-                                                   original_b, Immediate(16));
-            return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+                                             original_b, Immediate(16));
+            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
+                                   std::move(shifted_b));
         }
         case Tegra::Shader::XmadMode::CSfu: {
             const Node comp_a =
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 5071c83ca..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
 
 namespace VideoCommon::Shader {
 
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                           Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+    const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+    return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
 }
 
 bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 
-namespace Core {
-class System;
-}
-
 namespace Tegra {
 class MemoryManager;
 }
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;
 
 /// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                           Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
 
 /// Gets if the current instruction offset is a scheduler instruction
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index cdf274e54..148d91fcb 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac
     if (shader_stage == ShaderType::Compute) {
         return {};
     }
-    auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
-
-    GraphicsInfo info;
-    info.tfb_layouts = graphics.regs.tfb_layouts;
-    info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
-    info.primitive_topology = graphics.regs.draw.topology;
-    info.tessellation_primitive = graphics.regs.tess_mode.prim;
-    info.tessellation_spacing = graphics.regs.tess_mode.spacing;
-    info.tfb_enabled = graphics.regs.tfb_enabled;
-    info.tessellation_clockwise = graphics.regs.tess_mode.cw;
-    return info;
+
+    auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+    return {
+        .tfb_layouts = graphics.regs.tfb_layouts,
+        .tfb_varying_locs = graphics.regs.tfb_varying_locs,
+        .primitive_topology = graphics.regs.draw.topology,
+        .tessellation_primitive = graphics.regs.tess_mode.prim,
+        .tessellation_spacing = graphics.regs.tess_mode.spacing,
+        .tfb_enabled = graphics.regs.tfb_enabled != 0,
+        .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
+    };
 }
 
 ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
     if (shader_stage != ShaderType::Compute) {
         return {};
     }
-    auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+
+    auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
     const auto& launch = compute.launch_description;
 
-    ComputeInfo info;
-    info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
-    info.local_memory_size_in_words = launch.local_pos_alloc;
-    info.shared_memory_size_in_words = launch.shared_alloc;
-    return info;
+    return {
+        .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
+        .shared_memory_size_in_words = launch.shared_alloc,
+        .local_memory_size_in_words = launch.local_pos_alloc,
+    };
 }
 
 } // Anonymous namespace
 
-Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
     : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
       bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
 
-Registry::Registry(Tegra::Engines::ShaderType shader_stage,
-                   Tegra::Engines::ConstBufferEngineInterface& engine)
-    : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
-      graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
-                                                                 shader_stage, engine)} {}
+Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
+    : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
+      graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
+                                                                  shader_stage, engine_)} {}
 
 Registry::~Registry() = default;
 
@@ -113,8 +114,7 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler
     return value;
 }
 
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
-                                                                                 u32 offset) {
+std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
     const std::pair key = {buffer, offset};
     const auto iter = bindless_samplers.find(key);
     if (iter != bindless_samplers.end()) {
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 231206765..4bebefdde 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -94,7 +94,7 @@ public:
     explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
 
     explicit Registry(Tegra::Engines::ShaderType shader_stage,
-                      Tegra::Engines::ConstBufferEngineInterface& engine);
+                      Tegra::Engines::ConstBufferEngineInterface& engine_);
 
     ~Registry();
 
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e322c3402..29d794b34 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
 }
 
 Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
-    const Node node = MakeNode<InternalFlagNode>(flag);
+    Node node = MakeNode<InternalFlagNode>(flag);
     if (negated) {
-        return Operation(OperationCode::LogicalNegate, node);
+        return Operation(OperationCode::LogicalNegate, std::move(node));
     }
     return node;
 }
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index d5ed81442..6be3ea92b 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -205,12 +205,12 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
     const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
     const auto& found = result.first;
     if (!found) {
-        return {};
+        return std::nullopt;
     }
     if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
         return immediate->GetValue();
     }
-    return {};
+    return std::nullopt;
 }
 
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
new file mode 100644
index 000000000..c3c71657d
--- /dev/null
+++ b/src/video_core/shader_notify.cpp
@@ -0,0 +1,42 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader_notify.h"
+
+using namespace std::chrono_literals;
+
+namespace VideoCore {
+namespace {
+constexpr auto UPDATE_TICK = 32ms;
+}
+
+ShaderNotify::ShaderNotify() = default;
+ShaderNotify::~ShaderNotify() = default;
+
+std::size_t ShaderNotify::GetShadersBuilding() {
+    const auto now = std::chrono::high_resolution_clock::now();
+    if (now - last_update > UPDATE_TICK) { // refresh the cached count at most once per tick
+        std::shared_lock lock(mutex);
+        last_updated_count = accurate_count;
+        last_update = now; // restart the window; otherwise every call would take the lock
+    }
+    return last_updated_count;
+}
+
+std::size_t ShaderNotify::GetShadersBuildingAccurate() {
+    std::shared_lock lock{mutex};
+    return accurate_count;
+}
+
+void ShaderNotify::MarkShaderComplete() {
+    std::unique_lock lock{mutex};
+    accurate_count--;
+}
+
+void ShaderNotify::MarkSharderBuilding() {
+    std::unique_lock lock{mutex};
+    accurate_count++;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
new file mode 100644
index 000000000..a9c92d179
--- /dev/null
+++ b/src/video_core/shader_notify.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <shared_mutex>
+#include "common/common_types.h"
+
+namespace VideoCore {
+class ShaderNotify { // Counter of shaders being built, with a cached and a live getter
+public:
+    ShaderNotify();
+    ~ShaderNotify();
+
+    std::size_t GetShadersBuilding();         // Returns last_updated_count (cached copy)
+    std::size_t GetShadersBuildingAccurate(); // Returns accurate_count under a shared lock
+
+    void MarkShaderComplete();  // Decrements accurate_count
+    void MarkSharderBuilding(); // Increments accurate_count ("Sharder" is the public spelling)
+
+private:
+    std::size_t last_updated_count{}; // Copy of accurate_count taken by GetShadersBuilding()
+    std::size_t accurate_count{};     // Modified only by the Mark* functions
+    std::shared_mutex mutex;          // Locked around accesses to accurate_count
+    std::chrono::high_resolution_clock::time_point last_update{}; // Compared with now()
+};
+} // namespace VideoCore
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index bbe93903c..1688267bb 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -74,117 +74,131 @@ bool SurfaceTargetIsArray(SurfaceTarget target) {
 
 PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
     switch (format) {
-    case Tegra::DepthFormat::S8_Z24_UNORM:
-        return PixelFormat::S8Z24;
-    case Tegra::DepthFormat::Z24_S8_UNORM:
-        return PixelFormat::Z24S8;
-    case Tegra::DepthFormat::Z32_FLOAT:
-        return PixelFormat::Z32F;
-    case Tegra::DepthFormat::Z16_UNORM:
-        return PixelFormat::Z16;
-    case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
-        return PixelFormat::Z32FS8;
+    case Tegra::DepthFormat::S8_UINT_Z24_UNORM: // Tegra depth format -> Surface PixelFormat
+        return PixelFormat::S8_UINT_D24_UNORM;
+    case Tegra::DepthFormat::D24S8_UNORM:
+        return PixelFormat::D24_UNORM_S8_UINT;
+    case Tegra::DepthFormat::D32_FLOAT:
+        return PixelFormat::D32_FLOAT;
+    case Tegra::DepthFormat::D16_UNORM:
+        return PixelFormat::D16_UNORM;
+    case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
+        return PixelFormat::D32_FLOAT_S8_UINT;
     default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-        UNREACHABLE();
-        return PixelFormat::S8Z24;
+        UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+        return PixelFormat::S8_UINT_D24_UNORM; // Fallback returned after the report above
     }
 }
 
 PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
     switch (format) {
-    case Tegra::RenderTargetFormat::RGBA8_SRGB:
-        return PixelFormat::RGBA8_SRGB;
-    case Tegra::RenderTargetFormat::RGBA8_UNORM:
-        return PixelFormat::ABGR8U;
-    case Tegra::RenderTargetFormat::RGBA8_SNORM:
-        return PixelFormat::ABGR8S;
-    case Tegra::RenderTargetFormat::RGBA8_UINT:
-        return PixelFormat::ABGR8UI;
-    case Tegra::RenderTargetFormat::BGRA8_SRGB:
-        return PixelFormat::BGRA8_SRGB;
-    case Tegra::RenderTargetFormat::BGRA8_UNORM:
-        return PixelFormat::BGRA8;
-    case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
-        return PixelFormat::A2B10G10R10U;
-    case Tegra::RenderTargetFormat::RGBA16_FLOAT:
-        return PixelFormat::RGBA16F;
-    case Tegra::RenderTargetFormat::RGBA16_UNORM:
-        return PixelFormat::RGBA16U;
-    case Tegra::RenderTargetFormat::RGBA16_SNORM:
-        return PixelFormat::RGBA16S;
-    case Tegra::RenderTargetFormat::RGBA16_UINT:
-        return PixelFormat::RGBA16UI;
-    case Tegra::RenderTargetFormat::RGBA32_FLOAT:
-        return PixelFormat::RGBA32F;
-    case Tegra::RenderTargetFormat::RG32_FLOAT:
-        return PixelFormat::RG32F;
-    case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
-        return PixelFormat::R11FG11FB10F;
-    case Tegra::RenderTargetFormat::B5G6R5_UNORM:
-        return PixelFormat::B5G6R5U;
-    case Tegra::RenderTargetFormat::BGR5A1_UNORM:
-        return PixelFormat::A1B5G5R5U;
-    case Tegra::RenderTargetFormat::RGBA32_UINT:
-        return PixelFormat::RGBA32UI;
-    case Tegra::RenderTargetFormat::R8_UNORM:
-        return PixelFormat::R8U;
-    case Tegra::RenderTargetFormat::R8_UINT:
-        return PixelFormat::R8UI;
-    case Tegra::RenderTargetFormat::RG16_FLOAT:
-        return PixelFormat::RG16F;
-    case Tegra::RenderTargetFormat::RG16_UINT:
-        return PixelFormat::RG16UI;
-    case Tegra::RenderTargetFormat::RG16_SINT:
-        return PixelFormat::RG16I;
-    case Tegra::RenderTargetFormat::RG16_UNORM:
-        return PixelFormat::RG16;
-    case Tegra::RenderTargetFormat::RG16_SNORM:
-        return PixelFormat::RG16S;
-    case Tegra::RenderTargetFormat::RG8_UNORM:
-        return PixelFormat::RG8U;
-    case Tegra::RenderTargetFormat::RG8_SNORM:
-        return PixelFormat::RG8S;
-    case Tegra::RenderTargetFormat::RG8_UINT:
-        return PixelFormat::RG8UI;
-    case Tegra::RenderTargetFormat::R16_FLOAT:
-        return PixelFormat::R16F;
+    case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT:
+        return PixelFormat::R32G32B32A32_FLOAT; // NOTE(review): label spells B32G32 - confirm
+    case Tegra::RenderTargetFormat::R32G32B32A32_SINT:
+        return PixelFormat::R32G32B32A32_SINT;
+    case Tegra::RenderTargetFormat::R32G32B32A32_UINT:
+        return PixelFormat::R32G32B32A32_UINT;
+    case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:
+        return PixelFormat::R16G16B16A16_UNORM;
+    case Tegra::RenderTargetFormat::R16G16B16A16_SNORM:
+        return PixelFormat::R16G16B16A16_SNORM;
+    case Tegra::RenderTargetFormat::R16G16B16A16_SINT:
+        return PixelFormat::R16G16B16A16_SINT;
+    case Tegra::RenderTargetFormat::R16G16B16A16_UINT:
+        return PixelFormat::R16G16B16A16_UINT;
+    case Tegra::RenderTargetFormat::R16G16B16A16_FLOAT:
+        return PixelFormat::R16G16B16A16_FLOAT;
+    case Tegra::RenderTargetFormat::R32G32_FLOAT:
+        return PixelFormat::R32G32_FLOAT;
+    case Tegra::RenderTargetFormat::R32G32_SINT:
+        return PixelFormat::R32G32_SINT;
+    case Tegra::RenderTargetFormat::R32G32_UINT:
+        return PixelFormat::R32G32_UINT;
+    case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:
+        return PixelFormat::R16G16B16X16_FLOAT;
+    case Tegra::RenderTargetFormat::B8G8R8A8_UNORM:
+        return PixelFormat::B8G8R8A8_UNORM;
+    case Tegra::RenderTargetFormat::B8G8R8A8_SRGB:
+        return PixelFormat::B8G8R8A8_SRGB;
+    case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:
+        return PixelFormat::A2B10G10R10_UNORM;
+    case Tegra::RenderTargetFormat::A2B10G10R10_UINT:
+        return PixelFormat::A2B10G10R10_UINT;
+    case Tegra::RenderTargetFormat::A8B8G8R8_UNORM:
+        return PixelFormat::A8B8G8R8_UNORM;
+    case Tegra::RenderTargetFormat::A8B8G8R8_SRGB:
+        return PixelFormat::A8B8G8R8_SRGB;
+    case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:
+        return PixelFormat::A8B8G8R8_SNORM;
+    case Tegra::RenderTargetFormat::A8B8G8R8_SINT:
+        return PixelFormat::A8B8G8R8_SINT;
+    case Tegra::RenderTargetFormat::A8B8G8R8_UINT:
+        return PixelFormat::A8B8G8R8_UINT;
+    case Tegra::RenderTargetFormat::R16G16_UNORM:
+        return PixelFormat::R16G16_UNORM;
+    case Tegra::RenderTargetFormat::R16G16_SNORM:
+        return PixelFormat::R16G16_SNORM;
+    case Tegra::RenderTargetFormat::R16G16_SINT:
+        return PixelFormat::R16G16_SINT;
+    case Tegra::RenderTargetFormat::R16G16_UINT:
+        return PixelFormat::R16G16_UINT;
+    case Tegra::RenderTargetFormat::R16G16_FLOAT:
+        return PixelFormat::R16G16_FLOAT;
+    case Tegra::RenderTargetFormat::B10G11R11_FLOAT:
+        return PixelFormat::B10G11R11_FLOAT;
+    case Tegra::RenderTargetFormat::R32_SINT:
+        return PixelFormat::R32_SINT;
+    case Tegra::RenderTargetFormat::R32_UINT:
+        return PixelFormat::R32_UINT;
+    case Tegra::RenderTargetFormat::R32_FLOAT:
+        return PixelFormat::R32_FLOAT;
+    case Tegra::RenderTargetFormat::R5G6B5_UNORM:
+        return PixelFormat::R5G6B5_UNORM;
+    case Tegra::RenderTargetFormat::A1R5G5B5_UNORM:
+        return PixelFormat::A1R5G5B5_UNORM;
+    case Tegra::RenderTargetFormat::R8G8_UNORM:
+        return PixelFormat::R8G8_UNORM;
+    case Tegra::RenderTargetFormat::R8G8_SNORM:
+        return PixelFormat::R8G8_SNORM;
+    case Tegra::RenderTargetFormat::R8G8_SINT:
+        return PixelFormat::R8G8_SINT;
+    case Tegra::RenderTargetFormat::R8G8_UINT:
+        return PixelFormat::R8G8_UINT;
     case Tegra::RenderTargetFormat::R16_UNORM:
-        return PixelFormat::R16U;
+        return PixelFormat::R16_UNORM;
     case Tegra::RenderTargetFormat::R16_SNORM:
-        return PixelFormat::R16S;
-    case Tegra::RenderTargetFormat::R16_UINT:
-        return PixelFormat::R16UI;
+        return PixelFormat::R16_SNORM;
     case Tegra::RenderTargetFormat::R16_SINT:
-        return PixelFormat::R16I;
-    case Tegra::RenderTargetFormat::R32_FLOAT:
-        return PixelFormat::R32F;
-    case Tegra::RenderTargetFormat::R32_SINT:
-        return PixelFormat::R32I;
-    case Tegra::RenderTargetFormat::R32_UINT:
-        return PixelFormat::R32UI;
-    case Tegra::RenderTargetFormat::RG32_UINT:
-        return PixelFormat::RG32UI;
-    case Tegra::RenderTargetFormat::RGBX16_FLOAT:
-        return PixelFormat::RGBX16F;
+        return PixelFormat::R16_SINT;
+    case Tegra::RenderTargetFormat::R16_UINT:
+        return PixelFormat::R16_UINT;
+    case Tegra::RenderTargetFormat::R16_FLOAT:
+        return PixelFormat::R16_FLOAT;
+    case Tegra::RenderTargetFormat::R8_UNORM:
+        return PixelFormat::R8_UNORM;
+    case Tegra::RenderTargetFormat::R8_SNORM:
+        return PixelFormat::R8_SNORM;
+    case Tegra::RenderTargetFormat::R8_SINT:
+        return PixelFormat::R8_SINT;
+    case Tegra::RenderTargetFormat::R8_UINT:
+        return PixelFormat::R8_UINT;
     default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-        UNREACHABLE();
-        return PixelFormat::RGBA8_SRGB;
+        UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+        return PixelFormat::A8B8G8R8_UNORM; // Fallback returned after the report above
     }
 }
 
 PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
     switch (format) {
-    case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-        return PixelFormat::ABGR8U;
-    case Tegra::FramebufferConfig::PixelFormat::RGB565:
-        return PixelFormat::B5G6R5U;
-    case Tegra::FramebufferConfig::PixelFormat::BGRA8:
-        return PixelFormat::BGRA8;
+    case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: // Framebuffer -> Surface format
+        return PixelFormat::A8B8G8R8_UNORM;
+    case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+        return PixelFormat::R5G6B5_UNORM;
+    case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+        return PixelFormat::B8G8R8A8_UNORM;
     default:
         UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
-        return PixelFormat::ABGR8U;
+        return PixelFormat::A8B8G8R8_UNORM; // Fallback returned after the report above
     }
 }
 
@@ -212,27 +226,27 @@ SurfaceType GetFormatType(PixelFormat pixel_format) {
 
 bool IsPixelFormatASTC(PixelFormat format) {
     switch (format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_5X4:
-    case PixelFormat::ASTC_2D_5X5:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_8X5:
+    case PixelFormat::ASTC_2D_4X4_UNORM:
+    case PixelFormat::ASTC_2D_5X4_UNORM:
+    case PixelFormat::ASTC_2D_5X5_UNORM:
+    case PixelFormat::ASTC_2D_8X8_UNORM:
+    case PixelFormat::ASTC_2D_8X5_UNORM:
     case PixelFormat::ASTC_2D_4X4_SRGB:
     case PixelFormat::ASTC_2D_5X4_SRGB:
     case PixelFormat::ASTC_2D_5X5_SRGB:
     case PixelFormat::ASTC_2D_8X8_SRGB:
     case PixelFormat::ASTC_2D_8X5_SRGB:
-    case PixelFormat::ASTC_2D_10X8:
+    case PixelFormat::ASTC_2D_10X8_UNORM:
     case PixelFormat::ASTC_2D_10X8_SRGB:
-    case PixelFormat::ASTC_2D_6X6:
+    case PixelFormat::ASTC_2D_6X6_UNORM:
     case PixelFormat::ASTC_2D_6X6_SRGB:
-    case PixelFormat::ASTC_2D_10X10:
+    case PixelFormat::ASTC_2D_10X10_UNORM:
     case PixelFormat::ASTC_2D_10X10_SRGB:
-    case PixelFormat::ASTC_2D_12X12:
+    case PixelFormat::ASTC_2D_12X12_UNORM:
     case PixelFormat::ASTC_2D_12X12_SRGB:
-    case PixelFormat::ASTC_2D_8X6:
+    case PixelFormat::ASTC_2D_8X6_UNORM:
     case PixelFormat::ASTC_2D_8X6_SRGB:
-    case PixelFormat::ASTC_2D_6X5:
+    case PixelFormat::ASTC_2D_6X5_UNORM:
     case PixelFormat::ASTC_2D_6X5_SRGB:
         return true;
     default:
@@ -242,12 +256,12 @@ bool IsPixelFormatASTC(PixelFormat format) {
 
 bool IsPixelFormatSRGB(PixelFormat format) {
     switch (format) {
-    case PixelFormat::RGBA8_SRGB:
-    case PixelFormat::BGRA8_SRGB:
-    case PixelFormat::DXT1_SRGB:
-    case PixelFormat::DXT23_SRGB:
-    case PixelFormat::DXT45_SRGB:
-    case PixelFormat::BC7U_SRGB:
+    case PixelFormat::A8B8G8R8_SRGB:
+    case PixelFormat::B8G8R8A8_SRGB:
+    case PixelFormat::BC1_RGBA_SRGB:
+    case PixelFormat::BC2_SRGB:
+    case PixelFormat::BC3_SRGB:
+    case PixelFormat::BC7_SRGB:
     case PixelFormat::ASTC_2D_4X4_SRGB:
     case PixelFormat::ASTC_2D_8X8_SRGB:
     case PixelFormat::ASTC_2D_8X5_SRGB:
@@ -269,25 +283,4 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
     return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
 }
 
-bool IsFormatBCn(PixelFormat format) {
-    switch (format) {
-    case PixelFormat::DXT1:
-    case PixelFormat::DXT23:
-    case PixelFormat::DXT45:
-    case PixelFormat::DXN1:
-    case PixelFormat::DXN2SNORM:
-    case PixelFormat::DXN2UNORM:
-    case PixelFormat::BC7U:
-    case PixelFormat::BC6H_UF16:
-    case PixelFormat::BC6H_SF16:
-    case PixelFormat::DXT1_SRGB:
-    case PixelFormat::DXT23_SRGB:
-    case PixelFormat::DXT45_SRGB:
-    case PixelFormat::BC7U_SRGB:
-        return true;
-    default:
-        return false;
-    }
-}
-
 } // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 6da6a1b97..cfd12fa61 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -15,94 +15,105 @@
 namespace VideoCore::Surface {
 
 enum class PixelFormat {
-    ABGR8U = 0,
-    ABGR8S = 1,
-    ABGR8UI = 2,
-    B5G6R5U = 3,
-    A2B10G10R10U = 4,
-    A1B5G5R5U = 5,
-    R8U = 6,
-    R8UI = 7,
-    RGBA16F = 8,
-    RGBA16U = 9,
-    RGBA16S = 10,
-    RGBA16UI = 11,
-    R11FG11FB10F = 12,
-    RGBA32UI = 13,
-    DXT1 = 14,
-    DXT23 = 15,
-    DXT45 = 16,
-    DXN1 = 17, // This is also known as BC4
-    DXN2UNORM = 18,
-    DXN2SNORM = 19,
-    BC7U = 20,
-    BC6H_UF16 = 21,
-    BC6H_SF16 = 22,
-    ASTC_2D_4X4 = 23,
-    BGRA8 = 24,
-    RGBA32F = 25,
-    RG32F = 26,
-    R32F = 27,
-    R16F = 28,
-    R16U = 29,
-    R16S = 30,
-    R16UI = 31,
-    R16I = 32,
-    RG16 = 33,
-    RG16F = 34,
-    RG16UI = 35,
-    RG16I = 36,
-    RG16S = 37,
-    RGB32F = 38,
-    RGBA8_SRGB = 39,
-    RG8U = 40,
-    RG8S = 41,
-    RG8UI = 42,
-    RG32UI = 43,
-    RGBX16F = 44,
-    R32UI = 45,
-    R32I = 46,
-    ASTC_2D_8X8 = 47,
-    ASTC_2D_8X5 = 48,
-    ASTC_2D_5X4 = 49,
-    BGRA8_SRGB = 50,
-    DXT1_SRGB = 51,
-    DXT23_SRGB = 52,
-    DXT45_SRGB = 53,
-    BC7U_SRGB = 54,
-    R4G4B4A4U = 55,
-    ASTC_2D_4X4_SRGB = 56,
-    ASTC_2D_8X8_SRGB = 57,
-    ASTC_2D_8X5_SRGB = 58,
-    ASTC_2D_5X4_SRGB = 59,
-    ASTC_2D_5X5 = 60,
-    ASTC_2D_5X5_SRGB = 61,
-    ASTC_2D_10X8 = 62,
-    ASTC_2D_10X8_SRGB = 63,
-    ASTC_2D_6X6 = 64,
-    ASTC_2D_6X6_SRGB = 65,
-    ASTC_2D_10X10 = 66,
-    ASTC_2D_10X10_SRGB = 67,
-    ASTC_2D_12X12 = 68,
-    ASTC_2D_12X12_SRGB = 69,
-    ASTC_2D_8X6 = 70,
-    ASTC_2D_8X6_SRGB = 71,
-    ASTC_2D_6X5 = 72,
-    ASTC_2D_6X5_SRGB = 73,
-    E5B9G9R9F = 74,
+    A8B8G8R8_UNORM,
+    A8B8G8R8_SNORM,
+    A8B8G8R8_SINT,
+    A8B8G8R8_UINT,
+    R5G6B5_UNORM,
+    B5G6R5_UNORM,
+    A1R5G5B5_UNORM,
+    A2B10G10R10_UNORM,
+    A2B10G10R10_UINT,
+    A1B5G5R5_UNORM,
+    R8_UNORM,
+    R8_SNORM,
+    R8_SINT,
+    R8_UINT,
+    R16G16B16A16_FLOAT,
+    R16G16B16A16_UNORM,
+    R16G16B16A16_SNORM,
+    R16G16B16A16_SINT,
+    R16G16B16A16_UINT,
+    B10G11R11_FLOAT,
+    R32G32B32A32_UINT,
+    BC1_RGBA_UNORM,
+    BC2_UNORM,
+    BC3_UNORM,
+    BC4_UNORM,
+    BC4_SNORM,
+    BC5_UNORM,
+    BC5_SNORM,
+    BC7_UNORM,
+    BC6H_UFLOAT,
+    BC6H_SFLOAT,
+    ASTC_2D_4X4_UNORM,
+    B8G8R8A8_UNORM,
+    R32G32B32A32_FLOAT,
+    R32G32B32A32_SINT,
+    R32G32_FLOAT,
+    R32G32_SINT,
+    R32_FLOAT,
+    R16_FLOAT,
+    R16_UNORM,
+    R16_SNORM,
+    R16_UINT,
+    R16_SINT,
+    R16G16_UNORM,
+    R16G16_FLOAT,
+    R16G16_UINT,
+    R16G16_SINT,
+    R16G16_SNORM,
+    R32G32B32_FLOAT,
+    A8B8G8R8_SRGB,
+    R8G8_UNORM,
+    R8G8_SNORM,
+    R8G8_SINT,
+    R8G8_UINT,
+    R32G32_UINT,
+    R16G16B16X16_FLOAT,
+    R32_UINT,
+    R32_SINT,
+    ASTC_2D_8X8_UNORM,
+    ASTC_2D_8X5_UNORM,
+    ASTC_2D_5X4_UNORM,
+    B8G8R8A8_SRGB,
+    BC1_RGBA_SRGB,
+    BC2_SRGB,
+    BC3_SRGB,
+    BC7_SRGB,
+    A4B4G4R4_UNORM,
+    ASTC_2D_4X4_SRGB,
+    ASTC_2D_8X8_SRGB,
+    ASTC_2D_8X5_SRGB,
+    ASTC_2D_5X4_SRGB,
+    ASTC_2D_5X5_UNORM,
+    ASTC_2D_5X5_SRGB,
+    ASTC_2D_10X8_UNORM,
+    ASTC_2D_10X8_SRGB,
+    ASTC_2D_6X6_UNORM,
+    ASTC_2D_6X6_SRGB,
+    ASTC_2D_10X10_UNORM,
+    ASTC_2D_10X10_SRGB,
+    ASTC_2D_12X12_UNORM,
+    ASTC_2D_12X12_SRGB,
+    ASTC_2D_8X6_UNORM,
+    ASTC_2D_8X6_SRGB,
+    ASTC_2D_6X5_UNORM,
+    ASTC_2D_6X5_SRGB,
+    E5B9G9R9_FLOAT,
 
     MaxColorFormat,
 
     // Depth formats
-    Z32F = 75,
-    Z16 = 76,
+    D32_FLOAT = MaxColorFormat,
+    D16_UNORM,
 
     MaxDepthFormat,
 
     // DepthStencil formats
-    Z24S8 = 77,
-    S8Z24 = 78,
-    Z32FS8 = 79,
+    D24_UNORM_S8_UINT = MaxDepthFormat,
+    S8_UINT_D24_UNORM,
+    D32_FLOAT_S8_UINT,
 
     MaxDepthStencilFormat,
 
@@ -130,86 +141,97 @@ enum class SurfaceTarget {
 };
 
 constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
-    0, // ABGR8U
-    0, // ABGR8S
-    0, // ABGR8UI
-    0, // B5G6R5U
-    0, // A2B10G10R10U
-    0, // A1B5G5R5U
-    0, // R8U
-    0, // R8UI
-    0, // RGBA16F
-    0, // RGBA16U
-    0, // RGBA16S
-    0, // RGBA16UI
-    0, // R11FG11FB10F
-    0, // RGBA32UI
-    2, // DXT1
-    2, // DXT23
-    2, // DXT45
-    2, // DXN1
-    2, // DXN2UNORM
-    2, // DXN2SNORM
-    2, // BC7U
-    2, // BC6H_UF16
-    2, // BC6H_SF16
-    2, // ASTC_2D_4X4
-    0, // BGRA8
-    0, // RGBA32F
-    0, // RG32F
-    0, // R32F
-    0, // R16F
-    0, // R16U
-    0, // R16S
-    0, // R16UI
-    0, // R16I
-    0, // RG16
-    0, // RG16F
-    0, // RG16UI
-    0, // RG16I
-    0, // RG16S
-    0, // RGB32F
-    0, // RGBA8_SRGB
-    0, // RG8U
-    0, // RG8S
-    0, // RG8UI
-    0, // RG32UI
-    0, // RGBX16F
-    0, // R32UI
-    0, // R32I
-    2, // ASTC_2D_8X8
-    2, // ASTC_2D_8X5
-    2, // ASTC_2D_5X4
-    0, // BGRA8_SRGB
-    2, // DXT1_SRGB
-    2, // DXT23_SRGB
-    2, // DXT45_SRGB
-    2, // BC7U_SRGB
-    0, // R4G4B4A4U
+    0, // A8B8G8R8_UNORM
+    0, // A8B8G8R8_SNORM
+    0, // A8B8G8R8_SINT
+    0, // A8B8G8R8_UINT
+    0, // R5G6B5_UNORM
+    0, // B5G6R5_UNORM
+    0, // A1R5G5B5_UNORM
+    0, // A2B10G10R10_UNORM
+    0, // A2B10G10R10_UINT
+    0, // A1B5G5R5_UNORM
+    0, // R8_UNORM
+    0, // R8_SNORM
+    0, // R8_SINT
+    0, // R8_UINT
+    0, // R16G16B16A16_FLOAT
+    0, // R16G16B16A16_UNORM
+    0, // R16G16B16A16_SNORM
+    0, // R16G16B16A16_SINT
+    0, // R16G16B16A16_UINT
+    0, // B10G11R11_FLOAT
+    0, // R32G32B32A32_UINT
+    2, // BC1_RGBA_UNORM
+    2, // BC2_UNORM
+    2, // BC3_UNORM
+    2, // BC4_UNORM
+    2, // BC4_SNORM
+    2, // BC5_UNORM
+    2, // BC5_SNORM
+    2, // BC7_UNORM
+    2, // BC6H_UFLOAT
+    2, // BC6H_SFLOAT
+    2, // ASTC_2D_4X4_UNORM
+    0, // B8G8R8A8_UNORM
+    0, // R32G32B32A32_FLOAT
+    0, // R32G32B32A32_SINT
+    0, // R32G32_FLOAT
+    0, // R32G32_SINT
+    0, // R32_FLOAT
+    0, // R16_FLOAT
+    0, // R16_UNORM
+    0, // R16_SNORM
+    0, // R16_UINT
+    0, // R16_SINT
+    0, // R16G16_UNORM
+    0, // R16G16_FLOAT
+    0, // R16G16_UINT
+    0, // R16G16_SINT
+    0, // R16G16_SNORM
+    0, // R32G32B32_FLOAT
+    0, // A8B8G8R8_SRGB
+    0, // R8G8_UNORM
+    0, // R8G8_SNORM
+    0, // R8G8_SINT
+    0, // R8G8_UINT
+    0, // R32G32_UINT
+    0, // R16G16B16X16_FLOAT
+    0, // R32_UINT
+    0, // R32_SINT
+    2, // ASTC_2D_8X8_UNORM
+    2, // ASTC_2D_8X5_UNORM
+    2, // ASTC_2D_5X4_UNORM
+    0, // B8G8R8A8_SRGB
+    2, // BC1_RGBA_SRGB
+    2, // BC2_SRGB
+    2, // BC3_SRGB
+    2, // BC7_SRGB
+    0, // A4B4G4R4_UNORM
     2, // ASTC_2D_4X4_SRGB
     2, // ASTC_2D_8X8_SRGB
     2, // ASTC_2D_8X5_SRGB
     2, // ASTC_2D_5X4_SRGB
-    2, // ASTC_2D_5X5
+    2, // ASTC_2D_5X5_UNORM
     2, // ASTC_2D_5X5_SRGB
-    2, // ASTC_2D_10X8
+    2, // ASTC_2D_10X8_UNORM
     2, // ASTC_2D_10X8_SRGB
-    2, // ASTC_2D_6X6
+    2, // ASTC_2D_6X6_UNORM
     2, // ASTC_2D_6X6_SRGB
-    2, // ASTC_2D_10X10
+    2, // ASTC_2D_10X10_UNORM
     2, // ASTC_2D_10X10_SRGB
-    2, // ASTC_2D_12X12
+    2, // ASTC_2D_12X12_UNORM
     2, // ASTC_2D_12X12_SRGB
-    2, // ASTC_2D_8X6
+    2, // ASTC_2D_8X6_UNORM
     2, // ASTC_2D_8X6_SRGB
-    2, // ASTC_2D_6X5
+    2, // ASTC_2D_6X5_UNORM
     2, // ASTC_2D_6X5_SRGB
-    0, // E5B9G9R9F
-    0, // Z32F
-    0, // Z16
-    0, // Z24S8
-    0, // S8Z24
-    0, // Z32FS8
+    0, // E5B9G9R9_FLOAT
+    0, // D32_FLOAT
+    0, // D16_UNORM
+    0, // D24_UNORM_S8_UINT
+    0, // S8_UINT_D24_UNORM
+    0, // D32_FLOAT_S8_UINT
 }};
 
 /**
@@ -229,86 +251,97 @@ inline constexpr u32 GetCompressionFactor(PixelFormat format) {
 }
 
 constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
-    1,  // ABGR8U
-    1,  // ABGR8S
-    1,  // ABGR8UI
-    1,  // B5G6R5U
-    1,  // A2B10G10R10U
-    1,  // A1B5G5R5U
-    1,  // R8U
-    1,  // R8UI
-    1,  // RGBA16F
-    1,  // RGBA16U
-    1,  // RGBA16S
-    1,  // RGBA16UI
-    1,  // R11FG11FB10F
-    1,  // RGBA32UI
-    4,  // DXT1
-    4,  // DXT23
-    4,  // DXT45
-    4,  // DXN1
-    4,  // DXN2UNORM
-    4,  // DXN2SNORM
-    4,  // BC7U
-    4,  // BC6H_UF16
-    4,  // BC6H_SF16
-    4,  // ASTC_2D_4X4
-    1,  // BGRA8
-    1,  // RGBA32F
-    1,  // RG32F
-    1,  // R32F
-    1,  // R16F
-    1,  // R16U
-    1,  // R16S
-    1,  // R16UI
-    1,  // R16I
-    1,  // RG16
-    1,  // RG16F
-    1,  // RG16UI
-    1,  // RG16I
-    1,  // RG16S
-    1,  // RGB32F
-    1,  // RGBA8_SRGB
-    1,  // RG8U
-    1,  // RG8S
-    1,  // RG8UI
-    1,  // RG32UI
-    1,  // RGBX16F
-    1,  // R32UI
-    1,  // R32I
-    8,  // ASTC_2D_8X8
-    8,  // ASTC_2D_8X5
-    5,  // ASTC_2D_5X4
-    1,  // BGRA8_SRGB
-    4,  // DXT1_SRGB
-    4,  // DXT23_SRGB
-    4,  // DXT45_SRGB
-    4,  // BC7U_SRGB
-    1,  // R4G4B4A4U
+    1,  // A8B8G8R8_UNORM
+    1,  // A8B8G8R8_SNORM
+    1,  // A8B8G8R8_SINT
+    1,  // A8B8G8R8_UINT
+    1,  // R5G6B5_UNORM
+    1,  // B5G6R5_UNORM
+    1,  // A1R5G5B5_UNORM
+    1,  // A2B10G10R10_UNORM
+    1,  // A2B10G10R10_UINT
+    1,  // A1B5G5R5_UNORM
+    1,  // R8_UNORM
+    1,  // R8_SNORM
+    1,  // R8_SINT
+    1,  // R8_UINT
+    1,  // R16G16B16A16_FLOAT
+    1,  // R16G16B16A16_UNORM
+    1,  // R16G16B16A16_SNORM
+    1,  // R16G16B16A16_SINT
+    1,  // R16G16B16A16_UINT
+    1,  // B10G11R11_FLOAT
+    1,  // R32G32B32A32_UINT
+    4,  // BC1_RGBA_UNORM
+    4,  // BC2_UNORM
+    4,  // BC3_UNORM
+    4,  // BC4_UNORM
+    4,  // BC4_SNORM
+    4,  // BC5_UNORM
+    4,  // BC5_SNORM
+    4,  // BC7_UNORM
+    4,  // BC6H_UFLOAT
+    4,  // BC6H_SFLOAT
+    4,  // ASTC_2D_4X4_UNORM
+    1,  // B8G8R8A8_UNORM
+    1,  // R32G32B32A32_FLOAT
+    1,  // R32G32B32A32_SINT
+    1,  // R32G32_FLOAT
+    1,  // R32G32_SINT
+    1,  // R32_FLOAT
+    1,  // R16_FLOAT
+    1,  // R16_UNORM
+    1,  // R16_SNORM
+    1,  // R16_UINT
+    1,  // R16_SINT
+    1,  // R16G16_UNORM
+    1,  // R16G16_FLOAT
+    1,  // R16G16_UINT
+    1,  // R16G16_SINT
+    1,  // R16G16_SNORM
+    1,  // R32G32B32_FLOAT
+    1,  // A8B8G8R8_SRGB
+    1,  // R8G8_UNORM
+    1,  // R8G8_SNORM
+    1,  // R8G8_SINT
+    1,  // R8G8_UINT
+    1,  // R32G32_UINT
+    1,  // R16G16B16X16_FLOAT
+    1,  // R32_UINT
+    1,  // R32_SINT
+    8,  // ASTC_2D_8X8_UNORM
+    8,  // ASTC_2D_8X5_UNORM
+    5,  // ASTC_2D_5X4_UNORM
+    1,  // B8G8R8A8_SRGB
+    4,  // BC1_RGBA_SRGB
+    4,  // BC2_SRGB
+    4,  // BC3_SRGB
+    4,  // BC7_SRGB
+    1,  // A4B4G4R4_UNORM
     4,  // ASTC_2D_4X4_SRGB
     8,  // ASTC_2D_8X8_SRGB
     8,  // ASTC_2D_8X5_SRGB
     5,  // ASTC_2D_5X4_SRGB
-    5,  // ASTC_2D_5X5
+    5,  // ASTC_2D_5X5_UNORM
     5,  // ASTC_2D_5X5_SRGB
-    10, // ASTC_2D_10X8
+    10, // ASTC_2D_10X8_UNORM
     10, // ASTC_2D_10X8_SRGB
-    6,  // ASTC_2D_6X6
+    6,  // ASTC_2D_6X6_UNORM
     6,  // ASTC_2D_6X6_SRGB
-    10, // ASTC_2D_10X10
+    10, // ASTC_2D_10X10_UNORM
     10, // ASTC_2D_10X10_SRGB
-    12, // ASTC_2D_12X12
+    12, // ASTC_2D_12X12_UNORM
     12, // ASTC_2D_12X12_SRGB
-    8,  // ASTC_2D_8X6
+    8,  // ASTC_2D_8X6_UNORM
     8,  // ASTC_2D_8X6_SRGB
-    6,  // ASTC_2D_6X5
+    6,  // ASTC_2D_6X5_UNORM
     6,  // ASTC_2D_6X5_SRGB
-    1,  // E5B9G9R9F
-    1,  // Z32F
-    1,  // Z16
-    1,  // Z24S8
-    1,  // S8Z24
-    1,  // Z32FS8
+    1,  // E5B9G9R9_FLOAT
+    1,  // D32_FLOAT
+    1,  // D16_UNORM
+    1,  // D24_UNORM_S8_UINT
+    1,  // S8_UINT_D24_UNORM
+    1,  // D32_FLOAT_S8_UINT
 }};
 
 static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
@@ -320,86 +353,97 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
 }
 
 constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
-    1,  // ABGR8U
-    1,  // ABGR8S
-    1,  // ABGR8UI
-    1,  // B5G6R5U
-    1,  // A2B10G10R10U
-    1,  // A1B5G5R5U
-    1,  // R8U
-    1,  // R8UI
-    1,  // RGBA16F
-    1,  // RGBA16U
-    1,  // RGBA16S
-    1,  // RGBA16UI
-    1,  // R11FG11FB10F
-    1,  // RGBA32UI
-    4,  // DXT1
-    4,  // DXT23
-    4,  // DXT45
-    4,  // DXN1
-    4,  // DXN2UNORM
-    4,  // DXN2SNORM
-    4,  // BC7U
-    4,  // BC6H_UF16
-    4,  // BC6H_SF16
-    4,  // ASTC_2D_4X4
-    1,  // BGRA8
-    1,  // RGBA32F
-    1,  // RG32F
-    1,  // R32F
-    1,  // R16F
-    1,  // R16U
-    1,  // R16S
-    1,  // R16UI
-    1,  // R16I
-    1,  // RG16
-    1,  // RG16F
-    1,  // RG16UI
-    1,  // RG16I
-    1,  // RG16S
-    1,  // RGB32F
-    1,  // RGBA8_SRGB
-    1,  // RG8U
-    1,  // RG8S
-    1,  // RG8UI
-    1,  // RG32UI
-    1,  // RGBX16F
-    1,  // R32UI
-    1,  // R32I
-    8,  // ASTC_2D_8X8
-    5,  // ASTC_2D_8X5
-    4,  // ASTC_2D_5X4
-    1,  // BGRA8_SRGB
-    4,  // DXT1_SRGB
-    4,  // DXT23_SRGB
-    4,  // DXT45_SRGB
-    4,  // BC7U_SRGB
-    1,  // R4G4B4A4U
+    1,  // A8B8G8R8_UNORM
+    1,  // A8B8G8R8_SNORM
+    1,  // A8B8G8R8_SINT
+    1,  // A8B8G8R8_UINT
+    1,  // R5G6B5_UNORM
+    1,  // B5G6R5_UNORM
+    1,  // A1R5G5B5_UNORM
+    1,  // A2B10G10R10_UNORM
+    1,  // A2B10G10R10_UINT
+    1,  // A1B5G5R5_UNORM
+    1,  // R8_UNORM
+    1,  // R8_SNORM
+    1,  // R8_SINT
+    1,  // R8_UINT
+    1,  // R16G16B16A16_FLOAT
+    1,  // R16G16B16A16_UNORM
+    1,  // R16G16B16A16_SNORM
+    1,  // R16G16B16A16_SINT
+    1,  // R16G16B16A16_UINT
+    1,  // B10G11R11_FLOAT
+    1,  // R32G32B32A32_UINT
+    4,  // BC1_RGBA_UNORM
+    4,  // BC2_UNORM
+    4,  // BC3_UNORM
+    4,  // BC4_UNORM
+    4,  // BC4_SNORM
+    4,  // BC5_UNORM
+    4,  // BC5_SNORM
+    4,  // BC7_UNORM
+    4,  // BC6H_UFLOAT
+    4,  // BC6H_SFLOAT
+    4,  // ASTC_2D_4X4_UNORM
+    1,  // B8G8R8A8_UNORM
+    1,  // R32G32B32A32_FLOAT
+    1,  // R32G32B32A32_SINT
+    1,  // R32G32_FLOAT
+    1,  // R32G32_SINT
+    1,  // R32_FLOAT
+    1,  // R16_FLOAT
+    1,  // R16_UNORM
+    1,  // R16_SNORM
+    1,  // R16_UINT
+    1,  // R16_SINT
+    1,  // R16G16_UNORM
+    1,  // R16G16_FLOAT
+    1,  // R16G16_UINT
+    1,  // R16G16_SINT
+    1,  // R16G16_SNORM
+    1,  // R32G32B32_FLOAT
+    1,  // A8B8G8R8_SRGB
+    1,  // R8G8_UNORM
+    1,  // R8G8_SNORM
+    1,  // R8G8_SINT
+    1,  // R8G8_UINT
+    1,  // R32G32_UINT
+    1,  // R16G16B16X16_FLOAT
+    1,  // R32_UINT
+    1,  // R32_SINT
+    8,  // ASTC_2D_8X8_UNORM
+    5,  // ASTC_2D_8X5_UNORM
+    4,  // ASTC_2D_5X4_UNORM
+    1,  // B8G8R8A8_SRGB
+    4,  // BC1_RGBA_SRGB
+    4,  // BC2_SRGB
+    4,  // BC3_SRGB
+    4,  // BC7_SRGB
+    1,  // A4B4G4R4_UNORM
     4,  // ASTC_2D_4X4_SRGB
     8,  // ASTC_2D_8X8_SRGB
     5,  // ASTC_2D_8X5_SRGB
     4,  // ASTC_2D_5X4_SRGB
-    5,  // ASTC_2D_5X5
+    5,  // ASTC_2D_5X5_UNORM
     5,  // ASTC_2D_5X5_SRGB
-    8,  // ASTC_2D_10X8
+    8,  // ASTC_2D_10X8_UNORM
     8,  // ASTC_2D_10X8_SRGB
-    6,  // ASTC_2D_6X6
+    6,  // ASTC_2D_6X6_UNORM
     6,  // ASTC_2D_6X6_SRGB
-    10, // ASTC_2D_10X10
+    10, // ASTC_2D_10X10_UNORM
     10, // ASTC_2D_10X10_SRGB
-    12, // ASTC_2D_12X12
+    12, // ASTC_2D_12X12_UNORM
     12, // ASTC_2D_12X12_SRGB
-    6,  // ASTC_2D_8X6
+    6,  // ASTC_2D_8X6_UNORM
     6,  // ASTC_2D_8X6_SRGB
-    5,  // ASTC_2D_6X5
+    5,  // ASTC_2D_6X5_UNORM
     5,  // ASTC_2D_6X5_SRGB
-    1,  // E5B9G9R9F
-    1,  // Z32F
-    1,  // Z16
-    1,  // Z24S8
-    1,  // S8Z24
-    1,  // Z32FS8
+    1,  // E5B9G9R9_FLOAT
+    1,  // D32_FLOAT
+    1,  // D16_UNORM
+    1,  // D24_UNORM_S8_UINT
+    1,  // S8_UINT_D24_UNORM
+    1,  // D32_FLOAT_S8_UINT
 }};
 
 static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -411,86 +455,97 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
 }
 
 constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
-    32,  // ABGR8U
-    32,  // ABGR8S
-    32,  // ABGR8UI
-    16,  // B5G6R5U
-    32,  // A2B10G10R10U
-    16,  // A1B5G5R5U
-    8,   // R8U
-    8,   // R8UI
-    64,  // RGBA16F
-    64,  // RGBA16U
-    64,  // RGBA16S
-    64,  // RGBA16UI
-    32,  // R11FG11FB10F
-    128, // RGBA32UI
-    64,  // DXT1
-    128, // DXT23
-    128, // DXT45
-    64,  // DXN1
-    128, // DXN2UNORM
-    128, // DXN2SNORM
-    128, // BC7U
-    128, // BC6H_UF16
-    128, // BC6H_SF16
-    128, // ASTC_2D_4X4
-    32,  // BGRA8
-    128, // RGBA32F
-    64,  // RG32F
-    32,  // R32F
-    16,  // R16F
-    16,  // R16U
-    16,  // R16S
-    16,  // R16UI
-    16,  // R16I
-    32,  // RG16
-    32,  // RG16F
-    32,  // RG16UI
-    32,  // RG16I
-    32,  // RG16S
-    96,  // RGB32F
-    32,  // RGBA8_SRGB
-    16,  // RG8U
-    16,  // RG8S
-    16,  // RG8UI
-    64,  // RG32UI
-    64,  // RGBX16F
-    32,  // R32UI
-    32,  // R32I
-    128, // ASTC_2D_8X8
-    128, // ASTC_2D_8X5
-    128, // ASTC_2D_5X4
-    32,  // BGRA8_SRGB
-    64,  // DXT1_SRGB
-    128, // DXT23_SRGB
-    128, // DXT45_SRGB
-    128, // BC7U
-    16,  // R4G4B4A4U
+    32,  // A8B8G8R8_UNORM
+    32,  // A8B8G8R8_SNORM
+    32,  // A8B8G8R8_SINT
+    32,  // A8B8G8R8_UINT
+    16,  // R5G6B5_UNORM
+    16,  // B5G6R5_UNORM
+    16,  // A1R5G5B5_UNORM
+    32,  // A2B10G10R10_UNORM
+    32,  // A2B10G10R10_UINT
+    16,  // A1B5G5R5_UNORM
+    8,   // R8_UNORM
+    8,   // R8_SNORM
+    8,   // R8_SINT
+    8,   // R8_UINT
+    64,  // R16G16B16A16_FLOAT
+    64,  // R16G16B16A16_UNORM
+    64,  // R16G16B16A16_SNORM
+    64,  // R16G16B16A16_SINT
+    64,  // R16G16B16A16_UINT
+    32,  // B10G11R11_FLOAT
+    128, // R32G32B32A32_UINT
+    64,  // BC1_RGBA_UNORM
+    128, // BC2_UNORM
+    128, // BC3_UNORM
+    64,  // BC4_UNORM
+    64,  // BC4_SNORM
+    128, // BC5_UNORM
+    128, // BC5_SNORM
+    128, // BC7_UNORM
+    128, // BC6H_UFLOAT
+    128, // BC6H_SFLOAT
+    128, // ASTC_2D_4X4_UNORM
+    32,  // B8G8R8A8_UNORM
+    128, // R32G32B32A32_FLOAT
+    128, // R32G32B32A32_SINT
+    64,  // R32G32_FLOAT
+    64,  // R32G32_SINT
+    32,  // R32_FLOAT
+    16,  // R16_FLOAT
+    16,  // R16_UNORM
+    16,  // R16_SNORM
+    16,  // R16_UINT
+    16,  // R16_SINT
+    32,  // R16G16_UNORM
+    32,  // R16G16_FLOAT
+    32,  // R16G16_UINT
+    32,  // R16G16_SINT
+    32,  // R16G16_SNORM
+    96,  // R32G32B32_FLOAT
+    32,  // A8B8G8R8_SRGB
+    16,  // R8G8_UNORM
+    16,  // R8G8_SNORM
+    16,  // R8G8_SINT
+    16,  // R8G8_UINT
+    64,  // R32G32_UINT
+    64,  // R16G16B16X16_FLOAT
+    32,  // R32_UINT
+    32,  // R32_SINT
+    128, // ASTC_2D_8X8_UNORM
+    128, // ASTC_2D_8X5_UNORM
+    128, // ASTC_2D_5X4_UNORM
+    32,  // B8G8R8A8_SRGB
+    64,  // BC1_RGBA_SRGB
+    128, // BC2_SRGB
+    128, // BC3_SRGB
+    128, // BC7_SRGB
+    16,  // A4B4G4R4_UNORM
     128, // ASTC_2D_4X4_SRGB
     128, // ASTC_2D_8X8_SRGB
     128, // ASTC_2D_8X5_SRGB
     128, // ASTC_2D_5X4_SRGB
-    128, // ASTC_2D_5X5
+    128, // ASTC_2D_5X5_UNORM
     128, // ASTC_2D_5X5_SRGB
-    128, // ASTC_2D_10X8
+    128, // ASTC_2D_10X8_UNORM
     128, // ASTC_2D_10X8_SRGB
-    128, // ASTC_2D_6X6
+    128, // ASTC_2D_6X6_UNORM
     128, // ASTC_2D_6X6_SRGB
-    128, // ASTC_2D_10X10
+    128, // ASTC_2D_10X10_UNORM
     128, // ASTC_2D_10X10_SRGB
-    128, // ASTC_2D_12X12
+    128, // ASTC_2D_12X12_UNORM
     128, // ASTC_2D_12X12_SRGB
-    128, // ASTC_2D_8X6
+    128, // ASTC_2D_8X6_UNORM
     128, // ASTC_2D_8X6_SRGB
-    128, // ASTC_2D_6X5
+    128, // ASTC_2D_6X5_UNORM
     128, // ASTC_2D_6X5_SRGB
-    32,  // E5B9G9R9F
-    32,  // Z32F
-    16,  // Z16
-    32,  // Z24S8
-    32,  // S8Z24
-    64,  // Z32FS8
+    32,  // E5B9G9R9_FLOAT
+    32,  // D32_FLOAT
+    16,  // D16_UNORM
+    32,  // D24_UNORM_S8_UINT
+    32,  // S8_UINT_D24_UNORM
+    64,  // D32_FLOAT_S8_UINT
 }};
 
 static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -529,7 +584,4 @@ bool IsPixelFormatSRGB(PixelFormat format);
 
 std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
 
-/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
-bool IsFormatBCn(PixelFormat format);
-
 } // namespace VideoCore::Surface
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index f476f03b0..7d5a75648 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -19,8 +19,6 @@ constexpr auto SNORM = ComponentType::SNORM;
 constexpr auto UNORM = ComponentType::UNORM;
 constexpr auto SINT = ComponentType::SINT;
 constexpr auto UINT = ComponentType::UINT;
-constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16;
-constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16;
 constexpr auto FLOAT = ComponentType::FLOAT;
 constexpr bool C = false; // Normal color
 constexpr bool S = true;  // Srgb
@@ -41,119 +39,126 @@ struct Table {
     ComponentType alpha_component;
     bool is_srgb;
 };
-constexpr std::array<Table, 78> DefinitionTable = {{
-    {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
-    {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
-    {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
-    {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB},
+constexpr std::array<Table, 86> DefinitionTable = {{
+    {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
+    {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
+    {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
+    {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
+    {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
 
-    {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U},
+    {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
 
-    {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U},
+    {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
+    {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
 
-    {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U},
+    {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
 
-    {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U},
+    {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
 
-    {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U},
-    {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI},
+    {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
+    {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
+    {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
+    {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
 
-    {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
-    {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
-    {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI},
+    {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
+    {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
+    {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
+    {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
 
-    {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
-    {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
-    {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
-    {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
+    {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
+    {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
+    {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
+    {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
+    {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
 
-    {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F},
-    {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16},
-    {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S},
-    {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI},
-    {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I},
+    {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
+    {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
+    {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
+    {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
+    {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
 
-    {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F},
-    {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U},
-    {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S},
-    {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI},
-    {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I},
+    {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
+    {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
+    {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
+    {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
+    {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
 
-    {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F},
+    {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
 
-    {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F},
-    {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI},
+    {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
+    {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
+    {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
 
-    {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F},
+    {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
 
-    {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F},
-    {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI},
+    {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
+    {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
+    {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
 
-    {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
-    {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
-    {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
+    {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
+    {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
+    {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
 
-    {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
+    {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
 
-    {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
-    {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
-    {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
-    {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
-    {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
+    {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
+    {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
+    {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
+    {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
+    {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
 
-    {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
-    {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
+    {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
+    {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
 
-    {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23},
-    {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB},
+    {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
+    {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
 
-    {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45},
-    {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB},
+    {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
+    {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
 
-    // TODO: Use a different pixel format for SNORM
-    {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1},
-    {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1},
+    {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
+    {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
 
-    {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM},
-    {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM},
+    {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
+    {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
 
-    {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U},
-    {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB},
+    {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
+    {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
 
-    {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16},
-    {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16},
+    {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
+    {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
 
-    {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4},
+    {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
     {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
 
-    {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4},
+    {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
     {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
 
-    {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5},
+    {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
     {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
 
-    {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8},
+    {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
     {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
 
-    {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5},
+    {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
     {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
 
-    {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8},
+    {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
     {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
 
-    {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6},
+    {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
     {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
 
-    {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10},
+    {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
     {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
 
-    {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12},
+    {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
     {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
 
-    {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6},
+    {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
     {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
 
-    {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5},
+    {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
     {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
 }};
 
@@ -184,7 +189,7 @@ PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb
                       static_cast<int>(format), is_srgb, static_cast<int>(red_component),
                       static_cast<int>(green_component), static_cast<int>(blue_component),
                       static_cast<int>(alpha_component));
-    return PixelFormat::ABGR8U;
+    return PixelFormat::A8B8G8R8_UNORM;
 }
 
 void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 0caf3b4f0..b44c09d71 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -115,20 +115,24 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
     if (gpu_addr == candidate_gpu_addr) {
         return {{0, 0}};
     }
+
     if (candidate_gpu_addr < gpu_addr) {
-        return {};
+        return std::nullopt;
     }
+
     const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
     const auto layer{static_cast<u32>(relative_address / layer_size)};
     if (layer >= params.depth) {
-        return {};
+        return std::nullopt;
     }
+
     const GPUVAddr mipmap_address = relative_address - layer_size * layer;
     const auto mipmap_it =
         Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
     if (mipmap_it == mipmap_offsets.end()) {
-        return {};
+        return std::nullopt;
     }
+
     const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
     return std::make_pair(layer, level);
 }
@@ -228,7 +232,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
         }
     }
 
-    if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
+    if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
         return;
     }
 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 921562c1f..e8515321b 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -74,21 +74,21 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
     SurfaceParams params;
     params.is_tiled = tic.IsTiled();
     params.srgb_conversion = tic.IsSrgbConversionEnabled();
-    params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
-    params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
-    params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+    params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
+    params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
+    params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
     params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
     params.pixel_format = lookup_table.GetPixelFormat(
         tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
     params.type = GetFormatType(params.pixel_format);
     if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
         switch (params.pixel_format) {
-        case PixelFormat::R16U:
-        case PixelFormat::R16F:
-            params.pixel_format = PixelFormat::Z16;
+        case PixelFormat::R16_UNORM:
+        case PixelFormat::R16_FLOAT:
+            params.pixel_format = PixelFormat::D16_UNORM;
             break;
-        case PixelFormat::R32F:
-            params.pixel_format = PixelFormat::Z32F;
+        case PixelFormat::R32_FLOAT:
+            params.pixel_format = PixelFormat::D32_FLOAT;
             break;
         default:
             UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
@@ -96,7 +96,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
         }
         params.type = GetFormatType(params.pixel_format);
     }
-    params.type = GetFormatType(params.pixel_format);
     // TODO: on 1DBuffer we should use the tic info.
     if (tic.IsBuffer()) {
         params.target = SurfaceTarget::TextureBuffer;
@@ -130,14 +129,13 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
     SurfaceParams params;
     params.is_tiled = tic.IsTiled();
     params.srgb_conversion = tic.IsSrgbConversionEnabled();
-    params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
-    params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
-    params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+    params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
+    params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
+    params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
     params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
     params.pixel_format = lookup_table.GetPixelFormat(
         tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
     params.type = GetFormatType(params.pixel_format);
-    params.type = GetFormatType(params.pixel_format);
     params.target = ImageTypeToSurfaceTarget(entry.type);
     // TODO: on 1DBuffer we should use the tic info.
     if (tic.IsBuffer()) {
@@ -165,38 +163,40 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
     return params;
 }
 
-SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    SurfaceParams params;
-    params.is_tiled = regs.zeta.memory_layout.type ==
-                      Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
-    params.srgb_conversion = false;
-    params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
-    params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
-    params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
-    params.tile_width_spacing = 1;
-    params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
-    params.type = GetFormatType(params.pixel_format);
-    params.width = regs.zeta_width;
-    params.height = regs.zeta_height;
-    params.pitch = 0;
-    params.num_levels = 1;
-    params.emulated_levels = 1;
-
-    const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
-    params.is_layered = is_layered;
-    params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
-    params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
-    return params;
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
+    const auto& regs = maxwell3d.regs; // params are built from the zeta (depth) registers
+    const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); // max 5
+    const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
+    const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
+    return {
+        .is_tiled = regs.zeta.memory_layout.type ==
+                    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
+        .srgb_conversion = false,
+        .is_layered = is_layered,
+        .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), // max 5
+        .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U), // max 5
+        .block_depth = block_depth,
+        .tile_width_spacing = 1,
+        .width = regs.zeta_width,
+        .height = regs.zeta_height,
+        .depth = is_layered ? regs.zeta_layers.Value() : 1U, // layer count, or 1 if not layered
+        .pitch = 0,
+        .num_levels = 1,
+        .emulated_levels = 1,
+        .pixel_format = pixel_format,
+        .type = GetFormatType(pixel_format),
+        .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
+    };
 }
 
-SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
-    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+                                                  std::size_t index) {
+    const auto& config{maxwell3d.regs.rt[index]};
     SurfaceParams params;
     params.is_tiled =
         config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
-    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
-                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
+    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
+                             config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
     params.block_width = config.memory_layout.block_width;
     params.block_height = config.memory_layout.block_height;
     params.block_depth = config.memory_layout.block_depth;
@@ -233,24 +233,29 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
 
 SurfaceParams SurfaceParams::CreateForFermiCopySurface(
     const Tegra::Engines::Fermi2D::Regs::Surface& config) {
-    SurfaceParams params{};
-    params.is_tiled = !config.linear;
-    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
-                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
-    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0,
-    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0,
-    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
-    params.tile_width_spacing = 1;
-    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
-    params.type = GetFormatType(params.pixel_format);
-    params.width = config.width;
-    params.height = config.height;
-    params.pitch = config.pitch;
-    // TODO(Rodrigo): Try to guess texture arrays from parameters
-    params.target = SurfaceTarget::Texture2D;
-    params.depth = 1;
-    params.num_levels = 1;
-    params.emulated_levels = 1;
+    const bool is_tiled = !config.linear;
+    const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    // Linear surfaces get block dimensions of 0; tiled ones use the config values, at most 5.
+    SurfaceParams params{
+        .is_tiled = is_tiled,
+        .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
+                           config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
+        .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
+        .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
+        .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
+        .tile_width_spacing = 1,
+        .width = config.width,
+        .height = config.height,
+        .depth = 1,
+        .pitch = config.pitch,
+        .num_levels = 1,
+        .emulated_levels = 1,
+        .pixel_format = pixel_format,
+        .type = GetFormatType(pixel_format),
+        // TODO(Rodrigo): Try to guess texture arrays from parameters
+        .target = SurfaceTarget::Texture2D,
+    };
+    // is_layered is not set in the initializer; it is derived once params is complete.
     params.is_layered = params.IsLayered();
     return params;
 }
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 118aa689e..4466c3c34 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -33,10 +33,11 @@ public:
                                         const VideoCommon::Shader::Image& entry);
 
     /// Creates SurfaceCachedParams for a depth buffer configuration.
-    static SurfaceParams CreateForDepthBuffer(Core::System& system);
+    static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
 
     /// Creates SurfaceCachedParams from a framebuffer configuration.
-    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+    static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+                                              std::size_t index);
 
     /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
     static SurfaceParams CreateForFermiCopySurface(
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index cdcddb225..ea835c59f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -135,8 +135,7 @@ public:
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
 
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
@@ -160,8 +159,7 @@ public:
         if (!gpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
@@ -183,11 +181,11 @@ public:
 
     TView GetDepthBufferSurface(bool preserve_contents) {
         std::lock_guard lock{mutex};
-        auto& maxwell3d = system.GPU().Maxwell3D();
-        if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
+        auto& dirty = maxwell3d.dirty;
+        if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
             return depth_buffer.view;
         }
-        maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
+        dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
 
         const auto& regs{maxwell3d.regs};
         const auto gpu_addr{regs.zeta.Address()};
@@ -195,13 +193,12 @@ public:
             SetEmptyDepthBuffer();
             return {};
         }
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr) {
             SetEmptyDepthBuffer();
             return {};
         }
-        const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
+        const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
         auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
             depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -215,7 +212,6 @@ public:
     TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
         std::lock_guard lock{mutex};
         ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
-        auto& maxwell3d = system.GPU().Maxwell3D();
         if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
             return render_targets[index].view;
         }
@@ -235,15 +231,14 @@ public:
             return {};
         }
 
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr) {
             SetEmptyColorBuffer(index);
             return {};
         }
 
         auto surface_view =
-            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
                        preserve_contents, true);
         if (render_targets[index].target) {
             auto& surface = render_targets[index].target;
@@ -300,9 +295,8 @@ public:
         const GPUVAddr dst_gpu_addr = dst_config.Address();
         DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
 
-        const auto& memory_manager = system.GPU().MemoryManager();
-        const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
-        const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
+        const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
+        const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
         std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
         TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
         ImageBlit(src_surface, dst_surface.second, copy_config);
@@ -358,9 +352,11 @@ public:
     }
 
 protected:
-    explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                          bool is_astc_supported)
-        : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
+    explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
+                          Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+                          bool is_astc_supported_)
+        : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+          gpu_memory{gpu_memory_} {
         for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
             SetEmptyColorBuffer(i);
         }
@@ -373,9 +369,9 @@ protected:
             siblings_table[static_cast<std::size_t>(b)] = a;
         };
         std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
-        make_siblings(PixelFormat::Z16, PixelFormat::R16U);
-        make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
-        make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);
+        make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
+        make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
+        make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
 
         sampled_textures.reserve(64);
     }
@@ -395,7 +391,7 @@ protected:
     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 
     void ManageRenderTargetUnregister(TSurface& surface) {
-        auto& dirty = system.GPU().Maxwell3D().dirty;
+        auto& dirty = maxwell3d.dirty;
         const u32 index = surface->GetRenderTarget();
         if (index == DEPTH_RT) {
             dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
@@ -408,8 +404,7 @@ protected:
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const std::size_t size = surface->GetSizeInBytes();
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                          gpu_addr);
@@ -459,7 +454,6 @@ protected:
         return new_surface;
     }
 
-    Core::System& system;
     const bool is_astc_supported;
 
 private:
@@ -954,8 +948,7 @@ private:
      * @param params   The parameters on the candidate surface.
      **/
     Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
 
         if (!cpu_addr) {
             Deduction result{};
@@ -1031,7 +1024,7 @@ private:
         params.pitch = 4;
         params.num_levels = 1;
         params.emulated_levels = 1;
-        params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
+        params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
         params.type = VideoCore::Surface::SurfaceType::ColorTexture;
         auto surface = CreateSurface(0ULL, params);
         invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
@@ -1112,7 +1105,7 @@ private:
 
     void LoadSurface(const TSurface& surface) {
         staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
+        surface->LoadBuffer(gpu_memory, staging_cache);
         surface->UploadTexture(staging_cache.GetBuffer(0));
         surface->MarkAsModified(false, Tick());
     }
@@ -1123,7 +1116,7 @@ private:
         }
         staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
         surface->DownloadTexture(staging_cache.GetBuffer(0));
-        surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
+        surface->FlushBuffer(gpu_memory, staging_cache);
         surface->MarkAsModified(false, Tick());
     }
 
@@ -1253,6 +1246,8 @@ private:
     }
 
     VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::MemoryManager& gpu_memory;
 
     FormatLookupTable format_lookup_table;
     FormatCompatibility format_compatibility;
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
index f3efa7eb0..962921483 100644
--- a/src/video_core/textures/convert.cpp
+++ b/src/video_core/textures/convert.cpp
@@ -35,7 +35,7 @@ void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
     S8Z24 s8z24_pixel{};
     Z24S8 z24s8_pixel{};
     constexpr auto bpp{
-        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
     for (std::size_t y = 0; y < height; ++y) {
         for (std::size_t x = 0; x < width; ++x) {
             const std::size_t offset{bpp * (y * width + x)};
@@ -73,7 +73,7 @@ void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format,
             in_data, width, height, depth, block_width, block_height);
         std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
 
-    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
         Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
     }
 }
@@ -85,7 +85,7 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h
                      static_cast<u32>(pixel_format));
         UNREACHABLE();
 
-    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
         Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
     }
 }
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 98beabef1..16d46a018 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -184,53 +184,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
     }
 }
 
-u32 BytesPerPixel(TextureFormat format) {
-    switch (format) {
-    case TextureFormat::DXT1:
-    case TextureFormat::DXN1:
-        // In this case a 'pixel' actually refers to a 4x4 tile.
-        return 8;
-    case TextureFormat::DXT23:
-    case TextureFormat::DXT45:
-    case TextureFormat::DXN2:
-    case TextureFormat::BC7U:
-    case TextureFormat::BC6H_UF16:
-    case TextureFormat::BC6H_SF16:
-        // In this case a 'pixel' actually refers to a 4x4 tile.
-        return 16;
-    case TextureFormat::R32_G32_B32:
-        return 12;
-    case TextureFormat::ASTC_2D_4X4:
-    case TextureFormat::ASTC_2D_5X4:
-    case TextureFormat::ASTC_2D_8X8:
-    case TextureFormat::ASTC_2D_8X5:
-    case TextureFormat::ASTC_2D_10X8:
-    case TextureFormat::ASTC_2D_5X5:
-    case TextureFormat::A8R8G8B8:
-    case TextureFormat::A2B10G10R10:
-    case TextureFormat::BF10GF11RF11:
-    case TextureFormat::R32:
-    case TextureFormat::R16_G16:
-        return 4;
-    case TextureFormat::A1B5G5R5:
-    case TextureFormat::B5G6R5:
-    case TextureFormat::G8R8:
-    case TextureFormat::R16:
-        return 2;
-    case TextureFormat::R8:
-        return 1;
-    case TextureFormat::R16_G16_B16_A16:
-        return 8;
-    case TextureFormat::R32_G32_B32_A32:
-        return 16;
-    case TextureFormat::R32_G32:
-        return 8;
-    default:
-        UNIMPLEMENTED_MSG("Format not implemented");
-        return 1;
-    }
-}
-
 void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                       u32 block_depth, u32 width_spacing) {
@@ -275,24 +228,30 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
     }
 }
 
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
-                      u32 block_height_bit, u32 offset_x, u32 offset_y) {
-    const u32 block_height = 1U << block_height_bit;
-    for (u32 line = 0; line < subrect_height; ++line) {
-        const u32 y2 = line + offset_y;
-        const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height +
-                                  ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-        const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y];
-        for (u32 x = 0; x < subrect_width; ++x) {
-            const u32 x2 = (x + offset_x) * bytes_per_pixel;
-            const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height;
-            const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X];
-            const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel;
-            u8* dest_line = unswizzled_data + unswizzled_offset;
-            u8* source_addr = swizzled_data + swizzled_offset;
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+    // Note: block_height is consumed as a shift amount throughout this function.
+    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+    const u32 gobs_per_row = (width * bytes_per_pixel + GOB_SIZE_X - 1) / GOB_SIZE_X;
+    const u32 block_row_size = gobs_per_row << x_shift;
+    const u32 gob_in_block_mask = (1U << block_height) - 1;
+    // Copy line_count rows of line_length_in pixels from the tiled input to the linear output.
 
-            std::memcpy(dest_line, source_addr, bytes_per_pixel);
+    for (u32 row = 0; row < line_count; ++row) {
+        const u32 src_row = origin_y + row;
+        const u32 gob_row = src_row >> GOB_SIZE_Y_SHIFT;
+        // Vertical component of the tiled source offset for this row.
+        const u32 row_base = (gob_row >> block_height) * block_row_size +
+                             ((gob_row & gob_in_block_mask) << GOB_SIZE_SHIFT);
+        const auto& gob_lut = LEGACY_SWIZZLE_TABLE[src_row % GOB_SIZE_Y];
+        for (u32 col = 0; col < line_length_in; ++col) {
+            const u32 src_byte_x = (origin_x + col) * bytes_per_pixel;
+            // Tiled source offset = row part + column part + swizzle table entry.
+            const u32 column_base = (src_byte_x >> GOB_SIZE_X_SHIFT) << x_shift;
+            const u32 src_offset = row_base + column_base + gob_lut[src_byte_x % GOB_SIZE_X];
+            // Linear destination offset: rows are pitch bytes apart in the output.
+            const u32 dst_offset = row * pitch + col * bytes_per_pixel;
+            std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
         }
     }
 }
@@ -308,7 +267,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
 
     const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth));
+    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
 
     for (u32 line = 0; line < line_count; ++line) {
         const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
@@ -348,48 +307,6 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
     }
 }
 
-std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
-                              u32 height) {
-    std::vector<u8> rgba_data;
-
-    // TODO(Subv): Implement.
-    switch (format) {
-    case TextureFormat::DXT1:
-    case TextureFormat::DXT23:
-    case TextureFormat::DXT45:
-    case TextureFormat::DXN1:
-    case TextureFormat::DXN2:
-    case TextureFormat::BC7U:
-    case TextureFormat::BC6H_UF16:
-    case TextureFormat::BC6H_SF16:
-    case TextureFormat::ASTC_2D_4X4:
-    case TextureFormat::ASTC_2D_8X8:
-    case TextureFormat::ASTC_2D_5X5:
-    case TextureFormat::ASTC_2D_10X8:
-    case TextureFormat::A8R8G8B8:
-    case TextureFormat::A2B10G10R10:
-    case TextureFormat::A1B5G5R5:
-    case TextureFormat::B5G6R5:
-    case TextureFormat::R8:
-    case TextureFormat::G8R8:
-    case TextureFormat::BF10GF11RF11:
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R32_G32:
-    case TextureFormat::R32:
-    case TextureFormat::R16:
-    case TextureFormat::R16_G16:
-    case TextureFormat::R32_G32_B32:
-        // TODO(Subv): For the time being just forward the same data without any decoding.
-        rgba_data = texture_data;
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Format not implemented");
-        break;
-    }
-
-    return rgba_data;
-}
-
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                           u32 block_height, u32 block_depth) {
     if (tiled) {
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 232b696b3..01e156bc8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -38,10 +38,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
                       u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
                       bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
 
-/// Decodes an unswizzled texture into a A8R8G8B8 texture.
-std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
-                              u32 height);
-
 /// This function calculates the correct size of a texture depending if it's tiled or not.
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                           u32 block_height, u32 block_depth);
@@ -52,9 +48,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
                     u32 block_height_bit, u32 offset_x, u32 offset_y);
 
 /// Copies a tiled subrectangle into a linear surface.
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
-                      u32 offset_x, u32 offset_y);
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
 
 /// @brief Swizzles a 2D array of pixels into a 3D texture
 /// @param line_length_in  Number of pixels per line
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index eba05aced..0574fef12 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -12,10 +12,10 @@
 namespace Tegra::Texture {
 
 enum class TextureFormat : u32 {
-    R32_G32_B32_A32 = 0x01,
-    R32_G32_B32 = 0x02,
-    R16_G16_B16_A16 = 0x03,
-    R32_G32 = 0x04,
+    R32G32B32A32 = 0x01,
+    R32G32B32 = 0x02,
+    R16G16B16A16 = 0x03,
+    R32G32 = 0x04,
     R32_B24G8 = 0x05,
     ETC2_RGB = 0x06,
     X8B8G8R8 = 0x07,
@@ -23,19 +23,19 @@ enum class TextureFormat : u32 {
     A2B10G10R10 = 0x09,
     ETC2_RGB_PTA = 0x0a,
     ETC2_RGBA = 0x0b,
-    R16_G16 = 0x0c,
-    G8R24 = 0x0d,
-    G24R8 = 0x0e,
+    R16G16 = 0x0c,
+    R24G8 = 0x0d,
+    R8G24 = 0x0e,
     R32 = 0x0f,
-    BC6H_SF16 = 0x10,
-    BC6H_UF16 = 0x11,
+    BC6H_SFLOAT = 0x10,
+    BC6H_UFLOAT = 0x11,
     A4B4G4R4 = 0x12,
     A5B5G5R1 = 0x13,
     A1B5G5R5 = 0x14,
     B5G6R5 = 0x15,
     B6G5R5 = 0x16,
-    BC7U = 0x17,
-    G8R8 = 0x18,
+    BC7 = 0x17,
+    R8G8 = 0x18,
     EAC = 0x19,
     EACX2 = 0x1a,
     R16 = 0x1b,
@@ -43,23 +43,23 @@ enum class TextureFormat : u32 {
     R8 = 0x1d,
     G4R4 = 0x1e,
     R1 = 0x1f,
-    E5B9G9R9_SHAREDEXP = 0x20,
-    BF10GF11RF11 = 0x21,
+    E5B9G9R9 = 0x20,
+    B10G11R11 = 0x21,
     G8B8G8R8 = 0x22,
     B8G8R8G8 = 0x23,
-    DXT1 = 0x24,
-    DXT23 = 0x25,
-    DXT45 = 0x26,
-    DXN1 = 0x27,
-    DXN2 = 0x28,
-    S8Z24 = 0x29,
+    BC1_RGBA = 0x24,
+    BC2 = 0x25,
+    BC3 = 0x26,
+    BC4 = 0x27,
+    BC5 = 0x28,
+    S8D24 = 0x29,
     X8Z24 = 0x2a,
-    Z24S8 = 0x2b,
+    D24S8 = 0x2b,
     X4V4Z24__COV4R4V = 0x2c,
     X4V4Z24__COV8R8V = 0x2d,
     V8Z24__COV4R12V = 0x2e,
-    ZF32 = 0x2f,
-    ZF32_X24S8 = 0x30,
+    D32 = 0x2f,
+    D32S8 = 0x30,
     X8Z24_X20V4S8__COV4R4V = 0x31,
     X8Z24_X20V4S8__COV8R8V = 0x32,
     ZF32_X20V4X8__COV4R4V = 0x33,
@@ -69,7 +69,7 @@ enum class TextureFormat : u32 {
     X8Z24_X16V8S8__COV4R12V = 0x37,
     ZF32_X16V8X8__COV4R12V = 0x38,
     ZF32_X16V8S8__COV4R12V = 0x39,
-    Z16 = 0x3a,
+    D16 = 0x3a,
     V8Z24__COV8R24V = 0x3b,
     X8Z24_X16V8S8__COV8R24V = 0x3c,
     ZF32_X16V8X8__COV8R24V = 0x3d,
@@ -375,7 +375,4 @@ struct FullTextureInfo {
     TSCEntry tsc;
 };
 
-/// Returns the number of bytes per pixel of the input texture format.
-u32 BytesPerPixel(TextureFormat format);
-
 } // namespace Tegra::Texture
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 45f360bdd..a14df06a3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <memory>
+
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
@@ -16,37 +17,49 @@
 #include "video_core/video_core.h"
 
 namespace {
-std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
-                                                        Core::System& system,
-                                                        Core::Frontend::GraphicsContext& context) {
+
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
+    Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+    std::unique_ptr<Core::Frontend::GraphicsContext> context) {
+    auto& telemetry_session = system.TelemetrySession();
+    auto& cpu_memory = system.Memory();
+
     switch (Settings::values.renderer_backend.GetValue()) {
     case Settings::RendererBackend::OpenGL:
-        return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
+        return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
+                                                        gpu, std::move(context));
 #ifdef HAS_VULKAN
     case Settings::RendererBackend::Vulkan:
-        return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+        return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
+                                                        gpu, std::move(context));
 #endif
     default:
         return nullptr;
     }
 }
+
 } // Anonymous namespace
 
 namespace VideoCore {
 
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+    std::unique_ptr<Tegra::GPU> gpu;
+    if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+        gpu = std::make_unique<VideoCommon::GPUAsynch>(system);
+    } else {
+        gpu = std::make_unique<VideoCommon::GPUSynch>(system);
+    }
+    // Create the GPU first: the renderer is constructed against it and bound to it below.
     auto context = emu_window.CreateSharedContext();
     const auto scope = context->Acquire();
-    auto renderer = CreateRenderer(emu_window, system, *context);
-    if (!renderer->Init()) {
+    // CreateRenderer returns nullptr for an unhandled backend, so check it before calling Init().
+    auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
+    if (!renderer || !renderer->Init()) {
         return nullptr;
     }
 
-    if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-        return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
-                                                        std::move(context));
-    }
-    return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
+    gpu->BindRenderer(std::move(renderer));
+    return gpu;
 }
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {